diff --git a/.github/workflows/build-apps-job/setup.sh b/.github/workflows/build-apps-job/setup.sh index c53816c5b..dd29e72b7 100755 --- a/.github/workflows/build-apps-job/setup.sh +++ b/.github/workflows/build-apps-job/setup.sh @@ -7,8 +7,9 @@ echo ======================================== echo ======================================== # Create the virtual environment and install the requirements. -make venv -. .venv/bin/activate +conda init bash +source /root/.bashrc +conda activate core-v-mini-mcu echo ======================================== echo ======================================== @@ -17,12 +18,12 @@ echo ======================================== echo ======================================== # The variable could also be obtained from the container. -export RISCV='/home/root/tools/riscv' &&\ +export RISCV='/tools/riscv' &&\ # All peripherals are included to make sure all apps can be built. sed 's/is_included: "no",/is_included: "yes",/' -i mcu_cfg.hjson # The MCU is generated with various memory banks to avoid example code not fitting. -make mcu-gen MEMORY_BANKS=6 +make mcu-gen X_HEEP_CFG=configs/ci.hjson echo ======================================== echo ======================================== diff --git a/.github/workflows/building.yml b/.github/workflows/building.yml index 12b2cdb83..893822c61 100644 --- a/.github/workflows/building.yml +++ b/.github/workflows/building.yml @@ -5,7 +5,7 @@ jobs: test_applications: runs-on: ubuntu-latest container: - image: ghcr.io/esl-epfl/xheep-compiler:latest + image: ghcr.io/esl-epfl/x-heep-toolchain:latest name: Builds apps with gcc and clang. All must build successfully. steps: - name: Checkout the pushed code. diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml index 2a69ed391..e1baeeb7c 100644 --- a/.github/workflows/lint.yml +++ b/.github/workflows/lint.yml @@ -1,16 +1,12 @@ # Copyright 2022 OpenHW Group # Solderpad Hardware License, Version 2.1, see LICENSE.md for details. 
# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 - # Run all lint checks name: lint on: [push, pull_request] - env: VERIBLE_VERSION: 0.0-1824-ga3b5bedf - jobs: - ##################### # Vendor Up-to-Date # ##################### @@ -28,9 +24,9 @@ jobs: - name: Re-vendor and diff run: | find . \ - -name '*.vendor.hjson' \ - | xargs -n1 util/vendor.py --verbose \ - && util/git-diff.py --error-msg "::error ::Found differences, please re-vendor." + -name '*.vendor.hjson' \ + | xargs -n1 util/vendor.py --verbose \ + && util/git-diff.py --error-msg "::error ::Found differences, please re-vendor." ################## # MCU Generator # ################## diff --git a/.github/workflows/sim-apps-job/test_apps.py b/.github/workflows/sim-apps-job/test_apps.py new file mode 100755 index 000000000..0d3cdfce7 --- /dev/null +++ b/.github/workflows/sim-apps-job/test_apps.py @@ -0,0 +1,188 @@ +""" +This script compiles and runs all the apps in the sw/applications directory +""" + +import os +import subprocess +import re + + +class BColors: + """ + Class to define colors in the terminal output. + """ + HEADER = "\033[95m" + OKBLUE = "\033[94m" + OKCYAN = "\033[96m" + OKGREEN = "\033[92m" + WARNING = "\033[93m" + FAIL = "\033[91m" + ENDC = "\033[0m" + BOLD = "\033[1m" + UNDERLINE = "\033[4m" + + +# Define parameters for the test_apps.py script +SIMULATOR = "verilator" +SIM_TIMEOUT_S = 120 +LINKER = "on_chip" +COMPILER = "gcc" + +# Blacklist of apps to skip +blacklist = [ + "example_spi_read", + "example_spi_host_dma_power_gate", + "example_spi_write", +] + +app_list = [app for app in os.listdir("sw/applications")] + +print(BColors.OKCYAN + "Apps to test:" + BColors.ENDC) +for app in app_list: + if app not in blacklist: + print(BColors.OKCYAN + f" - {app}" + BColors.ENDC) + +apps = {app: {"building": "", "simulation": ""} for app in app_list} + + +# Compile the {SIMULATOR} model and suppress the output +print(BColors.OKBLUE + f"Generating {SIMULATOR} model of X-HEEP..." 
+ BColors.ENDC) +try: + simulation_build_output = subprocess.run( + ["make", f"{SIMULATOR}-sim"], capture_output=True, check=True # check=True: a failed build must raise so the except branch below runs + ) +except subprocess.CalledProcessError as exc: + print(BColors.FAIL + "=====================================" + BColors.ENDC) + print(BColors.FAIL + "Error building verilated model!" + BColors.ENDC) + print(BColors.FAIL + "=====================================" + BColors.ENDC) + print(str(exc.stderr.decode("utf-8"))) + exit(1) +else: + print( + BColors.OKGREEN + + f"Generated {SIMULATOR} model of X-HEEP successfully!" + + BColors.ENDC + ) + +error_pattern = r"Program Finished with value (\d+)" + +# Compile every app and run the simulator +for an_app in apps.keys(): + if an_app not in blacklist: + apps[an_app] = {"building": "OK", "simulation": "OK"} + print(BColors.OKBLUE + f"Compiling {an_app}..." + BColors.ENDC) + try: + compile_output = subprocess.run( + [ + "make", + "app", + f"PROJECT={an_app}", + f"COMPILER={COMPILER}", + f"LINKER={LINKER}", + ], + capture_output=True, + check=True, + ) + except subprocess.CalledProcessError as exc: + print(BColors.FAIL + f"Error compiling {an_app}!" + BColors.ENDC) + print(exc.stderr.decode("utf-8")) + apps[an_app] = {"building": "Failed", "simulation": "Skipped"} + else: + apps[an_app] = {"building": "OK", "simulation": "Skipped"} + print(BColors.OKGREEN + f"Compiled successfully {an_app}!" + BColors.ENDC) + print( + BColors.OKBLUE + + f"Running {SIMULATOR} simulation of {an_app}..." + + BColors.ENDC + ) + try: + run_output = subprocess.run( + ["./Vtestharness", "+firmware=../../../sw/build/main.hex"], + cwd="build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-verilator", + capture_output=True, + timeout=SIM_TIMEOUT_S, + check=False, + ) + except subprocess.TimeoutExpired: + print( + BColors.FAIL + f"Simulation of {an_app} timed out!" 
+ BColors.ENDC + ) + apps[an_app] = {"building": "OK", "simulation": "Timed out"} + else: + match = re.search(error_pattern, str(run_output.stdout.decode("utf-8"))) + if ( + "Error" in str(run_output.stdout.decode("utf-8")) + or match is None or match.group(1) != "0" # no exit-value line at all counts as a failure, not a crash + ): + print( + BColors.FAIL + + str(run_output.stdout.decode("utf-8")) + + BColors.ENDC + ) + uart_output = open( + "build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-verilator/uart0.log", + "r", + encoding="utf-8", + ) + print(BColors.FAIL + "UART output:" + BColors.ENDC) + print(BColors.FAIL + uart_output.read() + BColors.ENDC) + uart_output.close() + apps[an_app] = {"building": "OK", "simulation": "Failed"} + else: + apps[an_app] = {"building": "OK", "simulation": "OK"} + print( + BColors.OKGREEN + f"Ran {an_app} successfully!" + BColors.ENDC + ) + print(BColors.OKBLUE + f"Finished running {an_app}." + BColors.ENDC) + else: + print(BColors.WARNING + f"Skipping {an_app}..." + BColors.ENDC) + apps[an_app] = {"building": "Skipped", "simulation": "Skipped"} + +# Print the results +print(BColors.BOLD + "=================================" + BColors.ENDC) +print(BColors.BOLD + "Results:" + BColors.ENDC) +print(BColors.BOLD + "=================================" + BColors.ENDC) + +# Filter the dictionary by values +ok_apps = [ + app + for app, status in apps.items() + if (status["simulation"] == "OK" and status["building"] == "OK") +] +no_build_apps = [app for app, status in apps.items() if status["building"] == "Failed"] +no_sim_apps = [app for app, status in apps.items() if status["simulation"] == "Failed"] +skipped_apps = [ + app + for app, status in apps.items() + if (status["simulation"] == "Skipped" or status["building"] == "Skipped") +] +timed_out_apps = [ + app for app, status in apps.items() if status["simulation"] == "Timed out" +] + +# Print the filtered results +print( + BColors.OKGREEN + + f"{len(ok_apps)} out of {len(app_list)} apps compiled and ran successfully!" 
+ + BColors.ENDC +) +if len(no_build_apps) > 0: + print(BColors.FAIL + f"{len(no_build_apps)} apps failed to build!" + BColors.ENDC) + for failed_build_app in no_build_apps: + print(BColors.FAIL + f" - {failed_build_app}" + BColors.ENDC) +if len(no_sim_apps) > 0: + print(BColors.FAIL + f"{len(no_sim_apps)} apps failed to run!" + BColors.ENDC) + for failed_run_app in no_sim_apps: + print(BColors.FAIL + f" - {failed_run_app}" + BColors.ENDC) +if len(skipped_apps) > 0: + print(BColors.WARNING + f"{len(skipped_apps)} apps were skipped!" + BColors.ENDC) + for skipped_app in skipped_apps: + print(BColors.WARNING + f" - {skipped_app}" + BColors.ENDC) +if len(timed_out_apps) > 0: + print(BColors.FAIL + f"{len(timed_out_apps)} apps timed out!" + BColors.ENDC) + for timed_out_app in timed_out_apps: + print(BColors.FAIL + f" - {timed_out_app}" + BColors.ENDC) +print(BColors.BOLD + "=================================" + BColors.ENDC) + +if len(no_build_apps) > 0 or len(no_sim_apps) > 0: + exit(1) diff --git a/.github/workflows/simulate.yml b/.github/workflows/simulate.yml new file mode 100644 index 000000000..ffb167220 --- /dev/null +++ b/.github/workflows/simulate.yml @@ -0,0 +1,21 @@ +name: Simulate all apps +on: [pull_request] + +jobs: + simulate: + runs-on: ubuntu-latest + container: + image: ghcr.io/esl-epfl/x-heep-toolchain:latest + name: Simulate all apps. All must pass. 
+ steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Run all apps + run: | + conda init bash + source /root/.bashrc + conda activate core-v-mini-mcu + make clean-all + sed 's/is_included: "no",/is_included: "yes",/' -i mcu_cfg.hjson + make mcu-gen MEMORY_BANKS=6 + python3 .github/workflows/sim-apps-job/test_apps.py \ No newline at end of file diff --git a/.gitignore b/.gitignore index f2b85af8d..710c28c7f 100644 --- a/.gitignore +++ b/.gitignore @@ -7,8 +7,10 @@ build/ *.dis *.map *.do +*.jou +*.str .venv/ -util/__pycache__/* +__pycache__/ # ignore apps output file run_verif_rtl_log.txt @@ -32,8 +34,11 @@ hw/system/pad_control/data/pad_control.hjson hw/system/pad_control/rtl/pad_control.sv hw/system/pad_control/rtl/pad_control_reg_pkg.sv hw/system/pad_control/rtl/pad_control_reg_top.sv +hw/fpga/sram_wrapper.sv +hw/fpga/scripts/generate_sram.tcl -# same for the C header file and linker scripts +# same for the C header file and linker scripts and assembly files +sw/device/lib/crt/crt0.S sw/device/lib/runtime/core_v_mini_mcu.h sw/linker/link.ld sw/linker/link_flash_exec.ld @@ -46,6 +51,9 @@ sw/device/lib/drivers/**/*_structs.h # openroad flow/OpenROAD-flow-scripts +# automatically generated docs +/docs/source/Configuration/generated + # User-dependent configuration files .vscode private/ diff --git a/Makefile b/Makefile index cef2e5c06..d0579fcc0 100644 --- a/Makefile +++ b/Makefile @@ -9,8 +9,10 @@ mkfile_path := $(shell dirname "$(realpath $(firstword $(MAKEFILE_LIST)))") $(info $$You are executing from: $(mkfile_path)) # Include the self-documenting tool -FILE=$(mkfile_path)/Makefile -include $(mkfile_path)/util/generate-makefile-help +export FILE_FOR_HELP=$(mkfile_path)/Makefile + +help: + ${mkfile_path}/util/MakefileHelp # Setup to autogenerate python virtual environment VENVDIR?=$(WORKDIR)/.venv @@ -36,7 +38,8 @@ LINKER ?= on_chip # Target options are 'sim' (default) and 'pynq-z2' and 'nexys-a7-100t' TARGET ?= sim -MCU_CFG ?= mcu_cfg.hjson 
+MCU_CFG_PERIPHERALS ?= mcu_cfg.hjson +X_HEEP_CFG ?= configs/general.hjson PAD_CFG ?= pad_cfg.hjson EXT_PAD_CFG ?= @@ -63,23 +66,23 @@ FLASHREAD_ADDR ?= 0x0 FLASHREAD_FILE ?= $(mkfile_path)/flashcontent.hex FLASHREAD_BYTES ?= 256 + +#binary to store in flash memory +FLASHWRITE_FILE ?= $(mkfile_path)/sw/build/main.hex + #max address in the hex file, used to program the flash -ifeq ($(wildcard sw/build/main.hex),) - MAX_HEX_ADDRESS = 0 - MAX_HEX_ADDRESS_DEC = 0 - BYTES_AFTER_MAX_HEX_ADDRESS = 0 - FLASHRWITE_BYTES = 0 +ifeq ($(wildcard $(FLASHWRITE_FILE)),) + MAX_HEX_ADDRESS := 0 + MAX_HEX_ADDRESS_DEC := 0 + BYTES_AFTER_MAX_HEX_ADDRESS := 0 + FLASHWRITE_BYTES := 0 else - MAX_HEX_ADDRESS = $(shell cat sw/build/main.hex | grep "@" | tail -1 | cut -c2-) - MAX_HEX_ADDRESS_DEC = $(shell printf "%d" 0x$(MAX_HEX_ADDRESS)) - BYTES_AFTER_MAX_HEX_ADDRESS = $(shell tac sw/build/main.hex | awk 'BEGIN {count=0} /@/ {print count; exit} {count++}') - FLASHRWITE_BYTES = $(shell echo $(MAX_HEX_ADDRESS_DEC) + $(BYTES_AFTER_MAX_HEX_ADDRESS)*16 | bc) + MAX_HEX_ADDRESS := $(shell cat $(FLASHWRITE_FILE) | grep "@" | tail -1 | cut -c2-) + MAX_HEX_ADDRESS_DEC := $(shell printf "%d" 0x$(MAX_HEX_ADDRESS)) + BYTES_AFTER_MAX_HEX_ADDRESS := $(shell tac $(FLASHWRITE_FILE) | awk 'BEGIN {count=0} /@/ {print count; exit} {count++}') + FLASHWRITE_BYTES := $(shell echo $(MAX_HEX_ADDRESS_DEC) + $(BYTES_AFTER_MAX_HEX_ADDRESS)*16 | bc) endif - -#binary to store in flash memory -FLASHWRITE_FILE = $(mkfile_path)/sw/build/main.hex - # Export variables to sub-makefiles export @@ -98,28 +101,31 @@ environment.yml: python-requirements.txt ## @param MEMORY_BANKS=[2(default) to (16 - MEMORY_BANKS_IL)] ## @param MEMORY_BANKS_IL=[0(default),2,4,8] mcu-gen: - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/include --cpu $(CPU) --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --external_pads 
$(EXT_PAD_CFG) --pkg-sv hw/core-v-mini-mcu/include/core_v_mini_mcu_pkg.sv.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/system_bus.sv.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/system_xbar.sv.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/memory_subsystem.sv.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir tb/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv tb/tb_util.svh.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/system/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/system/pad_ring.sv.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/system/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/system/x_heep_system.sv.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir sw/device/lib/runtime --cpu $(CPU) --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) 
--external_domains $(EXTERNAL_DOMAINS) --header-c sw/device/lib/runtime/core_v_mini_mcu.h.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir sw/linker --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --linker_script sw/linker/link.ld.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir . --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --pkg-sv ./core-v-mini-mcu.upf.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/ip/power_manager/rtl --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --pkg-sv hw/ip/power_manager/data/power_manager.sv.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/ip/power_manager/data --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --pkg-sv hw/ip/power_manager/data/power_manager.hjson.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/include --cpu $(CPU) --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --external_pads $(EXT_PAD_CFG) --pkg-sv hw/core-v-mini-mcu/include/core_v_mini_mcu_pkg.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/system_bus.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/system_xbar.sv.tpl + $(PYTHON) util/mcu_gen.py --config 
$(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/memory_subsystem.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir tb/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv tb/tb_util.svh.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/system/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/system/pad_ring.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/core-v-mini-mcu/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/core-v-mini-mcu/core_v_mini_mcu.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/system/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/system/x_heep_system.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir sw/device/lib/runtime --cpu $(CPU) --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --header-c sw/device/lib/runtime/core_v_mini_mcu.h.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir sw/linker --bus $(BUS) 
--memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --linker_script sw/linker/link.ld.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir . --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --pkg-sv ./core-v-mini-mcu.upf.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/ip/power_manager/rtl --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --pkg-sv hw/ip/power_manager/data/power_manager.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/ip/power_manager/data --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --pkg-sv hw/ip/power_manager/data/power_manager.hjson.tpl bash -c "cd hw/ip/power_manager; source power_manager_gen.sh; cd ../../../" - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir sw/device/lib/drivers/power_manager --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --pkg-sv sw/device/lib/drivers/power_manager/data/power_manager.h.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/system/pad_control/data --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_pads $(EXT_PAD_CFG) --pkg-sv hw/system/pad_control/data/pad_control.hjson.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir hw/system/pad_control/rtl --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_pads $(EXT_PAD_CFG) --pkg-sv hw/system/pad_control/rtl/pad_control.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) 
--pads_cfg $(PAD_CFG) --outdir sw/device/lib/drivers/power_manager --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_domains $(EXTERNAL_DOMAINS) --pkg-sv sw/device/lib/drivers/power_manager/data/power_manager.h.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/system/pad_control/data --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_pads $(EXT_PAD_CFG) --pkg-sv hw/system/pad_control/data/pad_control.hjson.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/system/pad_control/rtl --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --external_pads $(EXT_PAD_CFG) --pkg-sv hw/system/pad_control/rtl/pad_control.sv.tpl bash -c "cd hw/system/pad_control; source pad_control_gen.sh; cd ../../../" - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir sw/linker --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --linker_script sw/linker/link_flash_exec.ld.tpl - $(PYTHON) util/mcu_gen.py --cfg $(MCU_CFG) --pads_cfg $(PAD_CFG) --outdir sw/linker --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --linker_script sw/linker/link_flash_load.ld.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir sw/linker --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --linker_script sw/linker/link_flash_exec.ld.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir sw/linker --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --linker_script sw/linker/link_flash_load.ld.tpl $(PYTHON) ./util/structs_periph_gen.py + $(PYTHON) util/mcu_gen.py --config 
$(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/fpga/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/fpga/sram_wrapper.sv.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir hw/fpga/scripts/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv hw/fpga/scripts/generate_sram.tcl.tpl + $(PYTHON) util/mcu_gen.py --config $(X_HEEP_CFG) --cfg_peripherals $(MCU_CFG_PERIPHERALS) --pads_cfg $(PAD_CFG) --outdir sw/device/lib/crt/ --bus $(BUS) --memorybanks $(MEMORY_BANKS) --memorybanks_il $(MEMORY_BANKS_IL) --tpl-sv sw/device/lib/crt/crt0.S.tpl $(MAKE) verible ## Display mcu_gen.py help @@ -134,7 +140,7 @@ verible: ## Generates the build folder in sw using CMake to build (compile and linking) ## @param PROJECT= -## @param TARGET=sim(default),pynq-z2,nexys-a7-100t +## @param TARGET=sim(default),systemc,pynq-z2,nexys-a7-100t,zcu104 ## @param LINKER=on_chip(default),flash_load,flash_exec ## @param COMPILER=gcc(default), clang ## @param COMPILER_PREFIX=riscv32-unknown-(default) @@ -153,10 +159,14 @@ app-compile-all: ## @section Simulation -## Verilator simulation +## Verilator simulation with C++ verilator-sim: $(FUSESOC) --cores-root . run --no-export --target=sim --tool=verilator $(FUSESOC_FLAGS) --build openhwgroup.org:systems:core-v-mini-mcu ${FUSESOC_PARAM} 2>&1 | tee buildsim.log +## Verilator simulation with SystemC +verilator-sim-sc: + $(FUSESOC) --cores-root . run --no-export --target=sim_sc --tool=verilator $(FUSESOC_FLAGS) --build openhwgroup.org:systems:core-v-mini-mcu ${FUSESOC_PARAM} 2>&1 | tee buildsim.log + ## Questasim simulation questasim-sim: $(FUSESOC) --cores-root . 
run --no-export --target=sim --tool=modelsim $(FUSESOC_FLAGS) --build openhwgroup.org:systems:core-v-mini-mcu ${FUSESOC_PARAM} 2>&1 | tee buildsim.log @@ -219,7 +229,7 @@ app-simulate-all: ## @section Vivado ## Builds (synthesis and implementation) the bitstream for the FPGA version using Vivado -## @param FPGA_BOARD=nexys-a7-100t,pynq-z2 +## @param FPGA_BOARD=nexys-a7-100t,pynq-z2,zcu104 ## @param FUSESOC_FLAGS=--flag= vivado-fpga: $(FUSESOC) --cores-root . run --no-export --target=$(FPGA_BOARD) $(FUSESOC_FLAGS) --build openhwgroup.org:systems:core-v-mini-mcu ${FUSESOC_PARAM} 2>&1 | tee buildvivado.log @@ -252,13 +262,18 @@ flash-readid: ## Loads the obtained binary to the EPFL_Programmer flash flash-prog: cd sw/vendor/yosyshq_icestorm/iceprog; make; \ - ./iceprog -a $(FLASHRWITE_BYTES) -d i:0x0403:0x6011 -I B $(FLASHWRITE_FILE); + ./iceprog -a $(FLASHWRITE_BYTES) -d i:0x0403:0x6011 -I B $(FLASHWRITE_FILE); ## Read the EPFL_Programmer flash flash-read: cd sw/vendor/yosyshq_icestorm/iceprog; make; \ ./iceprog -d i:0x0403:0x6011 -I B -o $(shell printf "%d" $(FLASHREAD_ADDR)) -R $(FLASHREAD_BYTES) $(FLASHREAD_FILE); +## Erase the EPFL_Programmer flash +flash-erase: + cd sw/vendor/yosyshq_icestorm/iceprog; make; \ + ./iceprog -d i:0x0403:0x6011 -I B -b; + ## Run openOCD w/ EPFL_Programmer openOCD_epflp: xterm -e openocd -f ./tb/core-v-mini-mcu-pynq-z2-esl-programmer.cfg; diff --git a/README.md b/README.md index 18c97a10c..7d2a32734 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,22 @@ +
+

+ +`X-HEEP` (eXtendable Heterogeneous Energy-Efficient Platform) is a `RISC-V` microcontroller described in `SystemVerilog` +that can be configured to target small and tiny platforms as well as extended to support accelerators. +The cool thing about `X-HEEP` is that we provide a simple customizable MCU (CPUs, common peripherals, memories, etc.) +so that you can extend it with your own accelerator without modifying the MCU, but just instantiating it in your design. +By doing so, you inherit an IP capable of booting an RTOS (such as `freeRTOS`) with the whole FW stack, including `HAL` drivers and `SDK`, +and you can focus on building your special HW supported by the microcontroller. + +`X-HEEP` supports simulation with Verilator, Questasim, etc. Moreover, FW can be built and linked by using `CMake` either with gcc or with clang. It can be implemented on FPGA, and it supports implementation in Silicon, which is its main (but not only) target. See below for more details. + +The block diagram below shows the `X-HEEP` MCU + +

+ + +> :bookmark_tabs: Please refer to the documentation in [Read the Docs](https://x-heep.readthedocs.io/en/latest/index.html) + # Repository folder structure . @@ -23,22 +42,6 @@ ├── util └── README.md -
-

- -`X-HEEP` (eXtendable Heterogeneous Energy-Efficient Platform) is a `RISC-V` microcontroller described in `SystemVerilog` -that can be configured to target small and tiny platforms as well as extended to support accelerators. -The cool thing about `X-HEEP` is that we provide a simple customizable MCU, so CPUs, common peripherals, memories, etc. -so that you can extend it with your own accelerator without modifying the MCU, but just instantiating it in your design. -By doing so, you inherit an IP capable of booting RTOS (such as `freeRTOS`) with the whole FW stack, including `HAL` drivers and `SDK`, -and you can focus on building your special HW supported by the microcontroller. - -`X-HEEP` supports simulation with Verilator, Questasim, etc. Morever, FW can be built and linked by using `CMake` either with gcc or with clang. It can be implemented on FPGA, and it supports implementation in Silicon, which is its main (but not only) target. See below for more details. - -The block diagram below shows the `X-HEEP` MCU - -

- # Reference If you use X-HEEP in your academic work you can cite us: [X-HEEP Paper](https://arxiv.org/abs/2401.05548) @@ -52,556 +55,4 @@ If you use X-HEEP in your academic work you can cite us: [X-HEEP Paper](https:// archivePrefix={arXiv}, primaryClass={cs.AR} } -``` - -# Self-documented Makefile - -Note that under `util` folder, the file `generate-makefile-help` is employed to generate a self-documented helping output. In case of including any other target or command under the main `Makefile`, follow the same general and parameter descriptions as already provided for every target. Check the `help` output by doing `make` or `make help`. Moreover, **note that some of the parameters required for some of the targets are initiated with _default values_** - -# Prerequisite - -## 1. OS requirements - -To use `X-HEEP`, first make sure you have the following apt packages, or install them as: - -```bash -sudo apt install lcov libelf1 libelf-dev libftdi1-2 libftdi1-dev libncurses5 libssl-dev libudev-dev libusb-1.0-0 lsb-release texinfo autoconf cmake flex bison libexpat-dev gawk tree xterm python3-venv python3-dev -``` - -In general, have a look at the [Install required software](https://opentitan.org/guides/getting_started/index.html) section of the OpenTitan documentation. - -It has been tested only on `Ubuntu 20`, and we know it does NOT WORK on `Ubuntu 22`. - -## 2. Python - -We rely on either (a) `miniconda`, or (b) `virtual environment` enviroment. - -Choose between `2.a` or `2.b` to setup your enviroment. 
- -### 2.a Miniconda - -Install [Miniconda](https://docs.conda.io/en/latest/miniconda.html#linux-installers) python 3.8 version as described in the link, -and create the Conda enviroment: - -```bash -make conda -``` - -You need to do it only the first time, then just activate the environment everytime you work with `X-HEEP` as - -```bash -conda activate core-v-mini-mcu -``` - -### 2.b Virtual Environment - -Install the python virtual environment just as: - -```bash -make venv -``` - -You need to do it only the first time, then just activate the environment everytime you work with `X-HEEP` as - -```bash -source .venv/bin/activate -``` - -## 3. Install the RISC-V Compiler: - -``` -git clone --branch 2022.01.17 --recursive https://github.com/riscv/riscv-gnu-toolchain -cd riscv-gnu-toolchain -./configure --prefix=/home/$USER/tools/riscv --with-abi=ilp32 --with-arch=rv32imc --with-cmodel=medlow -make -``` - -Then, set the `RISCV` env variable as: - -``` -export RISCV=/home/$USER/tools/riscv -``` - -Optionally you can also compile with clang/LLVM instead of gcc. For that you must install the clang compiler into the same `RISCV` path. The binaries of gcc and clang do not collide so you can have both residing in the same `RISCV` directory. For this you can set the `-DCMAKE_INSTALL_PREFIX` cmake variable to `$RISCV` when building LLVM. This can be accomplished by doing the following: - -``` -git clone https://github.com/llvm/llvm-project.git -cd llvm-project -git checkout llvmorg-14.0.0 -mkdir build && cd build -cmake -G "Unix Makefiles" -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=$RISCV -DLLVM_TARGETS_TO_BUILD="RISCV" ../llvm -cmake --build . --target install -``` - -## 4. 
Install Verilator: - -``` -export VERILATOR_VERSION=4.210 - -git clone https://github.com/verilator/verilator.git -cd verilator -git checkout v$VERILATOR_VERSION - -autoconf -./configure --prefix=/home/$USER/tools/verilator/$VERILATOR_VERSION -make -make install -``` -Then, set the `PATH` env variable to as: - -``` -export PATH=/home/$USER/tools/verilator/$VERILATOR_VERSION/bin:$PATH -``` - -In general, have a look at the [Install Verilator](https://opentitan.org/guides/getting_started/setup_verilator.html) section of the OpenTitan documentation. - -If you want to see the vcd waveforms generated by the Verilator simulation, install GTKWAVE: - -``` -sudo apt install libcanberra-gtk-module libcanberra-gtk3-module -sudo apt-get install -y gtkwave -``` - -## Files are formatted with Verible - -We use version v0.0-1824-ga3b5bedf - -See: [Install Verible](https://opentitan.org/guides/getting_started/index.html#step-7a-install-verible-optional) - -To format your RTL code type: - -``` -make verible -``` -## Compilation Flow and Package Manager - -We use [FuseSoC](https://github.com/olofk/fusesoc) for all the tools we use. - -The `fusesoc` commands are inside the Makefile. - -# Adding external IPs - -This repository relies on [Vendor](https://opentitan.org/book/util/doc/vendor.html) to add new IPs. The `vendor.py` script in the [`./util`](./util/) folder implements what is described above, while [this](./docs/source/How_to/ExternalDevices.md) file contains additional information on how to connect external devices to the system. - -# Compiling with Makefile - -You can compile the example applications and the platform using the Makefile. Type 'make help' or 'make' for more information. Moreover, please, check the different 'clean' commands to verify that you are using the corret one. 
- -## Generate core-v-mini-mcu package - -First, you have to generate the SystemVerilog package and C header file of the core-v-mini-mcu: - -``` -make mcu-gen -``` - -By default, `X-HEEP` deploys the [cv32e20](https://github.com/openhwgroup/cve2) RISC-V CPU. -Other supported CPUs are: the [cv32e40p](https://github.com/openhwgroup/cv32e40p), [cv32e40x](https://github.com/openhwgroup/cv32e40x), and the [cv32e40px](https://github.com/esl-epfl/cv32e40px). -The default bus type of `X-HEEP` is a single-master-at-a-time architecture, (called `onetoM`), but the cross-bar architecture is also supported by setting -the bus to `NtoM`. Also, the user can select the number of 32kB banks addressed in continuous mode and/or the interleaved mode. -By default, `X-HEEP` is generated with 2 continuous banks and 0 interleaved banks. - -Below an example that changes the default configuration: - -``` -make mcu-gen CPU=cv32e40p BUS=NtoM MEMORY_BANKS=12 MEMORY_BANKS_IL=4 -``` - -The last command generates x-heep with the cv32e40p core, with a parallel bus, and 16 memory banks (12 continuous and 4 interleaved), -each 32KB, for a total memory of 512KB. - -If you are using `X-HEEP` just as a controller for your own system and you do not need any peripheral, you can use the `minimal` configuration file -when generating the MCU as: - -``` -make mcu-gen MCU_CFG=mcu_cfg_minimal.hjson -``` - -The `minimal` configuration is a work-in-progress, thus not all the APPs have been tested. - -## Compiling Software - -Don't forget to set the `RISCV` env variable to the compiler folder (without the `/bin` included). -To run 'hello world' application, just type 'make app'. 
- -``` -make app -``` - -To run any other application, please use the following command with appropiate parameters: - -``` -app PROJECT= TARGET=sim(default),pynq-z2 LINKER=on_chip(default),flash_load,flash_exec COMPILER=gcc(default),clang COMPILER_PREFIX=riscv32-unknown-(default) ARCH=rv32imc(default), - -Params: -- PROJECT (ex: , hello_world(default)) -- TARGET (ex: sim(default),pynq-z2) -- LINKER (ex: on_chip(default),flash_load,flash_exec) -- COMPILER (ex: gcc(default),clang) -- COMPILER_PREFIX (ex: riscv32-unknown-(default)) -- ARCH (ex: rv32imc(default),) -``` - -For instance, to run 'hello world' app for the pynq-z2 FPGA targets, just run: - -``` -make app TARGET=pynq-z2 -``` - -Or, if you use the OpenHW Group [GCC](https://www.embecosm.com/resources/tool-chain-downloads/#corev) compiler with CORE_PULP extensions, make sure to point the `RISCV` env variable to the OpenHW Group compiler, then just run: - -``` -make app COMPILER_PREFIX=riscv32-corev- ARCH=rv32imc_zicsr_zifencei_xcvhwlp1p0_xcvmem1p0_xcvmac1p0_xcvbi1p0_xcvalu1p0_xcvsimd1p0_xcvbitmanip1p0 -``` - -This will create the executable file to be loaded into your target system (ASIC, FPGA, Simulation). -Remember that, `X-HEEP` is using CMake to compile and link. Thus, the generated files after having -compiled and linked are under `sw\build` - -Alternatively, in case you are doing pure FW development and you are used to developing using Integrated Development Evironments (IDEs), please check [the IDE readme](./IDEs.md). - -## FreeROTS based applications - -'X-HEEP' supports 'FreeRTOS' based applications. Please see `sw\applications\blinky_freertos`. - -After that, you can run the command to compile and link the FreeRTOS based application. Please also set 'LINKER' and 'TARGET' parameters if needed. - -``` -make app PROJECT=blinky_freertos -``` - -The main FreeRTOS configuration is allocated under `sw\freertos`, in `FreeRTOSConfig.h`. Please, change this file based on your application requirements. 
-Moreover, FreeRTOS is being fetch from 'https://github.com/FreeRTOS/FreeRTOS-Kernel.git' by CMake. Specifically, 'V10.5.1' is used. Finally, the fetch repository is located under `sw\build\_deps` after building. - -## Simulating - -This project supports simulation with Verilator, Synopsys VCS, Siemens Questasim and Cadence Xcelium. -It relies on `fusesoc` to handle multiple EDA tools and parameters. -For example, if you want to set the `FPU` and `COREV_PULP` parameters of the `cv32e40p` CPU, -you need to add next to your compilation command `FUSESOC_PARAM="--COREV_PULP=1 --FPU=1"` -Below the different EDA examples commands. - -### Compiling for Verilator - -To simulate your application with Verilator, first compile the HDL: - -``` -make verilator-sim -``` - -then, go to your target system built folder - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-verilator -``` - -and type to run your compiled software: - -``` -./Vtestharness +firmware=../../../sw/build/main.hex -``` - -or to execute all these three steps type: - -``` -make run-helloworld -``` - -### Compiling for VCS - -To simulate your application with VCS, first compile the HDL: - -``` -make vcs-sim -``` - -then, go to your target system built folder - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-vcs -``` - -and type to run your compiled software: - -``` -./openhwgroup.org_systems_core-v-mini-mcu_0 +firmware=../../../sw/build/main.hex -``` - -Waveforms can be viewed with Verdi. 
Make sure you have the env variable `VERDI_HOME` set to your Verdi install folder, then run your compiled software as above, but with the `-gui` flag: - -``` -./openhwgroup.org_systems_core-v-mini-mcu_0 +firmware=../../../sw/build/main.hex -gui -``` - -An Analog / Mixed-Signal simulation of X-HEEP, combining both the RTL system verilog files for the digital part and a SPICE file connected through a `control.init` file for the analog / mixed-signal part, can be ran by typing - -``` -make vcs-ams-sim -``` - -then going to the target system built folder - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-vcs -``` - -and running the same executable as for the digital simulation. Note that with Verdi you can view both the digital and the analog waveforms. - -Additional instructions on how to run an analog / mixed-signal simulation of X-HEEP can be found [here](./docs/source/How_to/AnalogMixedSignal.md). To try out the simulation, we provide an example SPICE netlist of an simple 1-bit ADC created by us and exported from [xschem](https://xschem.sourceforge.io/stefan/index.html) and which uses the PTM 65nm bulk CMOS model from [https://ptm.asu.edu](https://ptm.asu.edu/). 
- -### Compiling for Questasim - -To simulate your application with Questasim, first set the env variable `MODEL_TECH` to your Questasim bin folder, then compile the HDL: - -``` -make questasim-sim -``` - -then, go to your target system built folder - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-modelsim/ -``` - -and type to run your compiled software: - -``` -make run PLUSARGS="c firmware=../../../sw/build/main.hex" -``` - -You can also use vopt for HDL optimized compilation: - -``` -make questasim-sim-opt -``` - -then go to - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim_opt-modelsim/ -``` -and - -``` -make run RUN_OPT=1 PLUSARGS="c firmware=../../../sw/build/main.hex" -``` - -You can also compile with the UPF power domain description as: - -``` -make questasim-sim-opt-upf FUSESOC_PARAM="--USE_UPF" -``` - -and then execute software as: - -``` -make run RUN_OPT=1 RUN_UPF=1 PLUSARGS="c firmware=../../../sw/build/main.hex" -``` - -Questasim version must be >= Questasim 2020.4 - -### Compiling for Xcelium - -To simulate your application with Xcelium, first compile the HDL: - -``` -make xcelium-sim -``` - -then, go to your target system built folder - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-xcelium/ -``` - -and type to run your compiled software: - -``` -make run PLUSARGS="c firmware=../../../sw/build/main.hex" -``` - -### UART DPI - -To simulate the UART, we use the LowRISC OpenTitan [UART DPI](https://github.com/lowRISC/opentitan/tree/master/hw/dv/dpi/uartdpi). -Read how to interact with it in the Section "Running Software on a Verilator Simulation with Bazel" [here](https://opentitan.org/guides/getting_started/setup_verilator.html#running-software-on-a-verilator-simulation-with-bazel). -The output of the UART DPI module is printed in the `uart0.log` file in the simulation folder. - -For example, to see the "hello world!" 
output of the Verilator simulation: - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-verilator -./Vtestharness +firmware=../../../sw/build/main.hex -cat uart0.log -``` - -## Automatic testing - -X-HEEP includes two tools to perform automatic tests over your modifications. - -### Github CIs - -Upon push, tests are run on Github runners, these include: -* The generated `.sv` files pushed are equal to those generated in the runner (the code does not depend on the modification of generated files) -* Vendor is up to date (the code does not depend on the modification of vendorized files) -* All applications can be built successfully using both gcc and clang - -All test must be successful before PRs can be merged. - -### Simulation script - -Additionally, a `test_all.sh` script is provided. Apart from compiling all apps with both gcc and clang, it will simulate them and check the result. - -The available parameters are: -* COMPILER: `gcc` (default) or `clang` (can provide more than one) -* SIMULATOR: `verilator` (default), `questasim` or disable simulation with `nosim` (only one, the last provided is used). -* LINKER: `on_chip`(default), `flash_load` or `flash_exec` (can provide more than one) -* TIMEOUT: Integer number of seconds (default 120) - -#### Usage - -##### Comands -You can use two different commands to compile or simulate all the existing APPs: -``` -make app-compile-all -``` -``` -make app-simulate-all -``` -Note that both commands allow the previous parameters to specify compiling or simulation options. E.g.: -``` -make app-simulate-all LINKER=on_chip SIMULATOR=questasim COMPILER=clang TIMEOUT=150 -``` - -##### Manually -You can also **SOURCE** the script as -```bash -. util/test_all.sh on_chip questasim clang 150 -``` - -*Pay special attention to the first period in the command!* -You will be killing simulations that take too long, if you **EXECUTE** (`./test_all.sh`) this action kills the script. 
- -For both usages (commands or manual), the order of the arguments is irrelevant. - -> Note: Be sure to commit all your changes before running the script! - -* Applications that fail being built with gcc will not be simulated (skipped). -* Some applications are skipped by default for not being suitable for simulation. -* If a simulation takes too long (>timeout), it is killed. - -* Upon starting, the script will modify the `mcu_cfg.hjson` file to include all peripherals (so the largest number of apps can be run), re-generates the mcu and re-builds the simulation model for the chosen tool. -These changes can be reverted at the end of the execution (default). If changes were not commited, accepting this operation will revert them! - -The success of the script is not required for merging of a PR. - -## Debug - -Follow the [Debug](./docs/source/How_to/Debug.md) guide to debug core-v-mini-mcu. - -Alternatively, in case you are used to developing using Integrated Development Environments (IDEs), please check [the IDE readme](./IDEs.md). - -## Execute From Flash - -Follow the [ExecuteFromFlash](./docs/source/How_to/ExecuteFromFlash.md) guide to exxecute code directly from the FLASH with modelsim, FPGA, or ASIC. - -## Emulation on Xilinx FPGAs - -This project offers two different X-HEEP implementetions on Xilinx FPGAs, called Standalone and FEMU. - -### Standalone - -In this version, the X-HEEP architecture is implemented on the programmable logic (PL) side of the FPGA, and its input/output are connected to the available headers on the FPGA board. - -Two FPGA boards are supported: the Xilinx Pynq-z2 and Nexys-A7-100t. - -Make sure you have the FPGA board files installed in your Vivado. 
- -For example, for the Pynq-Z2 board, use the documentation provided at the following [link](https://pynq.readthedocs.io/en/v2.5/overlay_design_methodology/board_settings.html) to download and install them: - -To build and program the bitstream for your FPGA with vivado, type: - -``` -make vivado-fpga FPGA_BOARD=pynq-z2 -``` - -or - -``` -make vivado-fpga FPGA_BOARD=nexys-a7-100t -``` - -or add the flag `use_bscane_xilinx` to use the native Xilinx scanchain: - -``` -make vivado-fpga FPGA_BOARD=pynq-z2 FUSESOC_FLAGS=--flag=use_bscane_xilinx -``` - -Only Vivado 2021.2 has been tried. - -To program the bitstream, open Vivado, - -``` -open --> Hardware Manager --> Open Target --> Autoconnect --> Program Device -``` - -and choose the file `openhwgroup.org_systems_core-v-mini-mcu_0.bit`. - -Or simply type: - -``` -bash vivado-fpga-pgm FPGA_BOARD=pynq-z2 -``` - -or - -``` -make vivado-fpga-pgm FPGA_BOARD=nexys-a7-100t -``` - -To run SW, follow the [Debug](./docs/source/How_to/Debug.md) guide -to load the binaries with the HS2 cable over JTAG, -or follow the [ExecuteFromFlash](./docs/source/How_to/ExecuteFromFlash.md) -guide if you have a FLASH attached to the FPGA. - -Do not forget that the `pynq-z2` board requires you to have the ethernet cable attached to the board while running. - -For example, if you want to run your application using flash_exec, do as follow: - -compile your application, e.g. `make app PROJECT=example_matfadd TARGET=pynq-z2 ARCH=rv32imfc LINKER=flash_exec` - -and then follow the [ExecuteFromFlash](./docs/source/How_to/ExecuteFromFlash.md) to program the flash and set the boot buttons on the FPGA correctly. - -To look at the output of your printf, run in another terminal: - -`picocom -b 9600 -r -l --imap lfcrlf /dev/ttyUSB2` - -Please be sure to use the right `ttyUSB` number (you can discover it with `dmesg --time-format iso | grep FTDI` for example). 
- -### FPGA EMUlation Platform (FEMU) - -In this version, the X-HEEP architecture is implemented on the programmable logic (PL) side of the Xilinx Zynq-7020 chip on the Pynq-Z2 board and Linux is run on the ARM-based processing system (PS) side of the same chip. - -NOTE: This platform is not part of this repository, but you can access it with the following link: [FEMU](https://github.com/esl-epfl/x-heep-femu-sdk). - -# ASIC Implementation - -This project can be implemented using standard cells based ASIC flow. - -## Synthesis with Synopsys Design Compiler - -First, you need to provide technology-dependent implementations of some of the cells which require specific instantiation. - -Then, please provide a set_libs.tcl and set_constraints.tcl scripts to set link and target libraries, and constraints as the clock. - -To generate the `analyze` script for the synthesis scripts with DC, execute: - -``` -make asic -``` - -## OpenRoad support for SkyWater 130nm - -We are working on supporting OpenRoad and SkyWater 130nm PDK, please refer to the -[Implement on ASIC](./docs/source/How_to/ImplementASIC.md) page. This is not ready yet, it has not been tested. - -This relies on a fork of [edalize](https://github.com/davideschiavone/edalize) that contains templates for Design Compiler and OpenRoad. 
+``` \ No newline at end of file diff --git a/configs/ci.hjson b/configs/ci.hjson new file mode 100644 index 000000000..02ea10d82 --- /dev/null +++ b/configs/ci.hjson @@ -0,0 +1,23 @@ +{ + bus_type: "onetoM", + ram_banks: { + code_and_data: { + num: 6 + sizes: 32 + } + } + + linker_sections: + [ + { + name: code + start: 0 + #minimum size for freeRTOS and clang + size: 0x00000C800 + }, + { + name: data + start: 0x00000C800 + } + ] +} \ No newline at end of file diff --git a/configs/example.py b/configs/example.py new file mode 100644 index 000000000..b9e3faf2b --- /dev/null +++ b/configs/example.py @@ -0,0 +1,18 @@ +from x_heep_gen.linker_section import LinkerSection +from x_heep_gen.system import XHeep, BusType + +def config(): + system = XHeep(BusType.NtoM) + system.add_ram_banks([32] * 2) + system.add_ram_banks_il(2, 64, "data_interleaved") + + system.add_linker_section(LinkerSection.by_size("code", 0, 0x00000C800)) + system.add_linker_section(LinkerSection("data", 0x00000C800, None)) + + # Here the system is build, + # The missing gaps are filled, like the missing end address of the data section. 
+ system.build() + if not system.validate(): + raise RuntimeError("there are errors") + + return system diff --git a/configs/example_interleaved.hjson b/configs/example_interleaved.hjson new file mode 100644 index 000000000..e831d7978 --- /dev/null +++ b/configs/example_interleaved.hjson @@ -0,0 +1,40 @@ +{ + bus_type: "NtoM", + ram_banks: { + code_and_data: { + sizes: 64 + } + more_data: { + type: continuous + num: 2 + sizes: 32 + } + data_interleaved: { + auto_section: auto + // the name is used by example_matadd_interleaved as .xheep_data_interleaved + type: interleaved + num: 4 + size: 16 + } + data_interleaved_2: { + auto_section: auto + type: interleaved + num: 2 + size: 16 + } + } + + linker_sections: + [ + { + name: code + start: 0 + // minimum size for freeRTOS and clang + size: 0x00000C800 + }, + { + name: data + start: 0x00000C800 + } + ] +} \ No newline at end of file diff --git a/configs/general.hjson b/configs/general.hjson new file mode 100644 index 000000000..d3465c500 --- /dev/null +++ b/configs/general.hjson @@ -0,0 +1,24 @@ +{ + ram_address: 0 + bus_type: "onetoM", + ram_banks: { + code_and_data: { + num: 2 + sizes: [32] + } + } + + linker_sections: + [ + { + name: code + start: 0 + #minimum size for freeRTOS and clang + size: 0x00000C800 + }, + { + name: data + start: 0x00000C800 + } + ] +} \ No newline at end of file diff --git a/configs/testall.hjson b/configs/testall.hjson new file mode 100644 index 000000000..07a96b645 --- /dev/null +++ b/configs/testall.hjson @@ -0,0 +1,23 @@ +{ + bus_type: "onetoM", + ram_banks: { + code_and_data: { + num: 3 + sizes: 32 + } + } + + linker_sections: + [ + { + name: code + start: 0 + #minimum size for freeRTOS and clang + size: 0x00000C800 + }, + { + name: data + start: 0x00000C800 + } + ] +} \ No newline at end of file diff --git a/core-v-mini-mcu-fpga.core b/core-v-mini-mcu-fpga.core new file mode 100644 index 000000000..38663aea9 --- /dev/null +++ b/core-v-mini-mcu-fpga.core @@ -0,0 +1,72 @@ +CAPI=2: + +# 
Copyright 2024 EPFL +# Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +name: openhwgroup.org:systems:core-v-mini-mcu-fpga +description: CORE-V MINI-MCU FPGA related files. + +filesets: + rtl-fpga: + depend: + - x-heep::packages + files: + - hw/fpga/sram_wrapper.sv + file_type: systemVerilogSource + + ip-fpga: + files: + - hw/fpga/scripts/generate_sram.tcl: { file_type: tclSource } + - hw/fpga/prim_xilinx_clk.sv: { file_type: systemVerilogSource } + - hw/fpga/cv32e40p_xilinx_clock_gate.sv: { file_type: systemVerilogSource } + - hw/fpga/cv32e40x_xilinx_clock_gate.sv: { file_type: systemVerilogSource } + - hw/fpga/cve2_xilinx_clock_gate.sv: { file_type: systemVerilogSource } + - hw/fpga/cv32e40px_xilinx_clock_gate.sv: { file_type: systemVerilogSource } + - hw/fpga/pad_cell_input_xilinx.sv: { file_type: systemVerilogSource } + - hw/fpga/pad_cell_output_xilinx.sv: { file_type: systemVerilogSource } + - hw/fpga/pad_cell_inout_xilinx.sv: { file_type: systemVerilogSource } + - hw/fpga/pad_cell_bypass_input_xilinx.sv: { file_type: systemVerilogSource } + - hw/fpga/pad_cell_bypass_output_xilinx.sv: { file_type: systemVerilogSource } + + ip-fpga-pynq-z2: + files: + - hw/fpga/scripts/pynq-z2/xilinx_generate_clk_wizard.tcl: { file_type: tclSource } + + ip-fpga-nexys: + files: + - hw/fpga/scripts/nexys/xilinx_generate_clk_wizard.tcl: { file_type: tclSource } + + ip-fpga-zcu104: + files: + - hw/fpga/scripts/zcu104/xilinx_generate_clk_wizard.tcl: { file_type: tclSource } + + xdc-fpga-nexys: + files: + - hw/fpga/constraints/nexys/pin_assign.xdc + - hw/fpga/constraints/nexys/constraints.xdc + file_type: xdc + + xdc-fpga-pynq-z2: + files: + - hw/fpga/constraints/pynq-z2/pin_assign.xdc + - hw/fpga/constraints/pynq-z2/constraints.xdc + file_type: xdc + + xdc-fpga-zcu104: + files: + - hw/fpga/constraints/zcu104/pin_assign.xdc + file_type: xdc + + +targets: + default: &default_target + filesets: + - rtl-fpga + - 
ip-fpga + - target_pynq-z2 ? (ip-fpga-pynq-z2) + - target_pynq-z2 ? (xdc-fpga-pynq-z2) + - target_nexys-a7-100t ? (ip-fpga-nexys) + - target_nexys-a7-100t ? (xdc-fpga-nexys) + - target_zcu104 ? (ip-fpga-zcu104) + - target_zcu104 ? (xdc-fpga-zcu104) diff --git a/core-v-mini-mcu.core b/core-v-mini-mcu.core index e4dc21c8e..8dac12332 100644 --- a/core-v-mini-mcu.core +++ b/core-v-mini-mcu.core @@ -95,35 +95,12 @@ filesets: file_type: vlt rtl-fpga: + depend: + - openhwgroup.org:systems:core-v-mini-mcu-fpga files: - hw/fpga/xilinx_core_v_mini_mcu_wrapper.sv - - hw/fpga/sram_wrapper.sv file_type: systemVerilogSource - ip-fpga: - files: - - hw/fpga/scripts/generate_sram.tcl: { file_type: tclSource } - - hw/fpga/prim_xilinx_clk.sv: { file_type: systemVerilogSource } - - hw/fpga/cv32e40p_xilinx_clock_gate.sv: { file_type: systemVerilogSource } - - hw/fpga/cv32e40x_xilinx_clock_gate.sv: { file_type: systemVerilogSource } - - hw/fpga/cve2_xilinx_clock_gate.sv: { file_type: systemVerilogSource } - - hw/fpga/cv32e40px_xilinx_clock_gate.sv: { file_type: systemVerilogSource } - - hw/fpga/pad_cell_input_xilinx.sv: { file_type: systemVerilogSource } - - hw/fpga/pad_cell_output_xilinx.sv: { file_type: systemVerilogSource } - - hw/fpga/pad_cell_inout_xilinx.sv: { file_type: systemVerilogSource } - - hw/fpga/pad_cell_bypass_input_xilinx.sv: { file_type: systemVerilogSource } - - hw/fpga/pad_cell_bypass_output_xilinx.sv: { file_type: systemVerilogSource } - - ip-fpga-pynq-z2: - files: - - hw/fpga/scripts/pynq-z2/set_board.tcl: { file_type: tclSource } - - hw/fpga/scripts/pynq-z2/xilinx_generate_clk_wizard.tcl: { file_type: tclSource } - - ip-fpga-nexys: - files: - - hw/fpga/scripts/nexys/set_board.tcl: { file_type: tclSource } - - hw/fpga/scripts/nexys/xilinx_generate_clk_wizard.tcl: { file_type: tclSource } - ip-asic: depend: - technology::prim_mytech @@ -142,23 +119,6 @@ filesets: files: - hw/asic/sky130/sky130_sram_4kbyte_1rw_32x1024_8_TT_1p8V_25C.lib : { copyto: 
lib/sky130_sram_4kbyte_1rw_32x1024_8_TT_1p8V_25C.lib } - xdc-fpga-nexys: - files: - - hw/fpga/constraints/nexys/pin_assign.xdc - - hw/fpga/constraints/nexys/constraints.xdc - file_type: xdc - - xdc-fpga-pynq-z2: - files: - - hw/fpga/constraints/pynq-z2/pin_assign.xdc - - hw/fpga/constraints/pynq-z2/constraints.xdc - file_type: xdc - - netlist-fpga: - files: - - build/openhwgroup.org_systems_core-v-mini-mcu_0/nexys-a7-100t-vivado/core_v_mini_mcu_xiling_postsynth.v - file_type: verilogSource - # Scripts for hooks post_build_modelsim_scripts: files: @@ -193,6 +153,8 @@ filesets: tb-verilator: files: + - tb/XHEEP_CmdLineOptions.hh: { is_include_file: true } + - tb/XHEEP_CmdLineOptions.cpp - tb/tb_top.cpp file_type: cppSource @@ -201,6 +163,14 @@ filesets: - tb/tb_top.sv file_type: systemVerilogSource + tb-sc-verilator: + files: + - tb/XHEEP_CmdLineOptions.hh: { is_include_file: true } + - tb/XHEEP_CmdLineOptions.cpp + - tb/tb_sc_top.cpp + file_type: cppSource + + openroad_base_files: files: - flow/OpenROAD-flow-scripts/flow/Makefile : {file_type: Makefile} @@ -261,10 +231,18 @@ parameters: datatype: bool paramtype: vlogdefine default: false + SIM_SYSTEMC: + datatype: bool + paramtype: vlogdefine + default: false FPGA_NEXYS: datatype: bool paramtype: vlogdefine default: false + FPGA_ZCU104: + datatype: bool + paramtype: vlogdefine + default: false # Make the parameter known to FuseSoC to enable overrides from the # command line. If not overwritten, use the generic technology library. PRIM_DEFAULT_IMPL: @@ -301,6 +279,8 @@ targets: - files_rtl_generic - target_sim ? (rtl-simulation) - target_sim ? (tool_verilator? (files_verilator_waiver)) + - target_sim_sc ? (rtl-simulation) + - target_sim_sc ? (tool_verilator? (files_verilator_waiver)) toplevel: [core_v_mini_mcu] sim: @@ -392,6 +372,40 @@ targets: - '-LDFLAGS "-pthread -lutil -lelf"' - "-Wall" + sim_sc: + <<: *default_target + default_tool: modelsim + filesets_append: + - tb-utils + - tool_verilator? 
(tb-sc-verilator) + - "!integrated_heep? (x_heep_system)" + toplevel: + - tool_verilator? (testharness) + parameters: + - COREV_PULP + - FPU + - JTAG_DPI + - X_EXT + - USE_EXTERNAL_DEVICE_EXAMPLE + - USE_UPF + - REMOVE_OBI_FIFO + - SIM_SYSTEMC=true + tools: + verilator: + mode: sc + verilator_options: + - '--sc' + - '--trace' + - '--trace-structs' + - '--trace-params' + - '--trace-max-array 1024' + - '--x-assign unique' + - '--x-initial unique' + - '--exe tb_sc_top.cpp' + - '-CFLAGS "-std=c++11 -Wall -g -fpermissive"' + - '-LDFLAGS "-pthread -lutil -lelf $(SYSTEMC_LIBDIR)/libsystemc.a"' + - "-Wall" + nexys-a7-100t: <<: *default_target default_tool: vivado @@ -399,9 +413,6 @@ targets: filesets_append: - x_heep_system - rtl-fpga - - ip-fpga-nexys - - ip-fpga - - xdc-fpga-nexys parameters: - COREV_PULP - FPU @@ -412,6 +423,8 @@ targets: tools: vivado: part: xc7a100tcsg324-1 + board_part: digilentinc.com:nexys-a7-100t:part0:1.3 + board_repo_paths: [../../../hw/fpga/board_files/vendor/esl_epfl_nexys_a7_100t_board_files] toplevel: [xilinx_core_v_mini_mcu_wrapper] pynq-z2: @@ -421,9 +434,6 @@ targets: filesets_append: - x_heep_system - rtl-fpga - - ip-fpga-pynq-z2 - - ip-fpga - - xdc-fpga-pynq-z2 parameters: - COREV_PULP - FPU @@ -433,8 +443,31 @@ targets: tools: vivado: part: xc7z020clg400-1 + board_part: tul.com.tw:pynq-z2:part0:1.0 + board_repo_paths: [../../../hw/fpga/board_files/vendor/esl_epfl_pynq_z2_board_files] toplevel: [xilinx_core_v_mini_mcu_wrapper] + zcu104: + <<: *default_target + default_tool: vivado + description: ZCU104 Evaluation Board + filesets_append: + - x_heep_system + - rtl-fpga + parameters: + - COREV_PULP + - FPU + - X_EXT + - SYNTHESIS=true + - REMOVE_OBI_FIFO + - FPGA_ZCU104=true + tools: + vivado: + part: xczu7ev-ffvc1156-2-e + board_part: xilinx.com:zcu104:part0:1.0 + board_repo_paths: [../../../hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files] + toplevel: [xilinx_core_v_mini_mcu_wrapper] + asic_synthesis: <<: *default_target 
default_tool: design_compiler diff --git a/core-v-mini-mcu.upf.tpl b/core-v-mini-mcu.upf.tpl index 5d687f9ac..378a74a68 100644 --- a/core-v-mini-mcu.upf.tpl +++ b/core-v-mini-mcu.upf.tpl @@ -13,8 +13,8 @@ set_scope . create_power_domain PD_TOP -include_scope create_power_domain PD_CPU -elements {cpu_subsystem_i} create_power_domain PD_PERIP_SUBS -elements {peripheral_subsystem_i} -% for bank in range(ram_numbanks): -create_power_domain PD_MEM_BANK_${bank} -elements {memory_subsystem_i/gen_sram[${bank}].ram_i} +% for bank in xheep.iter_ram_banks(): +create_power_domain PD_MEM_BANK_${bank.name()} -elements {memory_subsystem_i/ram${bank.name()}_i} % endfor @@ -39,11 +39,11 @@ add_power_state PD_PERIP_SUBS.primary -state PERIP_SUBS_ON <%text>\ add_power_state PD_PERIP_SUBS.primary -state PERIP_SUBS_OFF <%text>\ {-supply_expr {power == `{OFF} && ground == `{FULL_ON, 0.0}}} -simstate CORRUPT -% for bank in range(ram_numbanks): -add_power_state PD_MEM_BANK_${bank}.primary -state MEM_BANK_${bank}_ON <%text>\ +% for bank in xheep.iter_ram_banks(): +add_power_state PD_MEM_BANK_${bank.name()}.primary -state MEM_BANK_${bank.name()}_ON <%text>\ {-supply_expr {power == `{FULL_ON, 1.2} && ground == `{FULL_ON, 0.0}}} -add_power_state PD_MEM_BANK_${bank}.primary -state MEM_BANK_${bank}_OFF <%text>\ +add_power_state PD_MEM_BANK_${bank.name()}.primary -state MEM_BANK_${bank.name()}_OFF <%text>\ {-supply_expr {power == `{OFF} && ground == `{FULL_ON, 0.0}}} -simstate CORRUPT % endfor @@ -71,9 +71,9 @@ create_supply_set PD_CPU.primary -function {power VDD_CPU} -function {ground VSS create_supply_net VDD_PERIP_SUBS create_supply_set PD_PERIP_SUBS.primary -function {power VDD_PERIP_SUBS} -function {ground VSS} -update -% for bank in range(ram_numbanks): -create_supply_net VDD_MEM_BANK_${bank} -create_supply_set PD_MEM_BANK_${bank}.primary -function {power VDD_MEM_BANK_${bank}} -function {ground VSS} -update +% for bank in xheep.iter_ram_banks(): +create_supply_net 
VDD_MEM_BANK_${bank.name()} +create_supply_set PD_MEM_BANK_${bank.name()}.primary -function {power VDD_MEM_BANK_${bank.name()}} -function {ground VSS} -update % endfor @@ -103,14 +103,14 @@ create_power_switch switch_PD_PERIP_SUBS <%text>\ -on_state {on_state sw_in {sw_ctrl}} <%text>\ -off_state {off_state {!sw_ctrl}} -% for bank in range(ram_numbanks): -create_power_switch switch_PD_MEM_BANK_${bank} <%text>\ +% for bank in xheep.iter_ram_banks(): +create_power_switch switch_PD_MEM_BANK_${bank.name()} <%text>\ -supply_set PD_TOP.primary <%text>\ - -domain PD_MEM_BANK_${bank} <%text>\ + -domain PD_MEM_BANK_${bank.name()} <%text>\ -input_supply_port {sw_in VDD} <%text>\ - -output_supply_port {sw_out VDD_MEM_BANK_${bank}} <%text>\ - -control_port {sw_ctrl memory_subsystem_banks_powergate_switch_no[${bank}]} <%text>\ - -ack_port {sw_ack memory_subsystem_banks_powergate_switch_ack_ni[${bank}]} <%text>\ + -output_supply_port {sw_out VDD_MEM_BANK_${bank.name()}} <%text>\ + -control_port {sw_ctrl memory_subsystem_banks_powergate_switch_no[${bank.name()}]} <%text>\ + -ack_port {sw_ack memory_subsystem_banks_powergate_switch_ack_ni[${bank.name()}]} <%text>\ -on_state {on_state sw_in {sw_ctrl}} <%text>\ -off_state {off_state {!sw_ctrl}} @@ -144,12 +144,12 @@ set_isolation perip_subs_iso <%text>\ -name_prefix cpu_iso_cell <%text>\ -location parent -% for bank in range(ram_numbanks): -set_isolation mem_bank_${bank}_iso <%text>\ - -domain PD_MEM_BANK_${bank} <%text>\ +% for bank in xheep.iter_ram_banks(): +set_isolation mem_bank_${bank.name()}_iso <%text>\ + -domain PD_MEM_BANK_${bank.name()} <%text>\ -isolation_power_net VDD <%text>\ -isolation_ground_net VSS <%text>\ - -isolation_signal memory_subsystem_banks_powergate_iso_n[${bank}] <%text>\ + -isolation_signal memory_subsystem_banks_powergate_iso_n[${bank.name()}] <%text>\ -isolation_sense low <%text>\ -clamp_value 0 <%text>\ -applies_to outputs <%text>\ diff --git a/docs/requirements.txt b/docs/requirements.txt index 
cf9a675bb..a4cd7e7b5 100644 --- a/docs/requirements.txt +++ b/docs/requirements.txt @@ -1,2 +1,3 @@ sphinx-rtd-theme -myst-parser \ No newline at end of file +myst-parser +sphinxcontrib-apidoc diff --git a/docs/source/Configuration/Configuration.rst b/docs/source/Configuration/Configuration.rst new file mode 100644 index 000000000..c18a4e26b --- /dev/null +++ b/docs/source/Configuration/Configuration.rst @@ -0,0 +1,172 @@ +Configuration +============= + +Project Configuration +--------------------- +The project can be configured either by a hjson file or a python script. +The default configurations and examples are located in the `configs` directory. + +Hjson Configuration File +~~~~~~~~~~~~~~~~~~~~~~~~ + +.. literalinclude:: ../../../configs/example_interleaved.hjson + :language: js + +Bus Type Configuration +^^^^^^^^^^^^^^^^^^^^^^ +The bus type can either be configured to `NtoM` or `onetoM`. + +Ram Bank Configuration +^^^^^^^^^^^^^^^^^^^^^^ + +The system can be configured with banks of different sizes. The sizes should be a power of two in kiB. +All banks are configured in one continuous address region. + + +To configure interleaved banks the number and the size of the banks have to be provided. +The following restrictions apply: All banks must have the same size, and the number of banks must be a power of two. + + +For continuous banks, the default mode, only the `sizes` field is required. +It can be either the size in kiB of a single bank, +a dictionary of the same format containing more banks, or a list of multiple entries. +If the `num` field is also provided the configuration in the `sizes` field is repeated `num` times. + +..
code:: js + + ram_banks: { + code: {sizes: 64} // configure just one bank + + data: { + type: continuous // the default, can be omitted + num: 2 + sizes: 32 + } + + alt_data: {sizes: [32, 32]} // the same as data but with a list + + more_complex: { + // This also works recursively so we can easily have different sizes of banks + // and in bigger numbers without listing them all one by one. + sizes: [ + { + num: 4 + sizes: 8 + }, + { + num: 16 + sizes: 4 + }, + ] + } + + } + + +Linker Section Configuration +^^^^^^^^^^^^^^^^^^^^^^^^^^^^ +The linker script sections can be configured in two ways. +Either a section can be matched with a group of ram banks +or it can be manually defined. + +To automatically add a section the entry `auto_section: auto` should be added to the banks. +It will add a section that matches the banks exactly; it can only be used in the non-recursive part. +The name of the section is set with the name of the group of banks. + +When manually setting a linker section the name and the start address have to be provided. +Additionally the end is either provided by its address or by the size of the section. +If no end is provided, the end is inferred from the start of the next linker section +or, if no section follows, from the end address of the last ram bank. + +Both configuration types can be freely combined as long as no sections overlap. +All sections will be sorted by the configuration system. + +The first two sections should always be code and data. +The other names can be used in code with a `.xheep_` prefix, like in `example_matadd_interleaved` + +.. code:: c + + int32_t __attribute__((section(".xheep_data_interleaved"))) m_c[16*16]; + +.. code:: js + + { + ram_address: 0 + bus_type: "onetoM", + ram_banks: { + code_and_data: { + num: 2 + sizes: [32] + } + i_am_a_section_name: { + auto_section: auto + sizes: 16 + } + } + + linker_sections: + [ + { + name: code + start: 0 + + // Alternatively the end tag can be used to provide the end.
+ size: 0x00000C800 + }, + { + name: data + start: 0x00000C800 + // The end of this section will be at the beginning of the next. + // In this example the next section is i_am_a_section_name + } + ] + } + + + + +Python Configuration +~~~~~~~~~~~~~~~~~~~~ + +The same can be done by using a python script + +.. literalinclude:: ../../../configs/example.py + :language: python + + +The script should include a config function that returns an :py:class:`x_heep_gen.system.XHeep` instance. +The configuration is similar to the hjson one. The order in which sections are added is also the one used in hardware. +The script writer is responsible for calling :py:meth:`x_heep_gen.system.XHeep.build` and :py:meth:`x_heep_gen.system.XHeep.validate` and for raising an error in case of failure. +The first makes the system ready to be used and the second checks for errors in the configuration. + + + +Select Configuration File +~~~~~~~~~~~~~~~~~~~~~~~~~ + +To configure the system call + +.. code:: bash + + make mcu-gen X_HEEP_CFG=configs/name_of_file.hjson + + + +Other configurations +~~~~~~~~~~~~~~~~~~~~ + +The pads are configured in `pad_cfg.hjson`. + +One part of the configuration is in `mcu_cfg.hjson`. + +Additionally if a `hjson` file is used for configuration the following parameters can be passed to the make command to override the configuration: + +- `BUS=NtoM,onetoM` +- `MEMORY_BANKS=integer` +- `MEMORY_BANKS_IL=integer` + +They will replace the configuration used in the configuration file. +When one parameter is not provided the configuration file's value is used. +The memory banks configured this way will only be of size 32kiB. +For compatibility reasons `MEMORY_BANKS` does not create linker sections while `MEMORY_BANKS_IL` does create a linker section.
+ diff --git a/docs/source/How_to/AnalogMixedSignal.md b/docs/source/How_to/AnalogMixedSignal.md index 8ba4ed5c3..841b8a882 100644 --- a/docs/source/How_to/AnalogMixedSignal.md +++ b/docs/source/How_to/AnalogMixedSignal.md @@ -1,4 +1,4 @@ -# integrate Analog / Mixed-Signal simulations +# Integrate Analog / Mixed-Signal simulations ## About diff --git a/docs/source/How_to/CompileMakefile b/docs/source/How_to/CompileMakefile deleted file mode 100644 index 2db8a17d0..000000000 --- a/docs/source/How_to/CompileMakefile +++ /dev/null @@ -1,350 +0,0 @@ -# compile with Makefile - -You can compile the example applications and the platform using the Makefile. Type 'make help' or 'make' for more information. Moreover, please, check the different 'clean' commands to verify that you are using the corret one. - -## Generate core-v-mini-mcu package - -First, you have to generate the SystemVerilog package and C header file of the core-v-mini-mcu: - -``` -make mcu-gen -``` - -To change the default cpu type (i.e., cv32e20), the default bus type (i.e., onetoM), -the default continuous memory size (i.e., 2 continuous banks) or the default interleaved memory size (i.e., 0 interleaved banks): - -``` -make mcu-gen CPU=cv32e40p BUS=NtoM MEMORY_BANKS=12 MEMORY_BANKS_IL=4 -``` - -The last command generates x-heep with the cv32e40p core, with a parallel bus, and 16 memory banks (12 continuous and 4 interleaved), -each 32KB, for a total memory of 512KB. - -## Compiling Software - -Don't forget to set the `RISCV` env variable to the compiler folder (without the `/bin` included). -To run 'hello world' application, just type 'make app'. 
- -``` -make app -``` - -To run any other application, please use the following command with appropiate parameters: - -``` -app PROJECT= TARGET=sim(default),pynq-z2 LINKER=on_chip(default),flash_load,flash_exec COMPILER=gcc(default),clang COMPILER_PREFIX=riscv32-unknown-(default) ARCH=rv32imc(default), - -Params: -- PROJECT (ex: , hello_world(default)) -- TARGET (ex: sim(default),pynq-z2) -- LINKER (ex: on_chip(default),flash_load,flash_exec) -- COMPILER (ex: gcc(default),clang) -- COMPILER_PREFIX (ex: riscv32-unknown-(default)) -- ARCH (ex: rv32imc(default),) -``` - -For instance, to run 'hello world' app for the pynq-z2 FPGA targets, just run: - -``` -make app TARGET=pynq-z2 -``` - -Or, if you use the OpenHW Group [GCC](https://www.embecosm.com/resources/tool-chain-downloads/#corev) compiler with CORE_PULP extensions, make sure to point the `RISCV` env variable to the OpenHW Group compiler, then just run: - - -``` -make app COMPILER_PREFIX=riscv32-corev- ARCH=rv32imc_zicsr_zifencei_xcvhwlp1p0_xcvmem1p0_xcvmac1p0_xcvbi1p0_xcvalu1p0_xcvsimd1p0_xcvbitmanip1p0 -``` - -This will create the executable file to be loaded in your target system (ASIC, FPGA, Simulation). -Remember that, `X-HEEP` is using CMake to compile and link. Thus, the generated files after having -compiled and linked are under `sw\build` - -## FreeROTS based applications - -'X-HEEP' supports 'FreeRTOS' based applications. Please see `sw\applications\blinky_freertos`. - -After that, you can run the command to compile and link the FreeRTOS based application. Please also set 'LINKER' and 'TARGET' parameters if needed. - -``` -make app PROJECT=blinky_freertos -``` - -The main FreeRTOS configuration is allocated under `sw\freertos`, in `FreeRTOSConfig.h`. Please, change this file based on your application requirements. -Moreover, FreeRTOS is being fetch from 'https://github.com/FreeRTOS/FreeRTOS-Kernel.git' by CMake. Specifically, 'V10.5.1' is used. 
Finally, the fetch repository is located under `sw\build\_deps` after building. - -## Simulating - -This project supports simulation with Verilator, Synopsys VCS, and Siemens Questasim. -It relies on `fusesoc` to handle multiple EDA tools and parameters. -For example, if you want to set the `FPU` and `COREV_PULP` parameters of the `cv32e40p` CPU, -you need to add next to your compilation command `FUSESOC_PARAM="--COREV_PULP=1 --FPU=1"` -Below the different EDA examples commands. - -### Compiling for Verilator - -To simulate your application with Verilator, first compile the HDL: - -``` -make verilator-sim -``` - -then, go to your target system built folder - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-verilator -``` - -and type to run your compiled software: - -``` -./Vtestharness +firmware=../../../sw/build/main.hex -``` - -or to execute all these three steps type: - -``` -make run-helloworld -``` - -### Compiling for VCS - -To simulate your application with VCS, first compile the HDL: - -``` -make vcs-sim -``` - -then, go to your target system built folder - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-vcs -``` - -and type to run your compiled software: - -``` -./openhwgroup.org_systems_core-v-mini-mcu_0 +firmware=../../../sw/build/main.hex -``` - -Waveforms can be viewed with Verdi. 
Make sure you have the env variable `VERDI_HOME` set to your Verdi install folder, then run your compiled software as above, but with the `-gui` flag: - -``` -./openhwgroup.org_systems_core-v-mini-mcu_0 +firmware=../../../sw/build/main.hex -gui -``` - -An Analog / Mixed-Signal simulation of X-HEEP, combining both the RTL system verilog files for the digital part and a SPICE file connected through a `control.init` file for the analog / mixed-signal part, can be ran by typing - -``` -make vcs-ams-sim -``` - -then going to the target system built folder - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-vcs -``` - -and running the same executable as for the digital simulation. Note that with Verdi you can view both the digital and the analog waveforms. - -Additional instructions on how to run an analog / mixed-signal simulation of X-HEEP can be found [here](AnalogMixedSignal.md). To try out the simulation, we provide an example SPICE netlist of an simple 1-bit ADC created by us and exported from [xschem](https://xschem.sourceforge.io/stefan/index.html) and which uses the PTM 65nm bulk CMOS model from [https://ptm.asu.edu](https://ptm.asu.edu/). 
- -### Compiling for Questasim - -To simulate your application with Questasim, first set the env variable `MODEL_TECH` to your Questasim bin folder, then compile the HDL: - -``` -make questasim-sim -``` - -then, go to your target system built folder - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-modelsim/ -``` - -and type to run your compiled software: - -``` -make run PLUSARGS="c firmware=../../../sw/build/main.hex" -``` - -You can also use vopt for HDL optimized compilation: - -``` -make questasim-sim-opt -``` - -then go to - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim_opt-modelsim/ -``` -and - -``` -make run RUN_OPT=1 PLUSARGS="c firmware=../../../sw/build/main.hex" -``` - -You can also compile with the UPF power domain description as: - -``` -make questasim-sim-opt-upf FUSESOC_PARAM="--USE_UPF" -``` - -and then execute software as: - -``` -make run RUN_OPT=1 RUN_UPF=1 PLUSARGS="c firmware=../../../sw/build/main.hex" -``` - -Questasim version must be >= Questasim 2020.4 - -### UART DPI - -To simulate the UART, we use the LowRISC OpenTitan [UART DPI](https://github.com/lowRISC/opentitan/tree/master/hw/dv/dpi/uartdpi). -Read how to interact with it in the Section "Running Software on a Verilator Simulation with Bazel" [here](https://opentitan.org/guides/getting_started/setup_verilator.html#running-software-on-a-verilator-simulation-with-bazel). -The output of the UART DPI module is printed in the `uart0.log` file in the simulation folder. - -For example, to see the "hello world!" output of the Verilator simulation: - -``` -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-verilator -./Vtestharness +firmware=../../../sw/build/main.hex -cat uart0.log -``` - -## Automatic testing - -X-HEEP includes two tools to perform automatic tests over your modifications. 
- -### Github CIs - -Upon push, tests are run on Github runners, these include: -* The generated `.sv` files pushed are equal to those generated in the runner (the code does not depend on the modification of generated files) -* Vendor is up to date (the code does not depend on the modification of vendorized files) -* All applications can be built successfully using both gcc and clang - -All test must be successful before PRs can be merged. - -### Simulation script - -Additionally, a `test_all.sh` script is provided. Apart from compiling all apps with both gcc and clang, it will simulate them and check the result. - -The available parameters are: -* COMPILER: `gcc` (default) or `clang` (can provide more than one) -* SIMULATOR: `verilator` (default), `questasim` or disable simulation with `nosim` (only one, the last provided is used). -* LINKER: `on_chip`(default), `flash_load` or `flash_exec` (can provide more than one) -* TIMEOUT: Integer number of seconds (default 120) - -#### Usage - -##### Comands -You can use two different commands to compile or simulate all the existing APPs: -``` -make app-compile-all -``` -``` -make app-simulate-all -``` -Note that both commands allow the previous parameters to specify compiling or simulation options. E.g.: -``` -make app-simulate-all LINKER=on_chip SIMULATOR=questasim COMPILER=clang TIMEOUT=150 -``` - -##### Manually -You can also **SOURCE** the script as -```bash -. util/test_all.sh on_chip questasim clang 150 -``` - -*Pay special attention to the first period in the command!* -You will be killing simulations that take too long, if you **EXECUTE** (`./test_all.sh`) this action kills the script. - -For both usages (commands or manual), the order of the arguments is irrelevant. - -> Note: Be sure to commit all your changes before running the script! - -* Applications that fail being built with gcc will not be simulated (skipped). -* Some applications are skipped by default for not being suitable for simulation. 
-* If a simulation takes too long (>timeout), it is killed. - -* Upon starting, the script will modify the `mcu_cfg.hjson` file to include all peripherals (so the largest number of apps can be run), re-generates the mcu and re-builds the simulation model for the chosen tool. -These changes can be reverted at the end of the execution (default). If changes were not commited, accepting this operation will revert them! - -The success of the script is not required for merging of a PR. - -## Debug - -Follow the [Debug](./Debug.md) guide to debug core-v-mini-mcu. - -## Execute From Flash - -Follow the [ExecuteFromFlash](./ExecuteFromFlash.md) guide to exxecute code directly from the FLASH with modelsim, FPGA, or ASIC. - -## Emulation on Xilinx FPGAs - -This project offers two different X-HEEP implementetions on Xilinx FPGAs, called Standalone and FEMU. - -### Standalone - -In this version, the X-HEEP architecture is implemented on the programmable logic (PL) side of the FPGA, and its input/output are connected to the available headers on the FPGA board. - -Two FPGA boards are supported: the Xilinx Pynq-z2 and Nexys-A7-100t. - -Make sure you have the FPGA board files installed in your Vivado. - -For example, for the Pynq-Z2 board, use the documentation provided at the following [link](https://pynq.readthedocs.io/en/v2.5/overlay_design_methodology/board_settings.html) to download and install them: - -To build and program the bitstream for your FPGA with vivado, type: - -``` -make vivado-fpga FPGA_BOARD=pynq-z2 -``` - -or - -``` -make vivado-fpga FPGA_BOARD=nexys-a7-100t -``` - -or add the flag `use_bscane_xilinx` to use the native Xilinx scanchain: - -``` -make vivado-fpga FPGA_BOARD=pynq-z2 FUSESOC_FLAGS=--flag=use_bscane_xilinx -``` - -Only Vivado 2021.2 has been tried. 
- -To program the bitstream, open Vivado, - -``` -open --> Hardware Manager --> Open Target --> Autoconnect --> Program Device -``` - -and choose the file `openhwgroup.org_systems_core-v-mini-mcu_0.bit` - -To run SW, follow the [Debug](./Debug.md) guide -to load the binaries with the HS2 cable over JTAG, -or follow the [ExecuteFromFlash](./ExecuteFromFlash.md) -guide if you have a FLASH attached to the FPGA. - -Do not forget that the `pynq-z2` board requires you to have the ethernet cable attached to the board while running. - -For example, if you want to run your application using flash_exec, do as follow: - -compile your application, e.g. `make app PROJECT=example_matfadd TARGET=pynq-z2 ARCH=rv32imfc LINKER=flash_exec` - -and then follow the [ExecuteFromFlash](./ExecuteFromFlash.md) to program the flash and set the boot buttons on the FPGA correctly. - -To look at the output of your printf, run in another terminal: - -`picocom -b 9600 -r -l --imap lfcrlf /dev/ttyUSB2` - -Please be sure to use the right `ttyUSB` number (you can discover it with `dmesg --time-format iso | grep FTDI` for example). - -### FPGA EMUlation Platform (FEMU) - -In this version, the X-HEEP architecture is implemented on the programmable logic (PL) side of the Xilinx Zynq-7020 chip on the Pynq-Z2 board and Linux is run on the ARM-based processing system (PS) side of the same chip. - -NOTE: This platform is not part of this repository, but you can access it with the following link: [FEMU](https://github.com/esl-epfl/x-heep-femu-sdk). diff --git a/docs/source/How_to/CompileMakefile.md b/docs/source/How_to/CompileMakefile.md new file mode 100644 index 000000000..774f8ae3e --- /dev/null +++ b/docs/source/How_to/CompileMakefile.md @@ -0,0 +1,145 @@ +# Compile with Makefile + +You can compile the example applications and the platform using the Makefile. Type 'make help' or 'make' for more information. 
 Moreover, please, check the different 'clean' commands to verify that you are using the correct one. + +## Generate core-v-mini-mcu package + +First, you have to generate the SystemVerilog package and C header file of the core-v-mini-mcu: + +``` +make mcu-gen +``` + +To change the default cpu type (i.e., cv32e20), the default bus type (i.e., onetoM), +the default continuous memory size (i.e., 2 continuous banks) or the default interleaved memory size (i.e., 0 interleaved banks): + +``` +make mcu-gen CPU=cv32e40p BUS=NtoM MEMORY_BANKS=12 MEMORY_BANKS_IL=4 +``` + +The last command generates x-heep with the cv32e40p core, with a parallel bus, and 16 memory banks (12 continuous and 4 interleaved), +each 32KB, for a total memory of 512KB. +This method is limited to 32KB banks. + +To configure the ram banks with more flexibility, edit `configs/general.hjson` or provide your own one. +Both methods work together; the first one overrides the second. + +``` +make mcu-gen X_HEEP_CFG=configs/my_config.hjson +``` + +For more information see the Configuration section. + +## Compiling Software + +Don't forget to set the `RISCV` env variable to the compiler folder (without the `/bin` included). +To run 'hello world' application, just type 'make app'.
+ +``` +make app +``` + +To run any other application, please use the following command with appropriate parameters: + +``` +app PROJECT= TARGET=sim(default),pynq-z2 LINKER=on_chip(default),flash_load,flash_exec COMPILER=gcc(default),clang COMPILER_PREFIX=riscv32-unknown-(default) ARCH=rv32imc(default), + +Params: +- PROJECT (ex: , hello_world(default)) +- TARGET (ex: sim(default),pynq-z2) +- LINKER (ex: on_chip(default),flash_load,flash_exec) +- COMPILER (ex: gcc(default),clang) +- COMPILER_PREFIX (ex: riscv32-unknown-(default)) +- ARCH (ex: rv32imc(default),) +``` + +For instance, to run 'hello world' app for the pynq-z2 FPGA targets, just run: + +``` +make app TARGET=pynq-z2 +``` + +Or, if you use the OpenHW Group [GCC](https://www.embecosm.com/resources/tool-chain-downloads/#corev) compiler with CORE_PULP extensions, make sure to point the `RISCV` env variable to the OpenHW Group compiler, then just run: + + +``` +make app COMPILER_PREFIX=riscv32-corev- ARCH=rv32imc_zicsr_zifencei_xcvhwlp_xcvmem_xcvmac_xcvbi_xcvalu_xcvsimd_xcvbitmanip +``` + +This will create the executable file to be loaded in your target system (ASIC, FPGA, Simulation). +Remember that `X-HEEP` is using CMake to compile and link. Thus, the generated files after having +compiled and linked are under `sw\build` + +## FreeRTOS based applications + +'X-HEEP' supports 'FreeRTOS' based applications. Please see `sw\applications\blinky_freertos`. + +After that, you can run the command to compile and link the FreeRTOS based application. Please also set 'LINKER' and 'TARGET' parameters if needed. + +``` +make app PROJECT=blinky_freertos +``` + +The main FreeRTOS configuration is located under `sw\freertos`, in `FreeRTOSConfig.h`. Please, change this file based on your application requirements. +Moreover, FreeRTOS is being fetched from 'https://github.com/FreeRTOS/FreeRTOS-Kernel.git' by CMake. Specifically, 'V10.5.1' is used.
 Finally, the fetched repository is located under `sw\build\_deps` after building. + + +## Automatic testing + +X-HEEP includes two tools to perform automatic tests over your modifications. + +### Github CIs + +Upon push, tests are run on Github runners, these include: +* The generated `.sv` files pushed are equal to those generated in the runner (the code does not depend on the modification of generated files) +* Vendor is up to date (the code does not depend on the modification of vendorized files) +* All applications can be built successfully using both gcc and clang + +All tests must be successful before PRs can be merged. + +### Simulation script + +Additionally, a `test_all.sh` script is provided. Apart from compiling all apps with both gcc and clang, it will simulate them and check the result. + +The available parameters are: +* COMPILER: `gcc` (default) or `clang` (can provide more than one) +* SIMULATOR: `verilator` (default), `questasim` or disable simulation with `nosim` (only one, the last provided is used). +* LINKER: `on_chip`(default), `flash_load` or `flash_exec` (can provide more than one) +* TIMEOUT: Integer number of seconds (default 120) + +#### Usage + +##### Commands +You can use two different commands to compile or simulate all the existing APPs: +``` +make app-compile-all +``` +``` +make app-simulate-all +``` +Note that both commands allow the previous parameters to specify compiling or simulation options. E.g.: +``` +make app-simulate-all LINKER=on_chip SIMULATOR=questasim COMPILER=clang TIMEOUT=150 +``` + +##### Manually +You can also **SOURCE** the script as +```bash +. util/test_all.sh on_chip questasim clang 150 +``` + +*Pay special attention to the first period in the command!* +You will be killing simulations that take too long, if you **EXECUTE** (`./test_all.sh`) this action kills the script. + +For both usages (commands or manual), the order of the arguments is irrelevant.
+ +> Note: Be sure to commit all your changes before running the script! + +* Applications that fail being built with gcc will not be simulated (skipped). +* Some applications are skipped by default for not being suitable for simulation. +* If a simulation takes too long (>timeout), it is killed. + +* Upon starting, the script will modify the `mcu_cfg.hjson` file to include all peripherals (so the largest number of apps can be run), re-generate the mcu and re-build the simulation model for the chosen tool. +These changes can be reverted at the end of the execution (default). If changes were not committed, accepting this operation will revert them! + +The success of the script is not required for merging of a PR. \ No newline at end of file diff --git a/docs/source/How_to/Debug.md b/docs/source/How_to/Debug.md index a71034c52..9bcc76fb8 100644 --- a/docs/source/How_to/Debug.md +++ b/docs/source/How_to/Debug.md @@ -1,11 +1,11 @@ -# debug +# Debug ## Prerequisite 1. Install the required linux tools: ``` -sudo apt install pkg-config libftdi1-2 libusb-1.0-4 +sudo apt install pkg-config libftdi1-2 ``` You need at least gcc>10, so in case you do not have it: @@ -40,9 +40,9 @@ Now we are going to Simulate debugging with core-v-mini-mcu. In this setup, OpenOCD communicates with the remote bitbang server by means of DPIs. The remote bitbang server is simplemented in the folder ./hw/vendor/pulp_platform_pulpissimo/rtl/tb/remote_bitbang and it will be compiled using fusesoc.
-### Verilator +### Verilator (C++ only) -To simulate your application with Questasim using the remote_bitbang server, you need to compile you system adding the `JTAG DPI` functions: +To simulate your application with Verilator using the remote_bitbang server, you need to compile your system adding the `JTAG DPI` functions: ``` make verilator-sim FUSESOC_PARAM="--JTAG_DPI=1" diff --git a/docs/source/How_to/ExecuteFromFlash.md b/docs/source/How_to/ExecuteFromFlash.md index 8413a1299..ce7b9a9c6 100644 --- a/docs/source/How_to/ExecuteFromFlash.md +++ b/docs/source/How_to/ExecuteFromFlash.md @@ -1,4 +1,4 @@ -# execute Code from FLASH +# Execute Code from FLASH ## Boot Procedure diff --git a/docs/source/How_to/ExternalDevices.md b/docs/source/How_to/ExternalDevices.md deleted file mode 100644 index bbc89bd32..000000000 --- a/docs/source/How_to/ExternalDevices.md +++ /dev/null @@ -1,151 +0,0 @@ -# interface with external devices - -The top module [`core_v_mini_mcu`]((./../../../hw/core-v-mini-mcu/corecore_v_mini_mcu.sv) exposes several external interfaces: - -- `ext_xbar_master`: N ports to connect external masters to the internal system bus. - -- Five external ports to connect internal masters (e.g., CPU instruction and data ports) to external slaves. Every internal master is exposed to the external subsystem: - 1. `ext_core_instr`: CPU instruction interface. - 2. `ext_core_data`: CPU data interface. - 3. `ext_debug_master`: debug interface. - 4. `ext_dma_read_ch0`: DMA read master, channel 0. - 5. `ext_dma_write_ch0`: DMA write master, channel 0. - 6. `ext_dma_addr_ch0`: DMA address (read) master, channel 0. - -- `ext_peripheral_slave`: 1 peripheral slave port connected to the system bus (through the peripheral interface). - -The number of external master ports is set by the [`EXT_XBAR_NMASTER`](./../../../tb/testharness_pkg.sv#L10) parameter from `testharness_pkg`.
-Multiple OBI slaves can be connected to the exposed internal masters using an external bus, as demonstrated in [`testharness.sv`](./../../../tb/testharness.sv#L232). - -> NOTE: the internal bus has no master port connected to the external subsystem. Therefore, an external master cannot send a request to an external slave through one of the exposed master ports. All the address decoding must be done by the external bus: the request must be forwarded to one of the `ext_xbar_master` ports only if the target address falls into the space where internal slaves are mapped. This can be achieved using a 1-to-2 crossbar for each external master as done [here](./../../../tb/ext_bus.sv#L131). - -Finally, only one peripheral slave port is available to the external subsystem. - -## External device example - -One example using the external ports is provided where: - -- hw/ip_examples/slow_sram is a memory slave device -- hw/ip_examples/memcopy is a slave peripheral with a master port. It implements a simple memcopy feature (i.e., DMA). -- hw/ip_examples/ams is an example AMS peripheral which can interface with SPICE netlists to run mixed-signal simulations (in this repository, the example analog peripheral is a 1-bit ADC) - - For more information, see [here](AnalogMixedSignal.md) - -## Run the external device example - -To run the external device example, first compile the software example: - -```bash -make app PROJECT=example_external_dma -``` - -By default, the external device example RTL code is disabled. This example is available for the sim and sim_opt targets. 
- -For example, compile for Verilator with: - -``` -make verilator-sim -``` - -then, go to your target system built folder - -```bash -cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-verilator -``` - -and type to run your compiled software: - -```bash -./Vtestharness +firmware=../../../sw/build/main.hex -``` - -If you don't compile the platform with the correct fusesoc flag, the simulation will hang forever because the external peripheral is disabled and never replies. - -You can display the UART output with: - -```bash -cat uart0.log -``` - -It should print: - -``` ---- MEMCOPY EXAMPLE - external peripheral --- -Init the PLIC...success -Set MEMCOPY interrupt priority to 1...success -Enable MEMCOPY interrupt...Success -Memcopy launched...finished -Complete interrupt...success -MEMCOPY SUCCESS -``` - -## Add an external master/slave or peripheral - -1. Master(s): use the obi_pkg (import obi_pkg::\*;) to create your master_req output port (obi_req_t) and master_resp input port (obi_resp_t). Adjust the EXT_XBAR_NMASTER parameter accordingly. - -2. Slave(s): similar to adding a master but you have a slave_req input port (obi_req_t) and slave_resp output port (obi_resp_t). Remember to connect external masters with external slaves through an external bus. The same bus can be used to connect multiple external slaves to the internal `core_v_mini_mcu` masters. - -3. Peripheral slave(s): use the reg_pkg (import obi_pkg::\*;) to create your slave_periph_req input port (reg_req_t) and slave_resp output port (reg_rsp_t). If multiple peripheral slaves are used, add a decoding stage for addresses dispatching. - -To create and maintain a peripheral unit efficiently, use the `reggen` tool: - -1. Define the registers of your peripheral in a `.hjson` file (read the documentation [here](https://docs.opentitan.org/doc/rm/register_tool/)). - -2. Launch the `regtool.py` script to generate SystemVerilog RTL code and a C header file. 
- -For example, launching the script [`memcopy_periph_gen.sh`](./../../../hw/ip_examples/memcopy_periph/memcopy_periph_gen.sh) generates 2 SystemVerilog files and one C header file: - -1. `memcopy_periph_reg_top.sv`: the register file module. It can be directly instantiated inside your peripheral RTL code (e.g., [`memcopy_periph.sv`](./../../../hw/ip_examples/memcopy_periph/rtl/memcopy_periph.sv)) and connected to the peripheral device controller(s). -2. `memcopy_periph_reg_pkg.sv`: SystemVerilog package containing the definitions used in the SystemVerilog module above. -3. `memcopy_periph_regs.h`: C/C++ header file defining the address offset of the peripheral configuration registers. Take a look at the C code [here](./../../../sw/applications/example_external_peripheral/memcopy_periph.c) for a usage example. - -## External Interrupts - -X-HEEP includes several empty external interrupts slots that can be assigned both in HW and SW. - -Firstly, connect your external device's interrupt to one of the slots of the `external_interrupt_vector` of X-HEEP: - -```systemverilog - -logic [core_v_mini_mcu_pkg::NEXT_INT-1:0] ext_intr_vector; - -always_comb begin -for (int i = 0; i < core_v_mini_mcu_pkg::NEXT_INT; i++) begin - ext_intr_vector[i] = 1'b0; // All interrupt lines set to zero by default -end -ext_intr_vector[0] = my_device_int; // Re-assign the interrupt lines used here -end - -x_heep_system #( - . . . -) x_heep_system_i ( - .intr_vector_ext_i(ext_intr_vector), - . . . -) - -``` - -Then, when initializing the PLIC system in software, do not forget to assign the corresponding interrupt ID to your custom handler. - -```C -#define MY_DEVICE_INTR EXT_INTR_0 - -void handler_irq_my_device(uint32_t id) { - my_device_intr_flag = 1; - // Do whatever you need here -} - -void main() { - plic_Init(); // Init the PLIC, this will clear all external interrupts assigned previously. - plic_irq_set_priority(MY_DEVICE_INTR, 1); // Set the priority of the external device's interrupt. 
- plic_irq_set_enabled(MY_DEVICE_INTR, kPlicToggleEnabled); // Enable the external device's interrupt. - plic_assign_external_irq_handler( MY_DEVICE_INTR, (void *) &handler_irq_my_device); // Assign a handler taht will be called when this interrupt is triggered. - - // Enable global interrupt for machine-level interrupts - CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); - // Set mie.MEIE bit to one to enable machine-level external interrupts - const uint32_t mask = 1 << 11;//IRQ_EXT_ENABLE_OFFSET; - CSR_SET_BITS(CSR_REG_MIE, mask); - - . . . -} -``` \ No newline at end of file diff --git a/docs/source/How_to/GettingStarted.md b/docs/source/How_to/GettingStarted.md index e5b877dd4..4b7b66855 100644 --- a/docs/source/How_to/GettingStarted.md +++ b/docs/source/How_to/GettingStarted.md @@ -1,6 +1,34 @@ -# get started +# Get started -## 1. OS requirements +## Docker setup + +A docker image containing all the required software dependencies is available on [github-packages](https://github.com/orgs/esl-epfl/packages/container/package/x-heep-toolchain). + +It is only required to install docker and pull the image. + +```bash +docker pull ghcr.io/esl-epfl/x-heep-toolchain:latest +``` + +Assuming that X-HEEP has been cloned to `X_HEEP_DIR=/absolute/path/to/x-heep/folder`, it is possible to directly run the docker mounting `X_HEEP_DIR` to the path `/workspace/x-heep` in the docker. + +```bash +docker run -it -v ${X_HEEP_DIR}:/workspace/x-heep ghcr.io/esl-epfl/x-heep-toolchain +``` + +:warning: Take care to indicate the absolute path to the local clone of X-HEEP, otherwise docker will not be able to properly mount the local folder in the container.
+ +All the commands listed in the README can be executed in the docker container, except for: + +- Simulation with Questasim and VCS, synthesis with Design Compiler (licenses are required to use these tools, so they are not installed in the container) + +- OpenRoad flow is not installed in the container, so it is not possible to run the related make commands + +- Synthesis with Vivado could be possible, but currently is untested + +## Manual setup + +### 1. OS requirements To use `X-HEEP`, first make sure you have the following apt packages, or install them as: @@ -10,16 +38,14 @@ sudo apt install lcov libelf1 libelf-dev libftdi1-2 libftdi1-dev libncurses5 lib In general, have a look at the [Install required software](https://opentitan.org/guides/getting_started/index.html) section of the OpenTitan documentation. -It has been tested only on `Ubuntu 20`, and we know it does NOT WORK on `Ubuntu 22`. - -## 2. Python +### 2. Python We rely on either (a) `miniconda`, or (b) `virtual environment` enviroment. Choose between `2.a` or `2.b` to setup your enviroment. -### 2.a Miniconda +#### 2.a Miniconda Install [Miniconda](https://docs.conda.io/en/latest/miniconda.html#linux-installers) python 3.8 version as described in the link, and create the Conda enviroment: @@ -35,7 +61,7 @@ conda activate core-v-mini-mcu ``` -### 2.b Virtual Environment +#### 2.b Virtual Environment Install the python virtual environment just as: @@ -49,7 +75,7 @@ You need to do it only the first time, then just activate the environment everyt source .venv/bin/activate ``` -## 3. Install the RISC-V Compiler: +### 3. Install the RISC-V Compiler: ``` git clone --branch 2022.01.17 --recursive https://github.com/riscv/riscv-gnu-toolchain @@ -75,7 +101,7 @@ cmake -G "Unix Makefiles" -DLLVM_ENABLE_PROJECTS=clang -DCMAKE_BUILD_TYPE=Releas cmake --build . --target install ``` -## 4.
Install Verilator: ``` export VERILATOR_VERSION=4.210 @@ -104,18 +130,18 @@ sudo apt install libcanberra-gtk-module libcanberra-gtk3-module sudo apt-get install -y gtkwave ``` -## Files are formatted with Verible +### Files are formatted with Verible We use version v0.0-1824-ga3b5bedf -See: [Install Verible](https://opentitan.org/guides/getting_started/index.html#step-6a-install-verible-optional) +See: [Install Verible](https://opentitan.org/guides/getting_started/index.html#step-7a-install-verible-optional) To format your RTL code type: ``` make verible ``` -## Compilation Flow and Package Manager +### Compilation Flow and Package Manager We use [FuseSoC](https://github.com/olofk/fusesoc) for all the tools we use. diff --git a/IDEs.md b/docs/source/How_to/IDEs.md similarity index 92% rename from IDEs.md rename to docs/source/How_to/IDEs.md index e1fce194f..ee932dc39 100644 --- a/IDEs.md +++ b/docs/source/How_to/IDEs.md @@ -1,14 +1,14 @@ +# Set up an IDE For FW development, `X-HEEP` can be used together with different Integrated Development Environments (IDEs) flavours. Up to now, full support is just provided by [Segger Embedded Studio (SES)](https://www.segger.com/products/development-tools/embedded-studio/editions/risc-v/). This readme guides you through all the needed steps to get SES working and debugging when prototyping `X-HEEP` into the pynq-z2 board. -# Prerequisite -## 1. SES installation. +## SES installation. The platform was only tested under Linux and version 7.32 of the Embedded Studio for RISC-V. Please, go to the Segger [download center](https://www.segger.com/downloads/embedded-studio/) to get that version. It is assumed that you have already installed the RISC-V compiler and openOCD. If the latter is not true, check the main [Readme](https://github.com/esl-epfl/x-heep) please. 
-# Configuration +## Configuration -After installing SES, you need to indicate to Segger your Toolchain directory (RISC-V Compiler) as well as your openOCD installation folder. Those need to be specified into `xheep.emProject` file. +After installing SES, you need to indicate to Segger your Toolchain directory (RISC-V Compiler) as well as your openOCD installation folder. Those need to be specified into `xheep.emProject` file. For the RISC-V Compiler path, **line 71**: ``` @@ -22,23 +22,23 @@ gdb_server_command_line="/home/< user >/tools/openocd/bin/openocd -f "$(Pro ``` Please, substitute that path to your current path where openOCD was installed. Do not forget to target the `openocd` file inside the `bin` installation folder of openocd. -# Building +## Building Once the paths are set properly, you can open `xheep.emProject` with SES. That will launch SES with one solution already configured, `xheep_ses`, and one project into that solution `helloworld`. Note that this project has already everything configured to run the `helloworld` application of the main repo, i.e. all the source files are linked to the project as well as the `c user include directories` already set up. Moreover, this project is configured to be running (compile, linking, and debug) by using the on-chip linker `sw/linker/link.ld`. If you want to change any of these options, you will need to change the options of the project or the options of the solution. Note that the project is currently set-up to be working on the `Debug_External` configuration. Please, do not move to other configuration when building and/or debugging. Finally, to build the whole project just press `F7` or `Build > Build helloworld`. The output should be like this: -

+

Note that on the right part, you have the memory usage based on the linker we have configured. If you do not see this, you can activate that view in `View > Memory Usage`. -# Debugging +## Debugging -Finally, after building (compile and linking), you can directly start debugging by pressing `F5` or also `Target > Connect GDB Server` and `Debug > Go`. You also have the possibility to activate the terminal to see directly into the SES window the printing characters. +Finally, after building (compile and linking), you can directly start debugging by pressing `F5` or also `Target > Connect GDB Server` and `Debug > Go`. You also have the possibility to activate the terminal to see directly into the SES window the printing characters. The output should be something like this: -

+

Note that when debugging and setting breakpoints, please, go one-by-one (one breakpoint at a time). Several breakpoints support will be supported in the following releases. diff --git a/docs/source/How_to/ImplementASIC.md b/docs/source/How_to/ImplementASIC.md index bacb70509..2ddf20c85 100644 --- a/docs/source/How_to/ImplementASIC.md +++ b/docs/source/How_to/ImplementASIC.md @@ -1,4 +1,4 @@ -# implement on ASIC +# Implement on ASIC This project can be implemented using standard cells based ASIC flow. diff --git a/docs/source/How_to/IntegratePeripheral.md b/docs/source/How_to/IntegratePeripheral.md index ae8b3b3f6..f39bdbd19 100644 --- a/docs/source/How_to/IntegratePeripheral.md +++ b/docs/source/How_to/IntegratePeripheral.md @@ -206,7 +206,7 @@ module #( b. The corresponding package must be imported: -```systemverilog +``` import _reg_pkg::*; ``` diff --git a/docs/source/How_to/ProgramFlash.md b/docs/source/How_to/ProgramFlash.md index 7ed89d246..efbe78a99 100644 --- a/docs/source/How_to/ProgramFlash.md +++ b/docs/source/How_to/ProgramFlash.md @@ -1,4 +1,4 @@ -# program the FLASH on the EPFL Programmer +# Program the FLASH on the EPFL Programmer Install the required linux tools: diff --git a/docs/source/How_to/RunOnFPGA.md b/docs/source/How_to/RunOnFPGA.md new file mode 100644 index 000000000..beb604b73 --- /dev/null +++ b/docs/source/How_to/RunOnFPGA.md @@ -0,0 +1,91 @@ +## Run on FPGA + +Follow the [Debug](./Debug.md) guide to debug core-v-mini-mcu. + +## Execute From Flash + +Follow the [ExecuteFromFlash](./ExecuteFromFlash.md) guide to execute code directly from the FLASH with modelsim, FPGA, or ASIC. + +## Emulation on Xilinx FPGAs + +This project offers two different X-HEEP implementations on Xilinx FPGAs, called Standalone and FEMU. + +### Standalone + +#### Set-up +In this version, the X-HEEP architecture is implemented on the programmable logic (PL) side of the FPGA, and its inputs/outputs are connected to the available headers on the FPGA board.
+ +Two FPGA boards are supported: the Xilinx Pynq-z2 and Nexys-A7-100t. + +1. Make sure you have the FPGA board files installed in your Vivado. +> For example, for the Pynq-Z2 board, use the documentation provided at the following [link](https://pynq.readthedocs.io/en/v2.5/overlay_design_methodology/board_settings.html) to download and install them. + +2. Make sure you set up the Vivado environments by running + ``` + source /settings64.sh + ``` + > We recommend adding this command to your `.bashrc` + +3. Install the Xilinx cable drivers. +* Follow the [instructions for Linux](https://docs.amd.com/api/khub/documents/6EIhov6ruoilhq8zq7bXBA/content?Ft-Calling-App=ft%2Fturnkey-portal&Ft-Calling-App-Version=4.3.26#G4.262534) +* Restart your PC + +#### Running + +To build and program the bitstream for your FPGA with vivado, type: + +``` +make vivado-fpga FPGA_BOARD=pynq-z2 +``` + +or + +``` +make vivado-fpga FPGA_BOARD=nexys-a7-100t +``` + +or add the flag `use_bscane_xilinx` to use the native Xilinx scanchain: + +``` +make vivado-fpga FPGA_BOARD=pynq-z2 FUSESOC_FLAGS=--flag=use_bscane_xilinx +``` + +To program the bitstream, open Vivado, + +``` +open --> Hardware Manager --> Open Target --> Autoconnect --> Program Device +``` + +and choose the file `openhwgroup.org_systems_core-v-mini-mcu_0.bit`. + +Or simply type: + +``` +make vivado-fpga-pgm FPGA_BOARD=pynq-z2 +``` + +or + +``` +make vivado-fpga-pgm FPGA_BOARD=nexys-a7-100t +``` + +To run SW, follow the [Debug](./Debug.md) guide +to load the binaries with the HS2 cable over JTAG, +or follow the [ExecuteFromFlash](./ExecuteFromFlash.md) +guide if you have a FLASH attached to the FPGA. + +Do not forget that the `pynq-z2` board requires you to have the ethernet cable attached to the board while running. + +For example, if you want to run your application using flash_exec, do as follows: +compile your application, e.g.
`make app PROJECT=example_matfadd TARGET=pynq-z2 ARCH=rv32imfc LINKER=flash_exec` +and then follow the [ExecuteFromFlash](./ExecuteFromFlash.md) guide to program the flash and set the boot buttons on the FPGA correctly. +To look at the output of your printf, run in another terminal: +`picocom -b 9600 -r -l --imap lfcrlf /dev/ttyUSB2` +Please be sure to use the right `ttyUSB` number (you can discover it with `dmesg --time-format iso | grep FTDI` for example). + +### FPGA EMUlation Platform (FEMU) + +In this version, the X-HEEP architecture is implemented on the programmable logic (PL) side of the Xilinx Zynq-7020 chip on the Pynq-Z2 board and Linux is run on the ARM-based processing system (PS) side of the same chip. + +NOTE: This platform is not part of this repository, but you can access it with the following link: [FEMU](https://github.com/esl-epfl/x-heep-femu-sdk). diff --git a/docs/source/How_to/Simulate.md b/docs/source/How_to/Simulate.md new file mode 100644 index 000000000..6147648ce --- /dev/null +++ b/docs/source/How_to/Simulate.md @@ -0,0 +1,160 @@ +# Simulate + +This project supports simulation with Verilator, Synopsys VCS, Siemens Questasim and Cadence Xcelium. +It relies on `fusesoc` to handle multiple EDA tools and parameters. +For example, if you want to set the `FPU` and `COREV_PULP` parameters of the `cv32e40p` CPU, +you need to add next to your compilation command `FUSESOC_PARAM="--COREV_PULP=1 --FPU=1"` +Below are example commands for the different EDA tools.
+ +## Compiling for Verilator + +To simulate your application with Verilator, first compile the HDL: + +``` +make verilator-sim +``` + +then, go to your target system built folder + +``` +cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-verilator +``` + +and type to run your compiled software: + +``` +./Vtestharness +firmware=../../../sw/build/main.hex +``` + +or to execute all these three steps type: + +``` +make run-helloworld +``` + +## Compiling for VCS + +To simulate your application with VCS, first compile the HDL: + +``` +make vcs-sim +``` + +then, go to your target system built folder + +``` +cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-vcs +``` + +and type to run your compiled software: + +``` +./openhwgroup.org_systems_core-v-mini-mcu_0 +firmware=../../../sw/build/main.hex +``` + +Waveforms can be viewed with Verdi. Make sure you have the env variable `VERDI_HOME` set to your Verdi install folder, then run your compiled software as above, but with the `-gui` flag: + +``` +./openhwgroup.org_systems_core-v-mini-mcu_0 +firmware=../../../sw/build/main.hex -gui +``` + +An Analog / Mixed-Signal simulation of X-HEEP, combining both the RTL system verilog files for the digital part and a SPICE file connected through a `control.init` file for the analog / mixed-signal part, can be run by typing + +``` +make vcs-ams-sim +``` + +then going to the target system built folder + +``` +cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-vcs +``` + +and running the same executable as for the digital simulation. Note that with Verdi you can view both the digital and the analog waveforms. + +Additional instructions on how to run an analog / mixed-signal simulation of X-HEEP can be found [here](./AnalogMixedSignal.md).
To try out the simulation, we provide an example SPICE netlist of a simple 1-bit ADC, created by us and exported from [xschem](https://xschem.sourceforge.io/stefan/index.html), which uses the PTM 65nm bulk CMOS model from [https://ptm.asu.edu](https://ptm.asu.edu/). + +## Compiling for Questasim + +To simulate your application with Questasim, first set the env variable `MODEL_TECH` to your Questasim bin folder, then compile the HDL: + +``` +make questasim-sim +``` + +then, go to your target system built folder + +``` +cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-modelsim/ +``` + +and type to run your compiled software: + +``` +make run PLUSARGS="c firmware=../../../sw/build/main.hex" +``` + +You can also use vopt for HDL optimized compilation: + +``` +make questasim-sim-opt +``` + +then go to + +``` +cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim_opt-modelsim/ +``` +and + +``` +make run RUN_OPT=1 PLUSARGS="c firmware=../../../sw/build/main.hex" +``` + +You can also compile with the UPF power domain description as: + +``` +make questasim-sim-opt-upf FUSESOC_PARAM="--USE_UPF" +``` + +and then execute software as: + +``` +make run RUN_OPT=1 RUN_UPF=1 PLUSARGS="c firmware=../../../sw/build/main.hex" +``` + +Questasim version must be >= Questasim 2020.4 + +## Compiling for Xcelium + +To simulate your application with Xcelium, first compile the HDL: + +``` +make xcelium-sim +``` + +then, go to your target system built folder + +``` +cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-xcelium/ +``` + +and type to run your compiled software: + +``` +make run PLUSARGS="c firmware=../../../sw/build/main.hex" +``` + +## UART DPI + +To simulate the UART, we use the LowRISC OpenTitan [UART DPI](https://github.com/lowRISC/opentitan/tree/master/hw/dv/dpi/uartdpi).
+Read how to interact with it in the Section "Running Software on a Verilator Simulation with Bazel" [here](https://opentitan.org/guides/getting_started/setup_verilator.html#running-software-on-a-verilator-simulation-with-bazel). +The output of the UART DPI module is printed in the `uart0.log` file in the simulation folder. + +For example, to see the "hello world!" output of the Verilator simulation: + +``` +cd ./build/openhwgroup.org_systems_core-v-mini-mcu_0/sim-verilator +./Vtestharness +firmware=../../../sw/build/main.hex +cat uart0.log +``` \ No newline at end of file diff --git a/docs/source/How_to/SystemC.md b/docs/source/How_to/SystemC.md new file mode 100644 index 000000000..1a1aad571 --- /dev/null +++ b/docs/source/How_to/SystemC.md @@ -0,0 +1,16 @@ +# SystemC model + +Support for SystemC models in `X-HEEP` is still a work-in-progress. +However, a simple example is provided in the SystemC testbench available in `tb/tb_sc_top.cpp`. + +When compiling the `X-HEEP` with Verilator using SystemC, the above testbench is used for simulation. +The testbench gets an `X-HEEP` external-memory `obi` master port to communicate with a SystemC memory model. + +This model is deliberately simple, as it is meant to be an example, and is provided in `tb/systemc_tb`. +For those who want to extend the functionality of `X-HEEP` with SystemC, such examples can be used as a starting point. + +The SystemC modules leverage `TLM-2.0` as well as baseline SystemC functionalities. + +The `X-HEEP` `obi` port is connected to a `C++` direct-mapped cache that handles `hit` and `miss` with pre-defined latencies. +It uses `TLM-2.0` to communicate with the external SystemC memory on `miss` cache-transactions. +A module in SystemC then communicates with the RTL SystemC model compiled by Verilator to provide read/write data.
\ No newline at end of file diff --git a/docs/source/How_to/UpdateDocumentation.md b/docs/source/How_to/UpdateDocumentation.md new file mode 100644 index 000000000..c27a87760 --- /dev/null +++ b/docs/source/How_to/UpdateDocumentation.md @@ -0,0 +1,27 @@ +# Update the documentation + +All documentation is found in the `/docs` directory. + +1. If you need to create a new entry, add the new document in markdown (`.md` extension) to the corresponding folder. Otherwise, just edit the corresponding file. + +> Make sure the document has one single `# header`, otherwise they will be considered different documents. + +2. If a new folder is added, add it to the `toctree` inside `docs/source/index.rst` (as the `peripherals` folder is) +3. Commit and push + +## Refresh the web documentation + +After each change to the documentation, once the branch is merged into the main branch of X-HEEP, anyone must do the following: + +1. Open a terminal in the `docs` folder and make sure the conda environment is activated. +2. If it is your first time updating the web docs, run: +```bash +pip install -r requirements.txt +``` +3. Run +```bash +make clean html +``` +4. Wait a few minutes and enjoy your brand new documentation in [Read the Docs](https://x-heep.readthedocs.io/en/latest/). + +Thank you for helping keep X-HEEP accurately and extensively documented! diff --git a/docs/source/How_to/eXtendingHEEP.md b/docs/source/How_to/eXtendingHEEP.md index ad8805b16..0f875a5f1 100644 --- a/docs/source/How_to/eXtendingHEEP.md +++ b/docs/source/How_to/eXtendingHEEP.md @@ -10,6 +10,7 @@ Here you can find a list of `X-HEEP` based open-source examples. If you want to * [CGRA-X-HEEP](https://github.com/esl-epfl/cgra_x_heep): A CGRA loosely coupled with X-HEEP. * [F-HEEP](https://github.com/davidmallasen/F-HEEP): System integrating [fpu_ss](https://github.com/pulp-platform/fpu_ss) into X-HEEP via the eXtension interface and cv32e40x. 
+* [KALIPSO](https://github.com/vlsi-lab/ntt_intt_kyber) and [KRONOS](https://github.com/vlsi-lab/keccak_integration/tree/keccak_xheep): Loosely-coupled, post-quantum cryptography accelerators for NTT/INTT and Keccak hash function integrated into X-HEEP. In addition, the `X-HEEP` testbench has been extended with a `DMA`, dummy `PERIPHERALs` (including the `FLASH`), and a CORE-V-XIF compatible co-processor @@ -304,7 +305,7 @@ To do so, it MUST include the `external.mk` AFTER all your custom rules.
Example of BASE/Makefile -``` +```Makefile MAKE = make .PHONY: test test: @@ -326,12 +327,69 @@ include $(XHEEP_MAKE) * The `app` rule will perform actions before calling `X-HEEP` Makefile's `app` rule. In this case, the project and where the source files are to be extracted from is being specified. The `SOURCE=.` argument will set `X-HEEP`'s own `sw/` folder as the directory from which to fetch source files. This is an example of building inner sources from an external directory. * The `verilator-sim` rule will override the `X-HEEP` Makefile's one. * Any other target will be passed straight to `X-HEEP`'s Makefile. For example -``` +```sh make mcu-gen CPU=cv32e40x ```
+### Excluding files from compilation +If you have files that need to be excluded from the gcc compilation flow, you can add them to a directory containing the keyword `exclude`, and/or rename the file to include the keyword `exclude`. +In the following example, the files marked with ✅ will be compiled, and the ones marked with ❌ will not. + + BASE + ├── sw + │ ├── applications + │ │ └── your_app + │ │ ├── ✅ main.c + │ │ ├── ✅ your_app.c + │ │ ├── your_app.h + │ │ ├── ❌ my_kernel_exclude.c + │ │ ├── my_kernel.h + │ │ └── exclude_files + │ │ └── ❌ kernel_asm.S + + + +### Makefile help +If you want that the commands `make` or `make help` show the help for your external Makefile, add the following lines before the first `include` directive or target. + +
+ Addition to print the target's help + +```Makefile +# HEEP_DIR might already be defined, you may want to move it to the top +export HEEP_DIR = hw/vendor/esl_epfl_x_heep/ + +# Get the path of this Makefile to pass to the Makefile help generator +MKFILE_PATH = $(shell dirname "$(realpath $(firstword $(MAKEFILE_LIST)))") +export FILE_FOR_HELP = $(MKFILE_PATH)/Makefile + + +## Call the help generator. Calling simply +## $ make +## or +## $ make help +## Will print the help of this project. +## With the parameter WHICH you can select to print +## either the help of X-HEEP (WHICH=xheep) +## or both this project's and X-HEEP's (WHICH=all) +help: +ifndef WHICH + ${HEEP_DIR}/util/MakefileHelp +else ifeq ($(filter $(WHICH),xheep x-heep),) + ${HEEP_DIR}/util/MakefileHelp + $(MAKE) -C $(HEEP_DIR) help +else + $(MAKE) -C $(HEEP_DIR) help +endif +``` + +
+ +> Remember to add double hashes `##` on any comment you want printed on the help. +> Use `## @section SectionName` to divide the documentation in sections + ### Different use cases If you plan to vendorize `X-HEEP` in a different directory than the one proposed, just update in your `BASE/Makefile`: ``` diff --git a/docs/source/Peripherals/DMA.md b/docs/source/Peripherals/DMA.md index e955df917..2838a83bf 100644 --- a/docs/source/Peripherals/DMA.md +++ b/docs/source/Peripherals/DMA.md @@ -22,6 +22,10 @@ Sources and destinations are the two pointers that will exchange data. Bytes wil ### Data type The DMA allows transactions in chunks of 1, 2 or 4 Bytes (`Byte`, `Half-Word` and `Word` respectively). The size in bytes of the chosen data type is called _data unit_ (usually abbreviated as `du`). For example, 16 bytes can be 16 data units if the data type is `Byte`, but 8 data units if the data type is `Half Word`. +Source and destination can have different data types; if the destination type is wider than the source type, data can be sign extended. + +### Sign extension +If specified (setting the bit in the corresponding register) and if the destination data type is wider than the source type, the sign of the source data is extended to fill the size of the destination data type. ### Increment In the case that source and/or destination data are not to be consecutively read/written, a certain increment can be defined. @@ -156,6 +160,7 @@ static dma_target_t tgt_src = { .ptr = copy_buffer, .inc_du = 1, .size_du = sizeof(copy_buffer), + .type = DMA_DATA_TYPE_WORD }; ``` @@ -166,15 +171,17 @@ This configuration is implicitly initializing the rest of the target configurati * Data type is set to _word_ (32-bits). * The trigger is set to _memory_ (vs. a peripheral). -The destination target can also dispense of a size, as the source size will be used.
- ```C static dma_target_t tgt_dst = { .ptr = copy_buffer, .inc_du = 1, + .size_du = sizeof(copy_buffer), + .type = DMA_DATA_TYPE_WORD }; ``` +Both destination and source targets have to contain a data type (they can be different) and size in data units (they should be the same). + Finally, a transaction is created to relate both targets: ```C @@ -488,3 +495,57 @@ There are 6 applications using the DMA: * `spi_host_dma_exampe`: Test the transfer of data through the SPI host. Not available on Verilator. * `spi_host_dma_power_gate_example`: Test the transfer of data through the SPI host. Not available on Verilator. + +## 😎 X-pert Zone: + +If you know what you are doing and want to minimize the overhead of using the DMA, you can try by-passing the HAL and writing directly on the configuration registers. + +```c + + /* We will copy a set of 25 half-words of 16 bits into a buffer of 32-bit words. + Each word in the destination buffer will have its 16 MSB set to 0, and the 16 LSB with the corresponding value from the source.*/ + #define HALF_WORDS_TO_COPY 25 + static uint16_t src_buffer[HALF_WORDS_TO_COPY]; // The source buffer + static uint32_t dst_buffer[HALF_WORDS_TO_COPY]; // The destination buffer + + /* Set the DMA's control block's peripheral structure to point to the address defined in core_v_mini_mcu.h */ + dma_cb.peri = dma_peri; + /* Activate interrupts*/ + dma_cb.peri->INTERRUPT_EN |= INTR_EN_TRANS_DONE; + /* Set the source and destination pointers*/ + dma_cb.peri->SRC_PTR = (uint16_t*) src_buffer; + dma_cb.peri->DST_PTR = (uint32_t*) dst_buffer; + + /* Set the source increment as 2 bytes (because the source buffer is uint16_t). + Set the destination increment as 4 bytes (because the destination buffer is uint32_t). + We write 1026 = 0000 0100 0000 0010, + as the first 8 LSB refer to the source, and the next 8 bits for the destination.
*/ + dma_cb.peri->PTR_INC = (uint32_t) 1026; + + /* Make sure that the DMA will point to memory.*/ + dma_cb.peri->SLOT = DMA_TRIG_MEMORY; + + /* Set the data transfer type as half-words.*/ + dma_cb.peri->TYPE = DMA_DATA_TYPE_HALF_WORD; + + /* Set the transaction size, this will launch the transaction. + If you want to restart the same transaction again, just run from here.*/ + dma_cb.peri->SIZE = HALF_WORDS_TO_COPY; + + /* Go to sleep until the DMA finishes.*/ + while( dma_cb.peri->STATUS == 0 ) { + /* Disable the interrupts in MSTATUS to avoid going to sleep AFTER the interrupt + was triggered.*/ + CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); + /* If the transaction has not yet finished, go to sleep*/ + if (dma_cb.peri->STATUS == 0) { + /* If an interrupt happened before, the core would still wake-up, + but will not jump to the interrupt handler while MSTATUS is not re-set. */ + { asm volatile("wfi"); } + } + /* Restore the interrupts. */ + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); + } + + +``` \ No newline at end of file diff --git a/ides/img/build_screenshot.png b/docs/source/_static/ides/build_screenshot.png similarity index 100% rename from ides/img/build_screenshot.png rename to docs/source/_static/ides/build_screenshot.png diff --git a/ides/img/debug_screenshot.png b/docs/source/_static/ides/debug_screenshot.png similarity index 100% rename from ides/img/debug_screenshot.png rename to docs/source/_static/ides/debug_screenshot.png diff --git a/docs/source/conf.py b/docs/source/conf.py index c9f6eaabd..44ce6b68c 100644 --- a/docs/source/conf.py +++ b/docs/source/conf.py @@ -4,6 +4,10 @@ # # Author: Embedded Systems Laboratory (EPFL) +import os +import sys +sys.path.insert(0, os.path.abspath("../../util")) + project = 'X-HEEP' copyright = '2023, EPFL' author = 'ESL' @@ -17,9 +21,12 @@ 'sphinx.ext.autodoc', 'sphinx.ext.autosummary', 'sphinx.ext.intersphinx', + 'sphinxcontrib.apidoc', 'myst_parser', ] +html_static_path = ['_static'] + source_suffix = ['.rst', '.md'] intersphinx_mapping
= { @@ -33,3 +40,7 @@ html_theme = 'sphinx_rtd_theme' epub_show_urls = 'footnote' + +apidoc_module_dir = '../../util/x_heep_gen' +apidoc_output_dir = 'Configuration/generated' +apidoc_separate_modules = True \ No newline at end of file diff --git a/docs/source/images/x-heep-outline.png b/docs/source/images/x-heep-outline.png new file mode 100644 index 000000000..b2aa5446f Binary files /dev/null and b/docs/source/images/x-heep-outline.png differ diff --git a/docs/source/index.rst b/docs/source/index.rst index e3968a3f4..a0d4c7463 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,6 +1,11 @@ X-HEEP ====== +.. image:: images/x-heep-outline.png + :width: 600 + + + ``X-HEEP`` (eXtendable Heterogeneous Energy-Efficient Platform) is a RISC-V microcontroller described in SystemVerilog that can be configured to target small and tiny platforms as well as extended to support accelerators. The cool thing about X-HEEP is that we provide a simple customizable MCU, so CPUs, common peripherals, memories, etc. so that you can extend it with your own accelerator without modifying the MCU, but just instantiating it in your design. By doing so, you inherit an IP capable of booting RTOS (such as FreeRTOS) with the whole FW stack, including HAL drivers and SDK, and you can focus on building your special HW or APP supported by the microcontroller. @@ -23,7 +28,7 @@ The CPU subsystem is based on the `RISC-V lowRISC Ibex 1 ? $clog2(SYSTEM_XBAR_NMASTER) : 32'd1; localparam int unsigned LOG_SYSTEM_XBAR_NSLAVE = SYSTEM_XBAR_NSLAVE > 1 ? 
$clog2(SYSTEM_XBAR_NSLAVE) : 32'd1; - localparam int unsigned NUM_BANKS = ${ram_numbanks}; - localparam int unsigned NUM_BANKS_IL = ${ram_numbanks_il}; + localparam int unsigned NUM_BANKS = ${xheep.ram_numbanks()}; + localparam int unsigned NUM_BANKS_IL = ${xheep.ram_numbanks_il()}; localparam int unsigned EXTERNAL_DOMAINS = ${external_domains}; localparam logic[31:0] ERROR_START_ADDRESS = 32'hBADACCE5; @@ -62,49 +62,44 @@ package core_v_mini_mcu_pkg; localparam logic[31:0] ERROR_END_ADDRESS = ERROR_START_ADDRESS + ERROR_SIZE; localparam logic[31:0] ERROR_IDX = 32'd0; -% for bank in range(ram_numbanks_cont): - localparam logic [31:0] RAM${bank}_START_ADDRESS = 32'h${'{:08X}'.format(int(ram_start_address) + bank*32*1024)}; - localparam logic [31:0] RAM${bank}_SIZE = 32'h${hex(32*1024)[2:]}; - localparam logic [31:0] RAM${bank}_END_ADDRESS = RAM${bank}_START_ADDRESS + RAM${bank}_SIZE; - localparam logic [31:0] RAM${bank}_IDX = 32'd${bank + 1}; +% for bank in xheep.iter_ram_banks(): + localparam logic [31:0] RAM${bank.name()}_IDX = 32'd${bank.map_idx()}; + localparam logic [31:0] RAM${bank.name()}_SIZE = 32'h${f'{bank.size():08X}'}; + localparam logic [31:0] RAM${bank.name()}_START_ADDRESS = 32'h${f'{bank.start_address():08X}'}; + localparam logic [31:0] RAM${bank.name()}_END_ADDRESS = 32'h${f'{bank.end_address():08X}'}; % endfor -% if ram_numbanks_il != 0: - localparam logic [31:0] RAM${ram_numbanks_cont}_START_ADDRESS = 32'h${'{:08X}'.format(int(ram_start_address) + int(ram_numbanks_cont)*32*1024)}; - localparam logic [31:0] RAM${ram_numbanks_cont}_SIZE = 32'h${hex(int(ram_numbanks_il)*32*1024)[2:]}; - localparam logic [31:0] RAM${ram_numbanks_cont}_END_ADDRESS = RAM${ram_numbanks_cont}_START_ADDRESS + RAM${ram_numbanks_cont}_SIZE; - localparam logic [31:0] RAM${ram_numbanks_cont}_IDX = 32'd${ram_numbanks_cont + 1}; -% for bank in range(ram_numbanks_il - 1): - localparam logic [31:0] RAM${int(ram_numbanks_cont) + bank + 1}_IDX = 32'd${int(ram_numbanks_cont) + bank + 
2}; + +% for i, group in enumerate(xheep.iter_il_groups()): + localparam logic [31:0] RAM_IL${i}_START_ADDRESS = 32'h${f'{group.start:08X}'}; + localparam logic [31:0] RAM_IL${i}_SIZE = 32'h${f'{group.size:08X}'}; + localparam logic [31:0] RAM_IL${i}_END_ADDRESS = RAM_IL${i}_START_ADDRESS + RAM_IL${i}_SIZE; + localparam logic [31:0] RAM_IL${i}_IDX = RAM${group.first_name}_IDX; % endfor -% endif localparam logic[31:0] DEBUG_START_ADDRESS = 32'h${debug_start_address}; localparam logic[31:0] DEBUG_SIZE = 32'h${debug_size_address}; localparam logic[31:0] DEBUG_END_ADDRESS = DEBUG_START_ADDRESS + DEBUG_SIZE; - localparam logic[31:0] DEBUG_IDX = 32'd${int(ram_numbanks) + 1}; + localparam logic[31:0] DEBUG_IDX = 32'd${xheep.ram_numbanks() + 1}; localparam logic[31:0] AO_PERIPHERAL_START_ADDRESS = 32'h${ao_peripheral_start_address}; localparam logic[31:0] AO_PERIPHERAL_SIZE = 32'h${ao_peripheral_size_address}; localparam logic[31:0] AO_PERIPHERAL_END_ADDRESS = AO_PERIPHERAL_START_ADDRESS + AO_PERIPHERAL_SIZE; - localparam logic[31:0] AO_PERIPHERAL_IDX = 32'd${int(ram_numbanks) + 2}; + localparam logic[31:0] AO_PERIPHERAL_IDX = 32'd${xheep.ram_numbanks() + 2}; localparam logic[31:0] PERIPHERAL_START_ADDRESS = 32'h${peripheral_start_address}; localparam logic[31:0] PERIPHERAL_SIZE = 32'h${peripheral_size_address}; localparam logic[31:0] PERIPHERAL_END_ADDRESS = PERIPHERAL_START_ADDRESS + PERIPHERAL_SIZE; - localparam logic[31:0] PERIPHERAL_IDX = 32'd${int(ram_numbanks) + 3}; + localparam logic[31:0] PERIPHERAL_IDX = 32'd${xheep.ram_numbanks() + 3}; localparam logic[31:0] FLASH_MEM_START_ADDRESS = 32'h${flash_mem_start_address}; localparam logic[31:0] FLASH_MEM_SIZE = 32'h${flash_mem_size_address}; localparam logic[31:0] FLASH_MEM_END_ADDRESS = FLASH_MEM_START_ADDRESS + FLASH_MEM_SIZE; - localparam logic[31:0] FLASH_MEM_IDX = 32'd${int(ram_numbanks) + 4}; + localparam logic[31:0] FLASH_MEM_IDX = 32'd${xheep.ram_numbanks() + 4}; localparam addr_map_rule_t 
[SYSTEM_XBAR_NSLAVE-1:0] XBAR_ADDR_RULES = '{ '{ idx: ERROR_IDX, start_addr: ERROR_START_ADDRESS, end_addr: ERROR_END_ADDRESS }, -% for bank in range(ram_numbanks_cont): - '{ idx: RAM${bank}_IDX, start_addr: RAM${bank}_START_ADDRESS, end_addr: RAM${bank}_END_ADDRESS }, -% endfor -% for bank in range(ram_numbanks_il): - '{ idx: RAM${int(ram_numbanks_cont) + bank}_IDX, start_addr: RAM${ram_numbanks_cont}_START_ADDRESS, end_addr: RAM${ram_numbanks_cont}_END_ADDRESS }, +% for bank in xheep.iter_ram_banks(): + '{ idx: RAM${bank.name()}_IDX, start_addr: RAM${bank.name()}_START_ADDRESS, end_addr: RAM${bank.name()}_END_ADDRESS }, % endfor '{ idx: DEBUG_IDX, start_addr: DEBUG_START_ADDRESS, end_addr: DEBUG_END_ADDRESS }, '{ idx: AO_PERIPHERAL_IDX, start_addr: AO_PERIPHERAL_START_ADDRESS, end_addr: AO_PERIPHERAL_END_ADDRESS }, @@ -195,5 +190,11 @@ package core_v_mini_mcu_pkg; localparam int unsigned NUM_PAD_PORT_SEL_WIDTH = NUM_PAD > 1 ? $clog2(NUM_PAD) : 32'd1; + typedef enum logic [1:0] { + TOP, + RIGHT, + BOTTOM, + LEFT + } pad_side_e; endpackage diff --git a/hw/core-v-mini-mcu/memory_subsystem.sv.tpl b/hw/core-v-mini-mcu/memory_subsystem.sv.tpl index 9e89348c1..1e9501a7a 100644 --- a/hw/core-v-mini-mcu/memory_subsystem.sv.tpl +++ b/hw/core-v-mini-mcu/memory_subsystem.sv.tpl @@ -22,30 +22,21 @@ module memory_subsystem input logic [core_v_mini_mcu_pkg::NUM_BANKS-1:0] set_retentive_ni ); - localparam int NumWords = 32 * 1024 / 4; - localparam int AddrWidth = $clog2(32 * 1024); -% if ram_numbanks_il != 0: - localparam int ilAddrWidth = $clog2(${ram_numbanks_il} * 32 * 1024); -% endif - logic [NUM_BANKS-1:0] ram_valid_q; // Clock-gating logic [NUM_BANKS-1:0] clk_cg; -% if ram_numbanks_il != 0: - logic [NUM_BANKS-1:0][AddrWidth-3:0] ram_req_addr; - for (genvar i = 0; i < NUM_BANKS; i++) begin : gen_addr_napot - if (i >= NUM_BANKS - ${ram_numbanks_il}) begin - assign ram_req_addr[i] = { - ram_req_i[i].addr[ilAddrWidth-1:AddrWidth] - - 
core_v_mini_mcu_pkg::RAM${ram_numbanks_cont}_START_ADDRESS[ilAddrWidth-1:AddrWidth], - ram_req_i[i].addr[AddrWidth-1:${2+log_ram_numbanks_il}] - }; - end else begin - assign ram_req_addr[i] = ram_req_i[i].addr[AddrWidth-1:2]; - end - end -% endif +% for i, bank in enumerate(xheep.iter_ram_banks()): + logic [${bank.size().bit_length()-1 -2}-1:0] ram_req_addr_${i}; +% endfor + +% for i, bank in enumerate(xheep.iter_ram_banks()): +<% + p1 = bank.size().bit_length()-1 + bank.il_level() + p2 = 2 + bank.il_level() +%> + assign ram_req_addr_${i} = ram_req_i[${i}].addr[${p1}-1:${p2}]; +% endfor for (genvar i = 0; i < NUM_BANKS; i++) begin : gen_sram @@ -66,27 +57,24 @@ module memory_subsystem assign ram_resp_o[i].gnt = ram_req_i[i].req; assign ram_resp_o[i].rvalid = ram_valid_q[i]; + end - //Fixed to 8KWords per bank (32KB) - sram_wrapper #( - .NumWords (NumWords), - .DataWidth(32'd32) - ) ram_i ( - .clk_i(clk_cg[i]), - .rst_ni(rst_ni), - .req_i(ram_req_i[i].req), - .we_i(ram_req_i[i].we), -% if ram_numbanks_il == 0: - .addr_i(ram_req_i[i].addr[AddrWidth-1:2]), -% else: - .addr_i(ram_req_addr[i]), -% endif - .wdata_i(ram_req_i[i].wdata), - .be_i(ram_req_i[i].be), - .set_retentive_ni(set_retentive_ni[i]), - .rdata_o(ram_resp_o[i].rdata) - ); +%for i, bank in enumerate(xheep.iter_ram_banks()): + sram_wrapper #( + .NumWords (${bank.size() // 4}), + .DataWidth(32'd32) + ) ram${bank.name()}_i ( + .clk_i(clk_cg[${i}]), + .rst_ni(rst_ni), + .req_i(ram_req_i[${i}].req), + .we_i(ram_req_i[${i}].we), + .addr_i(ram_req_addr_${i}), + .wdata_i(ram_req_i[${i}].wdata), + .be_i(ram_req_i[${i}].be), + .set_retentive_ni(set_retentive_ni[${i}]), + .rdata_o(ram_resp_o[${i}].rdata) + ); - end +%endfor endmodule diff --git a/hw/core-v-mini-mcu/peripheral_subsystem.sv b/hw/core-v-mini-mcu/peripheral_subsystem.sv index 5c28e195d..fd7ac4c31 100644 --- a/hw/core-v-mini-mcu/peripheral_subsystem.sv +++ b/hw/core-v-mini-mcu/peripheral_subsystem.sv @@ -50,6 +50,18 @@ module peripheral_subsystem output 
logic cio_sda_en_o, // SPI Host + output logic spi_sck_o, + output logic spi_sck_en_o, + output logic [spi_host_reg_pkg::NumCS-1:0] spi_csb_o, + output logic [spi_host_reg_pkg::NumCS-1:0] spi_csb_en_o, + output logic [ 3:0] spi_sd_o, + output logic [ 3:0] spi_sd_en_o, + input logic [ 3:0] spi_sd_i, + output logic spi_intr_event_o, + output logic spi_rx_valid_o, + output logic spi_tx_ready_o, + + // SPI 2 Host output logic spi2_sck_o, output logic spi2_sck_en_o, output logic [spi_host_reg_pkg::NumCS-1:0] spi2_csb_o, @@ -291,6 +303,32 @@ module peripheral_subsystem .msip_o(msip_o) ); + spi_host #( + .reg_req_t(reg_pkg::reg_req_t), + .reg_rsp_t(reg_pkg::reg_rsp_t) + ) spi_host_dma_i ( + .clk_i(clk_cg), + .rst_ni, + .reg_req_i(peripheral_slv_req[core_v_mini_mcu_pkg::SPI_HOST_IDX]), + .reg_rsp_o(peripheral_slv_rsp[core_v_mini_mcu_pkg::SPI_HOST_IDX]), + .alert_rx_i(), + .alert_tx_o(), + .passthrough_i(spi_device_pkg::PASSTHROUGH_REQ_DEFAULT), + .passthrough_o(), + .cio_sck_o(spi_sck_o), + .cio_sck_en_o(spi_sck_en_o), + .cio_csb_o(spi_csb_o), + .cio_csb_en_o(spi_csb_en_o), + .cio_sd_o(spi_sd_o), + .cio_sd_en_o(spi_sd_en_o), + .cio_sd_i(spi_sd_i), + .rx_valid_o(spi_rx_valid_o), + .tx_ready_o(spi_tx_ready_o), + .intr_error_o(), + .intr_spi_event_o(spi_intr_event_o) + ); + + gpio #( .reg_req_t(reg_pkg::reg_req_t), diff --git a/hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl b/hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl index 71215d048..12fc624fd 100644 --- a/hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl +++ b/hw/core-v-mini-mcu/peripheral_subsystem.sv.tpl @@ -50,6 +50,18 @@ module peripheral_subsystem output logic cio_sda_en_o, // SPI Host + output logic spi_sck_o, + output logic spi_sck_en_o, + output logic [spi_host_reg_pkg::NumCS-1:0] spi_csb_o, + output logic [spi_host_reg_pkg::NumCS-1:0] spi_csb_en_o, + output logic [ 3:0] spi_sd_o, + output logic [ 3:0] spi_sd_en_o, + input logic [ 3:0] spi_sd_i, + output logic spi_intr_event_o, + output logic spi_rx_valid_o, + 
output logic spi_tx_ready_o, + + // SPI 2 Host output logic spi2_sck_o, output logic spi2_sck_en_o, output logic [spi_host_reg_pkg::NumCS-1:0] spi2_csb_o, @@ -306,6 +318,49 @@ module peripheral_subsystem % endif % endfor +% for peripheral in peripherals.items(): +% if peripheral[0] in ("spi_host"): +% if peripheral[1]['is_included'] in ("yes"): + spi_host #( + .reg_req_t(reg_pkg::reg_req_t), + .reg_rsp_t(reg_pkg::reg_rsp_t) + ) spi_host_dma_i ( + .clk_i(clk_cg), + .rst_ni, + .reg_req_i(peripheral_slv_req[core_v_mini_mcu_pkg::SPI_HOST_IDX]), + .reg_rsp_o(peripheral_slv_rsp[core_v_mini_mcu_pkg::SPI_HOST_IDX]), + .alert_rx_i(), + .alert_tx_o(), + .passthrough_i(spi_device_pkg::PASSTHROUGH_REQ_DEFAULT), + .passthrough_o(), + .cio_sck_o(spi_sck_o), + .cio_sck_en_o(spi_sck_en_o), + .cio_csb_o(spi_csb_o), + .cio_csb_en_o(spi_csb_en_o), + .cio_sd_o(spi_sd_o), + .cio_sd_en_o(spi_sd_en_o), + .cio_sd_i(spi_sd_i), + .rx_valid_o(spi_rx_valid_o), + .tx_ready_o(spi_tx_ready_o), + .intr_error_o(), + .intr_spi_event_o(spi_intr_event_o) + ); +% else: + assign peripheral_slv_rsp[core_v_mini_mcu_pkg::SPI_HOST_IDX] = '0; + assign spi_sck_o = '0; + assign spi_sck_en_o = '0; + assign spi_csb_o = '0; + assign spi_csb_en_o = '0; + assign spi_sd_o = '0; + assign spi_sd_en_o = '0; + assign spi_intr_event_o = '0; + assign spi_rx_valid_o = '0; + assign spi_tx_ready_o = '0; +% endif +% endif +% endfor + + % for peripheral in peripherals.items(): % if peripheral[0] in ("gpio"): diff --git a/hw/core-v-mini-mcu/system_bus.sv.tpl b/hw/core-v-mini-mcu/system_bus.sv.tpl index b7dbbfdc0..19bb8622a 100644 --- a/hw/core-v-mini-mcu/system_bus.sv.tpl +++ b/hw/core-v-mini-mcu/system_bus.sv.tpl @@ -157,8 +157,8 @@ module system_bus // Internal slave requests assign error_slave_req = int_slave_req[core_v_mini_mcu_pkg::ERROR_IDX]; -% for bank in range(ram_numbanks): - assign ram_req_o[${bank}] = int_slave_req[core_v_mini_mcu_pkg::RAM${bank}_IDX]; +% for bank in xheep.iter_ram_banks(): + assign 
ram_req_o[${bank.name()}] = int_slave_req[core_v_mini_mcu_pkg::RAM${bank.name()}_IDX]; % endfor assign debug_slave_req_o = int_slave_req[core_v_mini_mcu_pkg::DEBUG_IDX]; assign ao_peripheral_slave_req_o = int_slave_req[core_v_mini_mcu_pkg::AO_PERIPHERAL_IDX]; @@ -175,8 +175,8 @@ module system_bus // Internal slave responses assign int_slave_resp[core_v_mini_mcu_pkg::ERROR_IDX] = error_slave_resp; -% for bank in range(ram_numbanks): - assign int_slave_resp[core_v_mini_mcu_pkg::RAM${bank}_IDX] = ram_resp_i[${bank}]; +% for bank in xheep.iter_ram_banks(): + assign int_slave_resp[core_v_mini_mcu_pkg::RAM${bank.name()}_IDX] = ram_resp_i[${bank.name()}]; % endfor assign int_slave_resp[core_v_mini_mcu_pkg::DEBUG_IDX] = debug_slave_resp_i; assign int_slave_resp[core_v_mini_mcu_pkg::AO_PERIPHERAL_IDX] = ao_peripheral_slave_resp_i; diff --git a/hw/core-v-mini-mcu/system_xbar.sv.tpl b/hw/core-v-mini-mcu/system_xbar.sv.tpl index 2731cb4e7..18cbbe4df 100644 --- a/hw/core-v-mini-mcu/system_xbar.sv.tpl +++ b/hw/core-v-mini-mcu/system_xbar.sv.tpl @@ -38,7 +38,7 @@ module system_xbar localparam int unsigned RESP_AGG_DATA_WIDTH = 32; //Address Decoder -% if ram_numbanks_il == 0: +% if not xheep.has_il_ram(): logic [XBAR_NMASTER-1:0][LOG_XBAR_NSLAVE-1:0] port_sel; % else: logic [XBAR_NMASTER-1:0][LOG_XBAR_NSLAVE-1:0] port_sel, pre_port_sel; @@ -75,7 +75,7 @@ module system_xbar ) addr_decode_i ( .addr_i(master_req_i[i].addr), .addr_map_i, -% if ram_numbanks_il == 0: +% if not xheep.has_il_ram(): .idx_o(port_sel[i]), % else: .idx_o(pre_port_sel[i]), @@ -86,21 +86,21 @@ module system_xbar .default_idx_i ); end -% if ram_numbanks_il != 0: +% if xheep.has_il_ram(): localparam ZERO = 32'h0; for (genvar j = 0; j < XBAR_NMASTER; j++) begin : gen_addr_napot always_comb begin - port_sel[j] = 1; - post_master_req_addr[j] = '0; - if (pre_port_sel[j] == NUM_BANKS[LOG_XBAR_NSLAVE-1:0] - (NUM_BANKS_IL[LOG_XBAR_NSLAVE-1:0]-1)) begin - port_sel[j] = NUM_BANKS[LOG_XBAR_NSLAVE-1:0] - 
(NUM_BANKS_IL[LOG_XBAR_NSLAVE-1:0]-1) + {ZERO[LOG_XBAR_NSLAVE-${1+log_ram_numbanks_il}:0],master_req_i[j].addr[${1+log_ram_numbanks_il}:2]}; - post_master_req_addr[j] = {master_req_i[j].addr[31:${2+log_ram_numbanks_il}], ${2+log_ram_numbanks_il}'h0}; - end else begin - port_sel[j] = pre_port_sel[j]; - post_master_req_addr[j] = master_req_i[j].addr; + port_sel[j] = pre_port_sel[j]; + post_master_req_addr[j] = master_req_i[j].addr; +% for i, group in enumerate(xheep.iter_il_groups()): + + if (pre_port_sel[j] == RAM_IL${i}_IDX[LOG_XBAR_NSLAVE-1:0]) begin + port_sel[j] = RAM_IL${i}_IDX[LOG_XBAR_NSLAVE-1:0] + {ZERO[LOG_XBAR_NSLAVE-${1+group.n.bit_length()}:0],master_req_i[j].addr[${group.n.bit_length()-1 +1}:2]}; + post_master_req_addr[j] = {master_req_i[j].addr[31:${2+group.n.bit_length()-1}], ${2+group.n.bit_length()-1}'h0}; end +% endfor end end % endif @@ -113,7 +113,7 @@ module system_xbar req: master_req_i[i].req, we: master_req_i[i].we, be: master_req_i[i].be, - % if ram_numbanks_il == 0: + % if not xheep.has_il_ram(): addr: master_req_i[i].addr, % else: addr: post_master_req_addr[i], diff --git a/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files.lock.hjson b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files.lock.hjson new file mode 100644 index 000000000..a05740cc3 --- /dev/null +++ b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files.lock.hjson @@ -0,0 +1,14 @@ +// Copyright lowRISC contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +// This file is generated by the util/vendor script. Please do not modify it +// manually. 
+ +{ + upstream: + { + url: https://github.com/esl-epfl/zcu104_board_files.git + rev: 53e4affbaeec73809304940be8f5351ae147227a + } +} diff --git a/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files.vendor.hjson b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files.vendor.hjson new file mode 100644 index 000000000..690288260 --- /dev/null +++ b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files.vendor.hjson @@ -0,0 +1,16 @@ +// Copyright 2023 David Mallasén Quintana +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +{ + name: "esl_epfl_zcu104_board_files", + target_dir: "esl_epfl_zcu104_board_files", + + upstream: { + url: "https://github.com/esl-epfl/zcu104_board_files.git", + rev: "53e4affbaeec73809304940be8f5351ae147227a", + }, + + exclude_from_upstream: [ + "README.md" + ] +} diff --git a/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files/board.xml b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files/board.xml new file mode 100644 index 000000000..2206a9183 --- /dev/null +++ b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files/board.xml @@ -0,0 +1,657 @@ + + + + + + ZCU104 Board File Image + + + + + RevA + RevB + RevC + + + 1.0 + + Zynq UltraScale+ ZCU104 Evaluation Board + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + FPGA part on the board + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + DDR4 board interface, it can use DDR4 controller IP for connection. 
+ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + 4-Position User DIP Switch + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + SI570 based User programmable differential 300 MHz Clock. Can be used for DDR4 input system clock + + + + + + + PL UART + + + + + + + + PL I2C + + + + + 2GB DDR4 SDRAM memory SODIMM + + + + + + + + CPU Reset Push Button, Active High + + + + DIP Switches 3 to 0 + + + + LEDs, 3 to 0, Active High + + + + Push Buttons, 3 to 0, Active High + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files/part0_pins.xml b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files/part0_pins.xml new file mode 100644 index 000000000..e5d6301c1 --- /dev/null +++ b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files/part0_pins.xml @@ -0,0 +1,228 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git 
a/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files/preset.xml b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files/preset.xml new file mode 100644 index 000000000..b8d890844 --- /dev/null +++ b/hw/fpga/board_files/vendor/esl_epfl_zcu104_board_files/preset.xml @@ -0,0 +1,446 @@ + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + + diff --git a/hw/fpga/constraints/nexys/constraints.xdc b/hw/fpga/constraints/nexys/constraints.xdc index 5a3c1c4ba..ed3a6e89a 100644 --- a/hw/fpga/constraints/nexys/constraints.xdc +++ b/hw/fpga/constraints/nexys/constraints.xdc @@ -1 +1 @@ -set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets x_heep_system_i/pad_ring_i/pad_clk_i/xilinx_iobuf_i/O] +create_clock -add -name sys_clk_pin -period 10.00 -waveform {0 5} [get_ports {clk_i}]; diff --git a/hw/fpga/constraints/nexys/pin_assign.xdc b/hw/fpga/constraints/nexys/pin_assign.xdc index 70020cf37..d3723fef9 100644 --- a/hw/fpga/constraints/nexys/pin_assign.xdc +++ b/hw/fpga/constraints/nexys/pin_assign.xdc @@ -1,85 +1,82 @@ -## This file is a general .xdc for the Nexys A7-100T -## To use it in a project: -## - uncomment the lines corresponding to used pins -## - rename the used ports 
(in each line, after get_ports) according to the top level signal names in the project +# Copyright 2022 EPFL +# Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 ## Clock signal -set_property -dict { PACKAGE_PIN E3 IOSTANDARD LVCMOS33 } [get_ports { clk_i }]; #IO_L12P_T1_MRCC_35 Sch=clk100mhz -create_clock -add -name sys_clk_pin -period 10.00 -waveform {0 5} [get_ports { clk_i }]; +set_property -dict {PACKAGE_PIN E3 IOSTANDARD LVCMOS33} [get_ports {clk_i}]; #IO_L12P_T1_MRCC_35 Sch=clk100mhz -set_property -dict { PACKAGE_PIN C12 IOSTANDARD LVCMOS33 } [get_ports { rst_i }]; #IO_L3P_T0_DQS_AD1P_15 Sch=cpu_resetn +set_property -dict {PACKAGE_PIN C12 IOSTANDARD LVCMOS33} [get_ports {rst_i}]; #IO_L3P_T0_DQS_AD1P_15 Sch=cpu_resetn ## LEDs -set_property -dict { PACKAGE_PIN V11 IOSTANDARD LVCMOS33 } [get_ports { rst_led }]; -set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets rst_led_OBUF] -set_property -dict { PACKAGE_PIN K15 IOSTANDARD LVCMOS33 } [get_ports { clk_out }]; -set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets clk_out_OBUF] -set_property -dict { PACKAGE_PIN J13 IOSTANDARD LVCMOS33 } [get_ports { clk_led }]; -set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets clk_led_OBUF] -set_property -dict { PACKAGE_PIN N14 IOSTANDARD LVCMOS33 } [get_ports { exit_valid_o }]; -set_property -dict { PACKAGE_PIN R18 IOSTANDARD LVCMOS33 } [get_ports { exit_value_o }]; - - +set_property -dict {PACKAGE_PIN V11 IOSTANDARD LVCMOS33} [get_ports {rst_led_o}]; +set_property -dict {PACKAGE_PIN J13 IOSTANDARD LVCMOS33} [get_ports {clk_led_o}]; +set_property -dict {PACKAGE_PIN N14 IOSTANDARD LVCMOS33} [get_ports {exit_valid_o}]; +set_property -dict {PACKAGE_PIN R18 IOSTANDARD LVCMOS33} [get_ports {exit_value_o}]; +set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets clk_led_o_OBUF] +set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets rst_led_o_OBUF] ##Switches -set_property -dict { PACKAGE_PIN L16 IOSTANDARD LVCMOS33 } 
[get_ports { execute_from_flash_i }]; #Sch=sw[1] -set_property -dict { PACKAGE_PIN M13 IOSTANDARD LVCMOS33 } [get_ports { boot_select_i }]; #Sch=sw[2] - +set_property -dict {PACKAGE_PIN L16 IOSTANDARD LVCMOS33} [get_ports {execute_from_flash_i}]; #Sch=sw[1] +set_property -dict {PACKAGE_PIN M13 IOSTANDARD LVCMOS33} [get_ports {boot_select_i}]; #Sch=sw[2] ##Switches -set_property -dict { PACKAGE_PIN J15 IOSTANDARD LVCMOS33 } [get_ports { jtag_trst_ni }]; #IO_L24N_T3_RS0_15 Sch=sw[0] - +set_property -dict {PACKAGE_PIN J15 IOSTANDARD LVCMOS33} [get_ports {jtag_trst_ni}]; #IO_L24N_T3_RS0_15 Sch=sw[0] ##Pmod Headers ##Pmod Header JA -set_property -dict { PACKAGE_PIN C17 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_csb_o }]; #IO_L20N_T3_A19_15 Sch=ja[1] -set_property -dict { PACKAGE_PIN D18 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_sck_o }]; #IO_L21N_T3_DQS_A18_15 Sch=ja[2] -set_property -dict { PACKAGE_PIN E18 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_sd_io[0] }]; #IO_L21P_T3_DQS_15 Sch=ja[3] -set_property -dict { PACKAGE_PIN G17 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_sd_io[1] }]; #IO_L18N_T2_A23_15 Sch=ja[4] -set_property -dict { PACKAGE_PIN D17 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_sd_io[2] }]; #IO_L16N_T2_A27_15 Sch=ja[7] -set_property -dict { PACKAGE_PIN E17 IOSTANDARD LVCMOS33 } [get_ports { spi_flash_sd_io[3] }]; #IO_L16P_T2_A28_15 Sch=ja[8] -#set_property -dict { PACKAGE_PIN F18 IOSTANDARD LVCMOS33 } [get_ports { gpio_io[6] }]; #IO_L22N_T3_A16_15 Sch=ja[9] -#set_property -dict { PACKAGE_PIN G18 IOSTANDARD LVCMOS33 } [get_ports { gpio_io[7] }]; #IO_L22P_T3_A17_15 Sch=ja[10] - +set_property -dict {PACKAGE_PIN C17 IOSTANDARD LVCMOS33} [get_ports {spi_flash_csb_o}]; #IO_L20N_T3_A19_15 Sch=ja[1] +set_property -dict {PACKAGE_PIN D18 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sck_o}]; #IO_L21N_T3_DQS_A18_15 Sch=ja[2] +set_property -dict {PACKAGE_PIN E18 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[0]}]; #IO_L21P_T3_DQS_15 Sch=ja[3] +set_property 
-dict {PACKAGE_PIN G17 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[1]}]; #IO_L18N_T2_A23_15 Sch=ja[4] +set_property -dict {PACKAGE_PIN D17 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[2]}]; #IO_L16N_T2_A27_15 Sch=ja[7] +set_property -dict {PACKAGE_PIN E17 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[3]}]; #IO_L16P_T2_A28_15 Sch=ja[8] +#set_property -dict {PACKAGE_PIN F18 IOSTANDARD LVCMOS33} [get_ports {gpio_io[6]}]; #IO_L22N_T3_A16_15 Sch=ja[9] +#set_property -dict {PACKAGE_PIN G18 IOSTANDARD LVCMOS33} [get_ports {gpio_io[7]}]; #IO_L22P_T3_A17_15 Sch=ja[10] ##Pmod Header JC -set_property -dict { PACKAGE_PIN K1 IOSTANDARD LVCMOS33 } [get_ports { spi_csb_o }]; #IO_L23N_T3_35 Sch=jc[1] -set_property -dict { PACKAGE_PIN F6 IOSTANDARD LVCMOS33 } [get_ports { spi_sck_o }]; #IO_L19N_T3_VREF_35 Sch=jc[2] -set_property -dict { PACKAGE_PIN J2 IOSTANDARD LVCMOS33 } [get_ports { spi_sd_io[0] }]; #IO_L22N_T3_35 Sch=jc[3] -set_property -dict { PACKAGE_PIN G6 IOSTANDARD LVCMOS33 } [get_ports { spi_sd_io[1] }]; #IO_L19P_T3_35 Sch=jc[4] -set_property -dict { PACKAGE_PIN E7 IOSTANDARD LVCMOS33 } [get_ports { spi_sd_io[2] }]; #IO_L6P_T0_35 Sch=jc[7] -set_property -dict { PACKAGE_PIN J3 IOSTANDARD LVCMOS33 } [get_ports { spi_sd_io[3] }]; #IO_L22P_T3_35 Sch=jc[8] -#set_property -dict { PACKAGE_PIN J4 IOSTANDARD LVCMOS33 } [get_ports { clk_out }]; #IO_L21P_T3_DQS_35 Sch=jc[9] -#set_property -dict { PACKAGE_PIN E6 IOSTANDARD LVCMOS33 } [get_ports { JC[10] }]; #IO_L5P_T0_AD13P_35 Sch=jc[10] - +set_property -dict {PACKAGE_PIN K1 IOSTANDARD LVCMOS33} [get_ports {spi_csb_o}]; #IO_L23N_T3_35 Sch=jc[1] +set_property -dict {PACKAGE_PIN F6 IOSTANDARD LVCMOS33} [get_ports {spi_sck_o}]; #IO_L19N_T3_VREF_35 Sch=jc[2] +set_property -dict {PACKAGE_PIN J2 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[0]}]; #IO_L22N_T3_35 Sch=jc[3] +set_property -dict {PACKAGE_PIN G6 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[1]}]; #IO_L19P_T3_35 Sch=jc[4] +set_property -dict {PACKAGE_PIN E7 IOSTANDARD 
LVCMOS33} [get_ports {spi_sd_io[2]}]; #IO_L6P_T0_35 Sch=jc[7] +set_property -dict {PACKAGE_PIN J3 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[3]}]; #IO_L22P_T3_35 Sch=jc[8] +#set_property -dict {PACKAGE_PIN J4 IOSTANDARD LVCMOS33} [get_ports {clk_out}]; #IO_L21P_T3_DQS_35 Sch=jc[9] +#set_property -dict {PACKAGE_PIN E6 IOSTANDARD LVCMOS33} [get_ports {JC[10]}]; #IO_L5P_T0_AD13P_35 Sch=jc[10] ##USB-RS232 Interface -set_property -dict { PACKAGE_PIN C4 IOSTANDARD LVCMOS33 } [get_ports { uart_rx_i }]; #IO_L7P_T1_AD6P_35 Sch=uart_txd_in -set_property -dict { PACKAGE_PIN D4 IOSTANDARD LVCMOS33 } [get_ports { uart_tx_o }]; #IO_L11N_T1_SRCC_35 Sch=uart_rxd_out +set_property -dict {PACKAGE_PIN C4 IOSTANDARD LVCMOS33} [get_ports {uart_rx_i}]; #IO_L7P_T1_AD6P_35 Sch=uart_txd_in +set_property -dict {PACKAGE_PIN D4 IOSTANDARD LVCMOS33} [get_ports {uart_tx_o}]; #IO_L11N_T1_SRCC_35 Sch=uart_rxd_out ##Pmod Header JB -#set_property -dict { PACKAGE_PIN D14 IOSTANDARD LVCMOS33 } [get_ports { JB[1] }]; #IO_L1P_T0_AD0P_15 Sch=jb[1] -#set_property -dict { PACKAGE_PIN F16 IOSTANDARD LVCMOS33 } [get_ports { JB[2] }]; #IO_L14N_T2_SRCC_15 Sch=jb[2] -#set_property -dict { PACKAGE_PIN G16 IOSTANDARD LVCMOS33 } [get_ports { uart_tx_o }]; #IO_L13N_T2_MRCC_15 Sch=jb[3] -#set_property -dict { PACKAGE_PIN H14 IOSTANDARD LVCMOS33 } [get_ports { uart_rx_i }]; #IO_L15P_T2_DQS_15 Sch=jb[4] -set_property -dict { PACKAGE_PIN E16 IOSTANDARD LVCMOS33 } [get_ports { jtag_tms_i }]; #IO_L11N_T1_SRCC_15 Sch=jb[7] -set_property -dict { PACKAGE_PIN F13 IOSTANDARD LVCMOS33 } [get_ports { jtag_tdi_i }]; #IO_L5P_T0_AD9P_15 Sch=jb[8] -set_property -dict { PACKAGE_PIN G13 IOSTANDARD LVCMOS33 } [get_ports { jtag_tdo_o }]; #IO_0_15 Sch=jb[9] -set_property -dict { PACKAGE_PIN H16 IOSTANDARD LVCMOS33 } [get_ports { jtag_tck_i }]; #IO_L13P_T2_MRCC_15 Sch=jb[10] +#set_property -dict {PACKAGE_PIN D14 IOSTANDARD LVCMOS33} [get_ports {JB[1]}]; #IO_L1P_T0_AD0P_15 Sch=jb[1] +#set_property -dict {PACKAGE_PIN F16 IOSTANDARD 
LVCMOS33} [get_ports {JB[2]}]; #IO_L14N_T2_SRCC_15 Sch=jb[2] +#set_property -dict {PACKAGE_PIN G16 IOSTANDARD LVCMOS33} [get_ports {uart_tx_o}]; #IO_L13N_T2_MRCC_15 Sch=jb[3] +#set_property -dict {PACKAGE_PIN H14 IOSTANDARD LVCMOS33} [get_ports {uart_rx_i}]; #IO_L15P_T2_DQS_15 Sch=jb[4] +set_property -dict {PACKAGE_PIN E16 IOSTANDARD LVCMOS33} [get_ports {jtag_tms_i}]; #IO_L11N_T1_SRCC_15 Sch=jb[7] +set_property -dict {PACKAGE_PIN F13 IOSTANDARD LVCMOS33} [get_ports {jtag_tdi_i}]; #IO_L5P_T0_AD9P_15 Sch=jb[8] +set_property -dict {PACKAGE_PIN G13 IOSTANDARD LVCMOS33} [get_ports {jtag_tdo_o}]; #IO_0_15 Sch=jb[9] +set_property -dict {PACKAGE_PIN H16 IOSTANDARD LVCMOS33} [get_ports {jtag_tck_i}]; #IO_L13P_T2_MRCC_15 Sch=jb[10] ## LEDs -set_property -dict { PACKAGE_PIN V17 IOSTANDARD LVCMOS33} [get_ports { spi2_sd_io[1] }]; -set_property -dict { PACKAGE_PIN U17 IOSTANDARD LVCMOS33} [get_ports { spi2_sd_io[2] }]; -set_property -dict { PACKAGE_PIN U16 IOSTANDARD LVCMOS33} [get_ports { spi2_sd_io[3] }]; -set_property -dict { PACKAGE_PIN V16 IOSTANDARD LVCMOS33} [get_ports { i2c_scl_io }]; -set_property -dict { PACKAGE_PIN T15 IOSTANDARD LVCMOS33} [get_ports { i2c_sda_io }]; -set_property -dict { PACKAGE_PIN U14 IOSTANDARD LVCMOS33} [get_ports { gpio_io[5] }]; -set_property -dict { PACKAGE_PIN T16 IOSTANDARD LVCMOS33} [get_ports { gpio_io[6] }]; -set_property -dict { PACKAGE_PIN V15 IOSTANDARD LVCMOS33} [get_ports { gpio_io[7] }]; -set_property -dict { PACKAGE_PIN V14 IOSTANDARD LVCMOS33} [get_ports { gpio_io[8] }]; -set_property -dict { PACKAGE_PIN V12 IOSTANDARD LVCMOS33} [get_ports { gpio_io[9] }]; -set_property -dict { PACKAGE_PIN H17 IOSTANDARD LVCMOS33} [get_ports { gpio_io[10] }]; +set_property -dict {PACKAGE_PIN V17 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[1]}]; +set_property -dict {PACKAGE_PIN U17 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[2]}]; +set_property -dict {PACKAGE_PIN U16 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[3]}]; +set_property -dict 
{PACKAGE_PIN V16 IOSTANDARD LVCMOS33} [get_ports {i2c_scl_io}]; +set_property -dict {PACKAGE_PIN T15 IOSTANDARD LVCMOS33} [get_ports {i2c_sda_io}]; +set_property -dict {PACKAGE_PIN U14 IOSTANDARD LVCMOS33} [get_ports {gpio_io[5]}]; +set_property -dict {PACKAGE_PIN T16 IOSTANDARD LVCMOS33} [get_ports {gpio_io[6]}]; +set_property -dict {PACKAGE_PIN V15 IOSTANDARD LVCMOS33} [get_ports {gpio_io[7]}]; +set_property -dict {PACKAGE_PIN V14 IOSTANDARD LVCMOS33} [get_ports {gpio_io[8]}]; +set_property -dict {PACKAGE_PIN V12 IOSTANDARD LVCMOS33} [get_ports {gpio_io[9]}]; +set_property -dict {PACKAGE_PIN H17 IOSTANDARD LVCMOS33} [get_ports {gpio_io[10]}]; + +##Buttons +set_property -dict {PACKAGE_PIN N17 IOSTANDARD LVCMOS33} [get_ports {gpio_io[0]}]; #IO_L9P_T1_DQS_14 Sch=btnc +set_property -dict {PACKAGE_PIN M18 IOSTANDARD LVCMOS33} [get_ports {gpio_io[1]}]; #IO_L4N_T0_D05_14 Sch=btnu +set_property -dict {PACKAGE_PIN P17 IOSTANDARD LVCMOS33} [get_ports {gpio_io[2]}]; #IO_L12P_T1_MRCC_14 Sch=btnl +set_property -dict {PACKAGE_PIN M17 IOSTANDARD LVCMOS33} [get_ports {gpio_io[3]}]; #IO_L10N_T1_D15_14 Sch=btnr +set_property -dict {PACKAGE_PIN P18 IOSTANDARD LVCMOS33} [get_ports {gpio_io[4]}]; #IO_L9N_T1_DQS_D13_14 Sch=btnd ##7 segment display set_property -dict { PACKAGE_PIN T10 IOSTANDARD LVCMOS33} [get_ports { gpio_io[11] }]; @@ -100,135 +97,5 @@ set_property -dict { PACKAGE_PIN T14 IOSTANDARD LVCMOS33 } [get_ports { spi2_s set_property -dict { PACKAGE_PIN K2 IOSTANDARD LVCMOS33 } [get_ports { spi2_sd_io[0] }]; #IO_L23P_T3_35 Sch=an[6] -##Buttons -set_property -dict { PACKAGE_PIN N17 IOSTANDARD LVCMOS33 } [get_ports { gpio_io[0] }]; #IO_L9P_T1_DQS_14 Sch=btnc -set_property -dict { PACKAGE_PIN M18 IOSTANDARD LVCMOS33 } [get_ports { gpio_io[1] }]; #IO_L4N_T0_D05_14 Sch=btnu -set_property -dict { PACKAGE_PIN P17 IOSTANDARD LVCMOS33 } [get_ports { gpio_io[2] }]; #IO_L12P_T1_MRCC_14 Sch=btnl -set_property -dict { PACKAGE_PIN M17 IOSTANDARD LVCMOS33 } [get_ports { gpio_io[3] }]; 
#IO_L10N_T1_D15_14 Sch=btnr -set_property -dict { PACKAGE_PIN P18 IOSTANDARD LVCMOS33 } [get_ports { gpio_io[4] }]; #IO_L9N_T1_DQS_D13_14 Sch=btnd - - set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets jtag_tck_i_IBUF] - -#set_property -dict { PACKAGE_PIN R15 IOSTANDARD LVCMOS33 } [get_ports { SW[3] }]; #IO_L13N_T2_MRCC_14 Sch=sw[3] -#set_property -dict { PACKAGE_PIN R17 IOSTANDARD LVCMOS33 } [get_ports { SW[4] }]; #IO_L12N_T1_MRCC_14 Sch=sw[4] -#set_property -dict { PACKAGE_PIN T18 IOSTANDARD LVCMOS33 } [get_ports { SW[5] }]; #IO_L7N_T1_D10_14 Sch=sw[5] -#set_property -dict { PACKAGE_PIN U18 IOSTANDARD LVCMOS33 } [get_ports { SW[6] }]; #IO_L17N_T2_A13_D29_14 Sch=sw[6] -#set_property -dict { PACKAGE_PIN R13 IOSTANDARD LVCMOS33 } [get_ports { SW[7] }]; #IO_L5N_T0_D07_14 Sch=sw[7] -#set_property -dict { PACKAGE_PIN T8 IOSTANDARD LVCMOS18 } [get_ports { SW[8] }]; #IO_L24N_T3_34 Sch=sw[8] -#set_property -dict { PACKAGE_PIN U8 IOSTANDARD LVCMOS18 } [get_ports { SW[9] }]; #IO_25_34 Sch=sw[9] -#set_property -dict { PACKAGE_PIN R16 IOSTANDARD LVCMOS33 } [get_ports { SW[10] }]; #IO_L15P_T2_DQS_RDWR_B_14 Sch=sw[10] -#set_property -dict { PACKAGE_PIN T13 IOSTANDARD LVCMOS33 } [get_ports { SW[11] }]; #IO_L23P_T3_A03_D19_14 Sch=sw[11] -#set_property -dict { PACKAGE_PIN H6 IOSTANDARD LVCMOS33 } [get_ports { SW[12] }]; #IO_L24P_T3_35 Sch=sw[12] -#set_property -dict { PACKAGE_PIN U12 IOSTANDARD LVCMOS33 } [get_ports { SW[13] }]; #IO_L20P_T3_A08_D24_14 Sch=sw[13] -#set_property -dict { PACKAGE_PIN U11 IOSTANDARD LVCMOS33 } [get_ports { SW[14] }]; #IO_L19N_T3_A09_D25_VREF_14 Sch=sw[14] -#set_property -dict { PACKAGE_PIN V10 IOSTANDARD LVCMOS33 } [get_ports { SW[15] }]; #IO_L21P_T3_DQS_14 Sch=sw[15] - - -## RGB LEDs -#set_property -dict { PACKAGE_PIN R12 IOSTANDARD LVCMOS33 } [get_ports { LED16_B }]; #IO_L5P_T0_D06_14 Sch=led16_b -#set_property -dict { PACKAGE_PIN M16 IOSTANDARD LVCMOS33 } [get_ports { LED16_G }]; #IO_L10P_T1_D14_14 Sch=led16_g -#set_property -dict { PACKAGE_PIN N15 
IOSTANDARD LVCMOS33 } [get_ports { LED16_R }]; #IO_L11P_T1_SRCC_14 Sch=led16_r -#set_property -dict { PACKAGE_PIN G14 IOSTANDARD LVCMOS33 } [get_ports { LED17_B }]; #IO_L15N_T2_DQS_ADV_B_15 Sch=led17_b -#set_property -dict { PACKAGE_PIN R11 IOSTANDARD LVCMOS33 } [get_ports { LED17_G }]; #IO_0_14 Sch=led17_g -#set_property -dict { PACKAGE_PIN N16 IOSTANDARD LVCMOS33 } [get_ports { LED17_R }]; #IO_L11N_T1_SRCC_14 Sch=led17_r - - -##Pmod Header JD -#set_property -dict { PACKAGE_PIN H4 IOSTANDARD LVCMOS33 } [get_ports { JD[1] }]; #IO_L21N_T3_DQS_35 Sch=jd[1] -#set_property -dict { PACKAGE_PIN H1 IOSTANDARD LVCMOS33 } [get_ports { JD[2] }]; #IO_L17P_T2_35 Sch=jd[2] -#set_property -dict { PACKAGE_PIN G1 IOSTANDARD LVCMOS33 } [get_ports { JD[3] }]; #IO_L17N_T2_35 Sch=jd[3] -#set_property -dict { PACKAGE_PIN G3 IOSTANDARD LVCMOS33 } [get_ports { JD[4] }]; #IO_L20N_T3_35 Sch=jd[4] -#set_property -dict { PACKAGE_PIN H2 IOSTANDARD LVCMOS33 } [get_ports { JD[7] }]; #IO_L15P_T2_DQS_35 Sch=jd[7] -#set_property -dict { PACKAGE_PIN G4 IOSTANDARD LVCMOS33 } [get_ports { JD[8] }]; #IO_L20P_T3_35 Sch=jd[8] -#set_property -dict { PACKAGE_PIN G2 IOSTANDARD LVCMOS33 } [get_ports { JD[9] }]; #IO_L15N_T2_DQS_35 Sch=jd[9] -#set_property -dict { PACKAGE_PIN F3 IOSTANDARD LVCMOS33 } [get_ports { JD[10] }]; #IO_L13N_T2_MRCC_35 Sch=jd[10] - -##Pmod Header JXADC -#set_property -dict { PACKAGE_PIN A14 IOSTANDARD LVCMOS33 } [get_ports { XA_N[1] }]; #IO_L9N_T1_DQS_AD3N_15 Sch=xa_n[1] -#set_property -dict { PACKAGE_PIN A13 IOSTANDARD LVCMOS33 } [get_ports { XA_P[1] }]; #IO_L9P_T1_DQS_AD3P_15 Sch=xa_p[1] -#set_property -dict { PACKAGE_PIN A16 IOSTANDARD LVCMOS33 } [get_ports { spi_sd_io[2] }]; #IO_L8N_T1_AD10N_15 Sch=xa_n[2] -#set_property -dict { PACKAGE_PIN A15 IOSTANDARD LVCMOS33 } [get_ports { spi_sd_io[0] }]; #IO_L8P_T1_AD10P_15 Sch=xa_p[2] -#set_property -dict { PACKAGE_PIN B17 IOSTANDARD LVCMOS33 } [get_ports { XA_N[3] }]; #IO_L7N_T1_AD2N_15 Sch=xa_n[3] -#set_property -dict { PACKAGE_PIN B16 
IOSTANDARD LVCMOS33 } [get_ports { spi_sd_io[1] }]; #IO_L7P_T1_AD2P_15 Sch=xa_p[3] -#set_property -dict { PACKAGE_PIN A18 IOSTANDARD LVCMOS33 } [get_ports { spi_sd_io[3] }]; #IO_L10N_T1_AD11N_15 Sch=xa_n[4] -#set_property -dict { PACKAGE_PIN B18 IOSTANDARD LVCMOS33 } [get_ports { spi_sck_o }]; #IO_L10P_T1_AD11P_15 Sch=xa_p[4] - -##VGA Connector -#set_property -dict { PACKAGE_PIN A3 IOSTANDARD LVCMOS33 } [get_ports { VGA_R[0] }]; #IO_L8N_T1_AD14N_35 Sch=vga_r[0] -#set_property -dict { PACKAGE_PIN B4 IOSTANDARD LVCMOS33 } [get_ports { VGA_R[1] }]; #IO_L7N_T1_AD6N_35 Sch=vga_r[1] -#set_property -dict { PACKAGE_PIN C5 IOSTANDARD LVCMOS33 } [get_ports { VGA_R[2] }]; #IO_L1N_T0_AD4N_35 Sch=vga_r[2] -#set_property -dict { PACKAGE_PIN A4 IOSTANDARD LVCMOS33 } [get_ports { VGA_R[3] }]; #IO_L8P_T1_AD14P_35 Sch=vga_r[3] -#set_property -dict { PACKAGE_PIN C6 IOSTANDARD LVCMOS33 } [get_ports { VGA_G[0] }]; #IO_L1P_T0_AD4P_35 Sch=vga_g[0] -#set_property -dict { PACKAGE_PIN A5 IOSTANDARD LVCMOS33 } [get_ports { VGA_G[1] }]; #IO_L3N_T0_DQS_AD5N_35 Sch=vga_g[1] -#set_property -dict { PACKAGE_PIN B6 IOSTANDARD LVCMOS33 } [get_ports { VGA_G[2] }]; #IO_L2N_T0_AD12N_35 Sch=vga_g[2] -#set_property -dict { PACKAGE_PIN A6 IOSTANDARD LVCMOS33 } [get_ports { VGA_G[3] }]; #IO_L3P_T0_DQS_AD5P_35 Sch=vga_g[3] -#set_property -dict { PACKAGE_PIN B7 IOSTANDARD LVCMOS33 } [get_ports { VGA_B[0] }]; #IO_L2P_T0_AD12P_35 Sch=vga_b[0] -#set_property -dict { PACKAGE_PIN C7 IOSTANDARD LVCMOS33 } [get_ports { VGA_B[1] }]; #IO_L4N_T0_35 Sch=vga_b[1] -#set_property -dict { PACKAGE_PIN D7 IOSTANDARD LVCMOS33 } [get_ports { VGA_B[2] }]; #IO_L6N_T0_VREF_35 Sch=vga_b[2] -#set_property -dict { PACKAGE_PIN D8 IOSTANDARD LVCMOS33 } [get_ports { VGA_B[3] }]; #IO_L4P_T0_35 Sch=vga_b[3] -#set_property -dict { PACKAGE_PIN B11 IOSTANDARD LVCMOS33 } [get_ports { VGA_HS }]; #IO_L4P_T0_15 Sch=vga_hs -#set_property -dict { PACKAGE_PIN B12 IOSTANDARD LVCMOS33 } [get_ports { VGA_VS }]; #IO_L3N_T0_DQS_AD1N_15 Sch=vga_vs - 
-##Micro SD Connector -#set_property -dict { PACKAGE_PIN E2 IOSTANDARD LVCMOS33 } [get_ports { SD_RESET }]; #IO_L14P_T2_SRCC_35 Sch=sd_reset -#set_property -dict { PACKAGE_PIN A1 IOSTANDARD LVCMOS33 } [get_ports { SD_CD }]; #IO_L9N_T1_DQS_AD7N_35 Sch=sd_cd -#set_property -dict { PACKAGE_PIN B1 IOSTANDARD LVCMOS33 } [get_ports { SD_SCK }]; #IO_L9P_T1_DQS_AD7P_35 Sch=sd_sck -#set_property -dict { PACKAGE_PIN C1 IOSTANDARD LVCMOS33 } [get_ports { SD_CMD }]; #IO_L16N_T2_35 Sch=sd_cmd -#set_property -dict { PACKAGE_PIN C2 IOSTANDARD LVCMOS33 } [get_ports { SD_DAT[0] }]; #IO_L16P_T2_35 Sch=sd_dat[0] -#set_property -dict { PACKAGE_PIN E1 IOSTANDARD LVCMOS33 } [get_ports { SD_DAT[1] }]; #IO_L18N_T2_35 Sch=sd_dat[1] -#set_property -dict { PACKAGE_PIN F1 IOSTANDARD LVCMOS33 } [get_ports { SD_DAT[2] }]; #IO_L18P_T2_35 Sch=sd_dat[2] -#set_property -dict { PACKAGE_PIN D2 IOSTANDARD LVCMOS33 } [get_ports { SD_DAT[3] }]; #IO_L14N_T2_SRCC_35 Sch=sd_dat[3] - -##Accelerometer -#set_property -dict { PACKAGE_PIN E15 IOSTANDARD LVCMOS33 } [get_ports { ACL_MISO }]; #IO_L11P_T1_SRCC_15 Sch=acl_miso -#set_property -dict { PACKAGE_PIN F14 IOSTANDARD LVCMOS33 } [get_ports { ACL_MOSI }]; #IO_L5N_T0_AD9N_15 Sch=acl_mosi -#set_property -dict { PACKAGE_PIN F15 IOSTANDARD LVCMOS33 } [get_ports { ACL_SCLK }]; #IO_L14P_T2_SRCC_15 Sch=acl_sclk -#set_property -dict { PACKAGE_PIN D15 IOSTANDARD LVCMOS33 } [get_ports { ACL_CSN }]; #IO_L12P_T1_MRCC_15 Sch=acl_csn -#set_property -dict { PACKAGE_PIN B13 IOSTANDARD LVCMOS33 } [get_ports { ACL_INT[1] }]; #IO_L2P_T0_AD8P_15 Sch=acl_int[1] -#set_property -dict { PACKAGE_PIN C16 IOSTANDARD LVCMOS33 } [get_ports { ACL_INT[2] }]; #IO_L20P_T3_A20_15 Sch=acl_int[2] - -##Temperature Sensor -#set_property -dict { PACKAGE_PIN C14 IOSTANDARD LVCMOS33 } [get_ports { TMP_SCL }]; #IO_L1N_T0_AD0N_15 Sch=tmp_scl -#set_property -dict { PACKAGE_PIN C15 IOSTANDARD LVCMOS33 } [get_ports { TMP_SDA }]; #IO_L12N_T1_MRCC_15 Sch=tmp_sda -#set_property -dict { PACKAGE_PIN D13 
IOSTANDARD LVCMOS33 } [get_ports { TMP_INT }]; #IO_L6N_T0_VREF_15 Sch=tmp_int -#set_property -dict { PACKAGE_PIN B14 IOSTANDARD LVCMOS33 } [get_ports { TMP_CT }]; #IO_L2N_T0_AD8N_15 Sch=tmp_ct - -##Omnidirectional Microphone -#set_property -dict { PACKAGE_PIN J5 IOSTANDARD LVCMOS33 } [get_ports { M_CLK }]; #IO_25_35 Sch=m_clk -#set_property -dict { PACKAGE_PIN H5 IOSTANDARD LVCMOS33 } [get_ports { M_DATA }]; #IO_L24N_T3_35 Sch=m_data -#set_property -dict { PACKAGE_PIN F5 IOSTANDARD LVCMOS33 } [get_ports { M_LRSEL }]; #IO_0_35 Sch=m_lrsel - -##PWM Audio Amplifier -#set_property -dict { PACKAGE_PIN A11 IOSTANDARD LVCMOS33 } [get_ports { AUD_PWM }]; #IO_L4N_T0_15 Sch=aud_pwm -#set_property -dict { PACKAGE_PIN D12 IOSTANDARD LVCMOS33 } [get_ports { AUD_SD }]; #IO_L6P_T0_15 Sch=aud_sd - -##USB-RS232 Interface -#set_property -dict { PACKAGE_PIN D3 IOSTANDARD LVCMOS33 } [get_ports { UART_CTS }]; #IO_L12N_T1_MRCC_35 Sch=uart_cts -#set_property -dict { PACKAGE_PIN E5 IOSTANDARD LVCMOS33 } [get_ports { UART_RTS }]; #IO_L5N_T0_AD13N_35 Sch=uart_rts - -##USB HID (PS/2) -#set_property -dict { PACKAGE_PIN F4 IOSTANDARD LVCMOS33 } [get_ports { PS2_CLK }]; #IO_L13P_T2_MRCC_35 Sch=ps2_clk -#set_property -dict { PACKAGE_PIN B2 IOSTANDARD LVCMOS33 } [get_ports { PS2_DATA }]; #IO_L10N_T1_AD15N_35 Sch=ps2_data - -##SMSC Ethernet PHY -#set_property -dict { PACKAGE_PIN C9 IOSTANDARD LVCMOS33 } [get_ports { ETH_MDC }]; #IO_L11P_T1_SRCC_16 Sch=eth_mdc -#set_property -dict { PACKAGE_PIN A9 IOSTANDARD LVCMOS33 } [get_ports { ETH_MDIO }]; #IO_L14N_T2_SRCC_16 Sch=eth_mdio -#set_property -dict { PACKAGE_PIN B3 IOSTANDARD LVCMOS33 } [get_ports { ETH_RSTN }]; #IO_L10P_T1_AD15P_35 Sch=eth_rstn -#set_property -dict { PACKAGE_PIN D9 IOSTANDARD LVCMOS33 } [get_ports { ETH_CRSDV }]; #IO_L6N_T0_VREF_16 Sch=eth_crsdv -#set_property -dict { PACKAGE_PIN C10 IOSTANDARD LVCMOS33 } [get_ports { ETH_RXERR }]; #IO_L13N_T2_MRCC_16 Sch=eth_rxerr -#set_property -dict { PACKAGE_PIN C11 IOSTANDARD LVCMOS33 } 
[get_ports { ETH_RXD[0] }]; #IO_L13P_T2_MRCC_16 Sch=eth_rxd[0] -#set_property -dict { PACKAGE_PIN D10 IOSTANDARD LVCMOS33 } [get_ports { ETH_RXD[1] }]; #IO_L19N_T3_VREF_16 Sch=eth_rxd[1] -#set_property -dict { PACKAGE_PIN B9 IOSTANDARD LVCMOS33 } [get_ports { ETH_TXEN }]; #IO_L11N_T1_SRCC_16 Sch=eth_txen -#set_property -dict { PACKAGE_PIN A10 IOSTANDARD LVCMOS33 } [get_ports { ETH_TXD[0] }]; #IO_L14P_T2_SRCC_16 Sch=eth_txd[0] -#set_property -dict { PACKAGE_PIN A8 IOSTANDARD LVCMOS33 } [get_ports { ETH_TXD[1] }]; #IO_L12N_T1_MRCC_16 Sch=eth_txd[1] -#set_property -dict { PACKAGE_PIN D5 IOSTANDARD LVCMOS33 } [get_ports { ETH_REFCLK }]; #IO_L11P_T1_SRCC_35 Sch=eth_refclk -#set_property -dict { PACKAGE_PIN B8 IOSTANDARD LVCMOS33 } [get_ports { ETH_INTN }]; #IO_L12P_T1_MRCC_16 Sch=eth_intn - -##Quad SPI Flash -#set_property -dict { PACKAGE_PIN K17 IOSTANDARD LVCMOS33 } [get_ports { QSPI_DQ[0] }]; #IO_L1P_T0_D00_MOSI_14 Sch=qspi_dq[0] -#set_property -dict { PACKAGE_PIN K18 IOSTANDARD LVCMOS33 } [get_ports { QSPI_DQ[1] }]; #IO_L1N_T0_D01_DIN_14 Sch=qspi_dq[1] -#set_property -dict { PACKAGE_PIN L14 IOSTANDARD LVCMOS33 } [get_ports { QSPI_DQ[2] }]; #IO_L2P_T0_D02_14 Sch=qspi_dq[2] -#set_property -dict { PACKAGE_PIN M14 IOSTANDARD LVCMOS33 } [get_ports { QSPI_DQ[3] }]; #IO_L2N_T0_D03_14 Sch=qspi_dq[3] -#set_property -dict { PACKAGE_PIN L13 IOSTANDARD LVCMOS33 } [get_ports { QSPI_CSN }]; #IO_L6P_T0_FCS_B_14 Sch=qspi_csn \ No newline at end of file diff --git a/hw/fpga/constraints/pynq-z2/constraints.xdc b/hw/fpga/constraints/pynq-z2/constraints.xdc index 5a3c1c4ba..38e2f6b39 100644 --- a/hw/fpga/constraints/pynq-z2/constraints.xdc +++ b/hw/fpga/constraints/pynq-z2/constraints.xdc @@ -1 +1 @@ -set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets x_heep_system_i/pad_ring_i/pad_clk_i/xilinx_iobuf_i/O] +create_clock -add -name sys_clk_pin -period 8.00 -waveform {0 4} [get_ports {clk_i}]; # 8 ns period (125 MHz), 50% duty cycle diff --git a/hw/fpga/constraints/pynq-z2/pin_assign.xdc
b/hw/fpga/constraints/pynq-z2/pin_assign.xdc index c01e2fb4a..9f8f7dedd 100644 --- a/hw/fpga/constraints/pynq-z2/pin_assign.xdc +++ b/hw/fpga/constraints/pynq-z2/pin_assign.xdc @@ -2,83 +2,97 @@ # Solderpad Hardware License, Version 2.1, see LICENSE.md for details. # SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +# Clock signal set_property -dict {PACKAGE_PIN H16 IOSTANDARD LVCMOS33} [get_ports clk_i] +set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets jtag_tck_i_IBUF] + set_property -dict {PACKAGE_PIN L19 IOSTANDARD LVCMOS33} [get_ports rst_i] -set_property -dict {PACKAGE_PIN M14 IOSTANDARD LVCMOS33} [get_ports rst_led] -set_property -dict {PACKAGE_PIN N16 IOSTANDARD LVCMOS33} [get_ports clk_led] -set_property -dict {PACKAGE_PIN W9 IOSTANDARD LVCMOS33} [get_ports clk_out] + +# LEDs +set_property -dict {PACKAGE_PIN M14 IOSTANDARD LVCMOS33} [get_ports rst_led_o] +set_property -dict {PACKAGE_PIN N16 IOSTANDARD LVCMOS33} [get_ports clk_led_o] set_property -dict {PACKAGE_PIN R14 IOSTANDARD LVCMOS33} [get_ports exit_valid_o] set_property -dict {PACKAGE_PIN P14 IOSTANDARD LVCMOS33} [get_ports exit_value_o] +set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets rst_led_OBUF] +set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets clk_out_OBUF] +set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets clk_led_OBUF] + +# Switches set_property -dict {PACKAGE_PIN M19 IOSTANDARD LVCMOS33} [get_ports execute_from_flash_i] set_property -dict {PACKAGE_PIN M20 IOSTANDARD LVCMOS33} [get_ports boot_select_i] -## Pmoda -## RPi GPIO 7-0 are shared with pmoda_rpi_gpio_tri_io[7:0] - +# FLASH # QSPI # Q0 / MOSI # Q1 / MISO # Q2 / nWP # Q3 / nHLD +set_property -dict {PACKAGE_PIN U18 IOSTANDARD LVCMOS33} [get_ports spi_flash_csb_o] ; # Pmoda[4] +set_property -dict {PACKAGE_PIN Y18 IOSTANDARD LVCMOS33} [get_ports spi_flash_sck_o] ; # Pmoda[0] +set_property -dict {PACKAGE_PIN U19 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[0]}] ; # Pmoda[5] +set_property -dict {PACKAGE_PIN Y19 IOSTANDARD 
LVCMOS33} [get_ports {spi_flash_sd_io[1]}] ; # Pmoda[1] +set_property -dict {PACKAGE_PIN W18 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[2]}] ; # Pmoda[6] +set_property -dict {PACKAGE_PIN Y16 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[3]}] ; # Pmoda[2] -set_property -dict {PACKAGE_PIN U18 IOSTANDARD LVCMOS33} [get_ports spi_flash_csb_o] -set_property -dict {PACKAGE_PIN Y18 IOSTANDARD LVCMOS33} [get_ports spi_flash_sck_o] -set_property -dict {PACKAGE_PIN U19 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[0]}] -set_property -dict {PACKAGE_PIN Y19 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[1]}] -set_property -dict {PACKAGE_PIN W18 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[2]}] -set_property -dict {PACKAGE_PIN Y16 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[3]}] -set_property -dict {PACKAGE_PIN W19 IOSTANDARD LVCMOS33} [get_ports jtag_trst_ni] +# UART +set_property -dict {PACKAGE_PIN W14 IOSTANDARD LVCMOS33} [get_ports uart_tx_o] ; # Pmodb[0] +set_property -dict {PACKAGE_PIN V16 IOSTANDARD LVCMOS33} [get_ports uart_rx_i] ; # Pmodb[4] -set_property -dict {PACKAGE_PIN F16 IOSTANDARD LVCMOS33} [get_ports spi_csb_o] -set_property -dict {PACKAGE_PIN H15 IOSTANDARD LVCMOS33} [get_ports spi_sck_o] -set_property -dict {PACKAGE_PIN T12 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[0]}] -set_property -dict {PACKAGE_PIN W15 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[1]}] -set_property -dict {PACKAGE_PIN P18 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[2]}] -set_property -dict {PACKAGE_PIN N17 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[3]}] +# JTAG +set_property -dict {PACKAGE_PIN Y14 IOSTANDARD LVCMOS33} [get_ports jtag_tdi_i] ; # Pmodb[1] +set_property -dict {PACKAGE_PIN V12 IOSTANDARD LVCMOS33} [get_ports jtag_tdo_o] ; # Pmodb[6] +set_property -dict {PACKAGE_PIN T11 IOSTANDARD LVCMOS33} [get_ports jtag_tms_i] ; # Pmodb[2] +set_property -dict {PACKAGE_PIN W16 IOSTANDARD LVCMOS33} [get_ports jtag_tck_i] ; # Pmodb[5] +set_property -dict {PACKAGE_PIN W19
IOSTANDARD LVCMOS33} [get_ports jtag_trst_ni] ; # Pmoda[7] -## Pmodb -set_property -dict {PACKAGE_PIN W14 IOSTANDARD LVCMOS33} [get_ports uart_tx_o] -set_property -dict {PACKAGE_PIN V16 IOSTANDARD LVCMOS33} [get_ports uart_rx_i] -set_property -dict {PACKAGE_PIN Y14 IOSTANDARD LVCMOS33} [get_ports jtag_tdi_i] -set_property -dict {PACKAGE_PIN V12 IOSTANDARD LVCMOS33} [get_ports jtag_tdo_o] -set_property -dict {PACKAGE_PIN T11 IOSTANDARD LVCMOS33} [get_ports jtag_tms_i] -set_property -dict {PACKAGE_PIN W16 IOSTANDARD LVCMOS33} [get_ports jtag_tck_i] +# I2C +set_property -dict {PACKAGE_PIN W13 IOSTANDARD LVCMOS33} [get_ports {i2c_scl_io}] ; # Pmodb[7] +set_property -dict {PACKAGE_PIN T10 IOSTANDARD LVCMOS33} [get_ports {i2c_sda_io}] ; # Pmodb[3] -set_property -dict {PACKAGE_PIN T14 IOSTANDARD LVCMOS33} [get_ports {gpio_io[0]}] -set_property -dict {PACKAGE_PIN Y8 IOSTANDARD LVCMOS33} [get_ports {gpio_io[1]}] -set_property -dict {PACKAGE_PIN W8 IOSTANDARD LVCMOS33} [get_ports {gpio_io[2]}] -set_property -dict {PACKAGE_PIN Y7 IOSTANDARD LVCMOS33} [get_ports {gpio_io[3]}] -set_property -dict {PACKAGE_PIN Y6 IOSTANDARD LVCMOS33} [get_ports {gpio_io[4]}] -set_property -dict {PACKAGE_PIN U12 IOSTANDARD LVCMOS33} [get_ports {gpio_io[5]}] -set_property -dict {PACKAGE_PIN W10 IOSTANDARD LVCMOS33} [get_ports {gpio_io[6]}] -set_property -dict {PACKAGE_PIN V10 IOSTANDARD LVCMOS33} [get_ports {gpio_io[7]}] -set_property -dict {PACKAGE_PIN V8 IOSTANDARD LVCMOS33} [get_ports {gpio_io[8]}] -set_property -dict {PACKAGE_PIN U8 IOSTANDARD LVCMOS33} [get_ports {gpio_io[9]}] -set_property -dict {PACKAGE_PIN V7 IOSTANDARD LVCMOS33} [get_ports {gpio_io[10]}] -set_property -dict {PACKAGE_PIN U7 IOSTANDARD LVCMOS33} [get_ports {gpio_io[11]}] -set_property -dict {PACKAGE_PIN V6 IOSTANDARD LVCMOS33} [get_ports {gpio_io[12]}] -set_property -dict {PACKAGE_PIN U13 IOSTANDARD LVCMOS33} [get_ports {gpio_io[13]}] -set_property -dict {PACKAGE_PIN V13 IOSTANDARD LVCMOS33} [get_ports {gpio_io[14]}] 
-set_property -dict {PACKAGE_PIN Y9 IOSTANDARD LVCMOS33} [get_ports {pdm2pcm_clk_io}] -set_property -dict {PACKAGE_PIN A20 IOSTANDARD LVCMOS33} [get_ports {pdm2pcm_pdm_io}] -set_property -dict {PACKAGE_PIN B19 IOSTANDARD LVCMOS33} [get_ports {i2s_sck_io}] -set_property -dict {PACKAGE_PIN B20 IOSTANDARD LVCMOS33} [get_ports {i2s_ws_io}] -set_property -dict {PACKAGE_PIN P15 IOSTANDARD LVCMOS33} [get_ports {i2s_sd_io}] +# SPI SD +set_property -dict {PACKAGE_PIN F16 IOSTANDARD LVCMOS33} [get_ports spi_csb_o] ; # arduino_direct_spi_ss_io +set_property -dict {PACKAGE_PIN H15 IOSTANDARD LVCMOS33} [get_ports spi_sck_o] ; # arduino_direct_spi_sck_io +set_property -dict {PACKAGE_PIN T12 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[0]}] ; # arduino_direct_spi_io0_io +set_property -dict {PACKAGE_PIN W15 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[1]}] ; # arduino_direct_spi_io1_io +set_property -dict {PACKAGE_PIN P18 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[2]}] ; # arduino_gpio_tri_io[12] +set_property -dict {PACKAGE_PIN N17 IOSTANDARD LVCMOS33} [get_ports {spi_sd_io[3]}] ; # arduino_gpio_tri_io[13] -## Tri-color LD5 for TARGET_PYNQ_Z2 -set_property -dict {PACKAGE_PIN M15 IOSTANDARD LVCMOS33} [get_ports {gpio_io[15]}] -set_property -dict {PACKAGE_PIN G14 IOSTANDARD LVCMOS33} [get_ports {gpio_io[16]}] -set_property -dict {PACKAGE_PIN L14 IOSTANDARD LVCMOS33} [get_ports {gpio_io[17]}] +# GPIOs +set_property -dict {PACKAGE_PIN T14 IOSTANDARD LVCMOS33} [get_ports {gpio_io[0]}] ; # arduino_gpio_tri_io[0] +set_property -dict {PACKAGE_PIN Y8 IOSTANDARD LVCMOS33} [get_ports {gpio_io[1]}] ; # rpi_gpio_tri_io[11] +set_property -dict {PACKAGE_PIN W8 IOSTANDARD LVCMOS33} [get_ports {gpio_io[2]}] ; # rpi_gpio_tri_io[5] +set_property -dict {PACKAGE_PIN Y7 IOSTANDARD LVCMOS33} [get_ports {gpio_io[3]}] ; # rpi_gpio_tri_io[16] +set_property -dict {PACKAGE_PIN Y6 IOSTANDARD LVCMOS33} [get_ports {gpio_io[4]}] ; # rpi_gpio_tri_io[7] +set_property -dict {PACKAGE_PIN U12 IOSTANDARD LVCMOS33} 
[get_ports {gpio_io[5]}] ; # arduino_gpio_tri_io[1] +set_property -dict {PACKAGE_PIN W10 IOSTANDARD LVCMOS33} [get_ports {gpio_io[6]}] ; # rpi_gpio_tri_io[3] +set_property -dict {PACKAGE_PIN V10 IOSTANDARD LVCMOS33} [get_ports {gpio_io[7]}] ; # rpi_gpio_tri_io[1] +set_property -dict {PACKAGE_PIN V8 IOSTANDARD LVCMOS33} [get_ports {gpio_io[8]}] ; # rpi_gpio_tri_io[2] +set_property -dict {PACKAGE_PIN U8 IOSTANDARD LVCMOS33} [get_ports {gpio_io[9]}] ; # rpi_gpio_tri_io[14] +set_property -dict {PACKAGE_PIN V7 IOSTANDARD LVCMOS33} [get_ports {gpio_io[10]}] ; # rpi_gpio_tri_io[19] +set_property -dict {PACKAGE_PIN U7 IOSTANDARD LVCMOS33} [get_ports {gpio_io[11]}] ; # rpi_gpio_tri_io[9] +set_property -dict {PACKAGE_PIN V6 IOSTANDARD LVCMOS33} [get_ports {gpio_io[12]}] ; # rpi_gpio_tri_io[6] +set_property -dict {PACKAGE_PIN U13 IOSTANDARD LVCMOS33} [get_ports {gpio_io[13]}] ; # arduino_gpio_tri_io[2] +set_property -dict {PACKAGE_PIN V13 IOSTANDARD LVCMOS33} [get_ports {gpio_io[14]}] ; # arduino_gpio_tri_io[3] -set_property -dict {PACKAGE_PIN W6 IOSTANDARD LVCMOS33} [get_ports {spi2_csb_o[0]}] -set_property -dict {PACKAGE_PIN T15 IOSTANDARD LVCMOS33} [get_ports {spi2_csb_o[1]}] -set_property -dict {PACKAGE_PIN C20 IOSTANDARD LVCMOS33} [get_ports {spi2_sck_o}] -set_property -dict {PACKAGE_PIN V17 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[0]}] -set_property -dict {PACKAGE_PIN V18 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[1]}] -set_property -dict {PACKAGE_PIN T16 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[2]}] -set_property -dict {PACKAGE_PIN R17 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[3]}] -set_property -dict {PACKAGE_PIN W13 IOSTANDARD LVCMOS33} [get_ports {i2c_scl_io}] -set_property -dict {PACKAGE_PIN T10 IOSTANDARD LVCMOS33} [get_ports {i2c_sda_io}] +# PDM2PCM +set_property -dict {PACKAGE_PIN Y9 IOSTANDARD LVCMOS33} [get_ports {pdm2pcm_clk_io}] ; # rpi_gpio_tri_io[13] +set_property -dict {PACKAGE_PIN A20 IOSTANDARD LVCMOS33} [get_ports {pdm2pcm_pdm_io}] ; # 
rpi_gpio_tri_io[12] -set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets jtag_tck_i_IBUF] +# I2S +set_property -dict {PACKAGE_PIN B19 IOSTANDARD LVCMOS33} [get_ports {i2s_sck_io}] ; # rpi_gpio_tri_io[8] +set_property -dict {PACKAGE_PIN B20 IOSTANDARD LVCMOS33} [get_ports {i2s_ws_io}] ; # rpi_gpio_tri_io[4] +set_property -dict {PACKAGE_PIN P15 IOSTANDARD LVCMOS33} [get_ports {i2s_sd_io}] ; # arduino_direct_iic_scl_io + +# SPI2 +set_property -dict {PACKAGE_PIN W6 IOSTANDARD LVCMOS33} [get_ports {spi2_csb_o[0]}] ; # rpi_gpio_tri_io[15] +set_property -dict {PACKAGE_PIN T15 IOSTANDARD LVCMOS33} [get_ports {spi2_csb_o[1]}] ; # arduino_gpio_tri_io[5] +set_property -dict {PACKAGE_PIN C20 IOSTANDARD LVCMOS33} [get_ports {spi2_sck_o}] ; # rpi_gpio_tri_io[10] +set_property -dict {PACKAGE_PIN V17 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[0]}] ; # arduino_gpio_tri_io[8] +set_property -dict {PACKAGE_PIN V18 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[1]}] ; # arduino_gpio_tri_io[9] +set_property -dict {PACKAGE_PIN T16 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[2]}] ; # arduino_gpio_tri_io[10] +set_property -dict {PACKAGE_PIN R17 IOSTANDARD LVCMOS33} [get_ports {spi2_sd_io[3]}] ; # arduino_gpio_tri_io[11] +# Tri-color LEDs for TARGET_PYNQ_Z2 +set_property -dict {PACKAGE_PIN M15 IOSTANDARD LVCMOS33} [get_ports {gpio_io[15]}] ; # rgbleds_6bits_tri_o[5] +set_property -dict {PACKAGE_PIN G14 IOSTANDARD LVCMOS33} [get_ports {gpio_io[16]}] ; # rgbleds_6bits_tri_o[3] +set_property -dict {PACKAGE_PIN L14 IOSTANDARD LVCMOS33} [get_ports {gpio_io[17]}] ; # rgbleds_6bits_tri_o[4] diff --git a/hw/fpga/constraints/zcu104/pin_assign.xdc b/hw/fpga/constraints/zcu104/pin_assign.xdc new file mode 100644 index 000000000..4b523cdfd --- /dev/null +++ b/hw/fpga/constraints/zcu104/pin_assign.xdc @@ -0,0 +1,99 @@ +# Copyright 2022 EPFL +# Solderpad Hardware License, Version 2.1, see LICENSE.md for details.
+# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +# CLOCK +set_property -dict {PACKAGE_PIN AH18 IOSTANDARD DIFF_SSTL12} [get_ports clk_300mhz_p] +set_property -dict {PACKAGE_PIN AH17 IOSTANDARD DIFF_SSTL12} [get_ports clk_300mhz_n] +set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets jtag_tck_i] + +# RESET +set_property -dict {PACKAGE_PIN M11 IOSTANDARD LVCMOS33} [get_ports rst_i] +set_property CLOCK_DEDICATED_ROUTE FALSE [get_nets rst_i] + +# LEDS +set_property -dict {PACKAGE_PIN D5 IOSTANDARD LVCMOS33} [get_ports rst_led_o] +set_property -dict {PACKAGE_PIN D6 IOSTANDARD LVCMOS33} [get_ports clk_led_o] +set_property -dict {PACKAGE_PIN A5 IOSTANDARD LVCMOS33} [get_ports exit_valid_o] +set_property -dict {PACKAGE_PIN B5 IOSTANDARD LVCMOS33} [get_ports exit_value_o] + +# SWITCHES +set_property -dict {PACKAGE_PIN E4 IOSTANDARD LVCMOS33} [get_ports execute_from_flash_i] +set_property -dict {PACKAGE_PIN D4 IOSTANDARD LVCMOS33} [get_ports boot_select_i] + +# FLASH +# QSPI +# Q0 / MOSI +# Q1 / MISO +# Q2 / nWP +# Q3 / nHLD +set_property -dict {PACKAGE_PIN L10 IOSTANDARD LVCMOS33} [get_ports spi_flash_csb_o] +set_property -dict {PACKAGE_PIN J9 IOSTANDARD LVCMOS33} [get_ports spi_flash_sck_o] +set_property -dict {PACKAGE_PIN M10 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[0]}] +set_property -dict {PACKAGE_PIN K9 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[1]}] +set_property -dict {PACKAGE_PIN M8 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[2]}] +set_property -dict {PACKAGE_PIN K8 IOSTANDARD LVCMOS33} [get_ports {spi_flash_sd_io[3]}] + +# UART +set_property -dict {PACKAGE_PIN G8 IOSTANDARD LVCMOS33} [get_ports uart_tx_o] +set_property -dict {PACKAGE_PIN G6 IOSTANDARD LVCMOS33} [get_ports uart_rx_i] + +# JTAG +set_property -dict {PACKAGE_PIN H8 IOSTANDARD LVCMOS33} [get_ports jtag_tdi_i] +set_property -dict {PACKAGE_PIN J6 IOSTANDARD LVCMOS33} [get_ports jtag_tdo_o] +set_property -dict {PACKAGE_PIN G7 IOSTANDARD LVCMOS33} [get_ports jtag_tms_i] +set_property 
-dict {PACKAGE_PIN H6 IOSTANDARD LVCMOS33} [get_ports jtag_tck_i] +set_property -dict {PACKAGE_PIN M9 IOSTANDARD LVCMOS33} [get_ports jtag_trst_ni] + +# I2C +set_property -dict {PACKAGE_PIN J7 IOSTANDARD LVCMOS33} [get_ports i2c_scl_io] +set_property -dict {PACKAGE_PIN H7 IOSTANDARD LVCMOS33} [get_ports i2c_sda_io] + +## The following pins are sent to the FMC connector, using the LA pins as single-ended. +## The bank only supports up to 1.8 V. + +# SPI SD +set_property -dict {PACKAGE_PIN H19 IOSTANDARD LVCMOS18} [get_ports spi_csb_o] +set_property -dict {PACKAGE_PIN G19 IOSTANDARD LVCMOS18} [get_ports spi_sck_o] +set_property -dict {PACKAGE_PIN L15 IOSTANDARD LVCMOS18} [get_ports {spi_sd_io[0]}] +set_property -dict {PACKAGE_PIN K15 IOSTANDARD LVCMOS18} [get_ports {spi_sd_io[1]}] +set_property -dict {PACKAGE_PIN C13 IOSTANDARD LVCMOS18} [get_ports {spi_sd_io[2]}] +set_property -dict {PACKAGE_PIN C12 IOSTANDARD LVCMOS18} [get_ports {spi_sd_io[3]}] + +# GPIOs +set_property -dict {PACKAGE_PIN D11 IOSTANDARD LVCMOS18} [get_ports {gpio_io[0]}] +set_property -dict {PACKAGE_PIN D10 IOSTANDARD LVCMOS18} [get_ports {gpio_io[1]}] +set_property -dict {PACKAGE_PIN A8 IOSTANDARD LVCMOS18} [get_ports {gpio_io[2]}] +set_property -dict {PACKAGE_PIN A7 IOSTANDARD LVCMOS18} [get_ports {gpio_io[3]}] +set_property -dict {PACKAGE_PIN H18 IOSTANDARD LVCMOS18} [get_ports {gpio_io[4]}] +set_property -dict {PACKAGE_PIN H17 IOSTANDARD LVCMOS18} [get_ports {gpio_io[5]}] +set_property -dict {PACKAGE_PIN K17 IOSTANDARD LVCMOS18} [get_ports {gpio_io[6]}] +set_property -dict {PACKAGE_PIN J17 IOSTANDARD LVCMOS18} [get_ports {gpio_io[7]}] +set_property -dict {PACKAGE_PIN H16 IOSTANDARD LVCMOS18} [get_ports {gpio_io[8]}] +set_property -dict {PACKAGE_PIN G16 IOSTANDARD LVCMOS18} [get_ports {gpio_io[9]}] +set_property -dict {PACKAGE_PIN G15 IOSTANDARD LVCMOS18} [get_ports {gpio_io[10]}] +set_property -dict {PACKAGE_PIN F15 IOSTANDARD LVCMOS18} [get_ports {gpio_io[11]}] +set_property -dict {PACKAGE_PIN 
F11 IOSTANDARD LVCMOS18} [get_ports {gpio_io[12]}] +set_property -dict {PACKAGE_PIN E10 IOSTANDARD LVCMOS18} [get_ports {gpio_io[13]}] +set_property -dict {PACKAGE_PIN B11 IOSTANDARD LVCMOS18} [get_ports {gpio_io[14]}] +set_property -dict {PACKAGE_PIN A11 IOSTANDARD LVCMOS18} [get_ports {gpio_io[15]}] +set_property -dict {PACKAGE_PIN B9 IOSTANDARD LVCMOS18} [get_ports {gpio_io[16]}] +set_property -dict {PACKAGE_PIN B8 IOSTANDARD LVCMOS18} [get_ports {gpio_io[17]}] + +# PDM2PCM +set_property -dict {PACKAGE_PIN K19 IOSTANDARD LVCMOS18} [get_ports pdm2pcm_clk_io] +set_property -dict {PACKAGE_PIN K18 IOSTANDARD LVCMOS18} [get_ports pdm2pcm_pdm_io] + +# I2S +set_property -dict {PACKAGE_PIN E18 IOSTANDARD LVCMOS18} [get_ports i2s_sck_io] +set_property -dict {PACKAGE_PIN E17 IOSTANDARD LVCMOS18} [get_ports i2s_ws_io] +set_property -dict {PACKAGE_PIN G18 IOSTANDARD LVCMOS18} [get_ports i2s_sd_io] + +# SPI2 +set_property -dict {PACKAGE_PIN F18 IOSTANDARD LVCMOS18} [get_ports {spi2_csb_o[0]}] +set_property -dict {PACKAGE_PIN D17 IOSTANDARD LVCMOS18} [get_ports {spi2_csb_o[1]}] +set_property -dict {PACKAGE_PIN C17 IOSTANDARD LVCMOS18} [get_ports spi2_sck_o] +set_property -dict {PACKAGE_PIN F12 IOSTANDARD LVCMOS18} [get_ports {spi2_sd_io[0]}] +set_property -dict {PACKAGE_PIN E12 IOSTANDARD LVCMOS18} [get_ports {spi2_sd_io[1]}] +set_property -dict {PACKAGE_PIN H13 IOSTANDARD LVCMOS18} [get_ports {spi2_sd_io[2]}] +set_property -dict {PACKAGE_PIN H12 IOSTANDARD LVCMOS18} [get_ports {spi2_sd_io[3]}] diff --git a/hw/fpga/pad_cell_inout_xilinx.sv b/hw/fpga/pad_cell_inout_xilinx.sv index 69ee4e7e1..5c68364ba 100644 --- a/hw/fpga/pad_cell_inout_xilinx.sv +++ b/hw/fpga/pad_cell_inout_xilinx.sv @@ -3,7 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 module pad_cell_inout #( - parameter PADATTR = 16 + parameter PADATTR = 16, + parameter core_v_mini_mcu_pkg::pad_side_e SIDE = core_v_mini_mcu_pkg::TOP ) ( input logic pad_in_i, input logic pad_oe_i, diff --git 
a/hw/fpga/pad_cell_input_xilinx.sv b/hw/fpga/pad_cell_input_xilinx.sv index 989aad1cf..470a651a7 100644 --- a/hw/fpga/pad_cell_input_xilinx.sv +++ b/hw/fpga/pad_cell_input_xilinx.sv @@ -3,7 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 module pad_cell_input #( - parameter PADATTR = 16 + parameter PADATTR = 16, + parameter core_v_mini_mcu_pkg::pad_side_e SIDE = core_v_mini_mcu_pkg::TOP ) ( input logic pad_in_i, input logic pad_oe_i, diff --git a/hw/fpga/pad_cell_output_xilinx.sv b/hw/fpga/pad_cell_output_xilinx.sv index 3620ad1bf..43592acd1 100644 --- a/hw/fpga/pad_cell_output_xilinx.sv +++ b/hw/fpga/pad_cell_output_xilinx.sv @@ -3,7 +3,8 @@ // SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 module pad_cell_output #( - parameter PADATTR = 16 + parameter PADATTR = 16, + parameter core_v_mini_mcu_pkg::pad_side_e SIDE = core_v_mini_mcu_pkg::TOP ) ( input logic pad_in_i, input logic pad_oe_i, diff --git a/hw/fpga/scripts/generate_sram.tcl b/hw/fpga/scripts/generate_sram.tcl deleted file mode 100644 index 7ba6d9758..000000000 --- a/hw/fpga/scripts/generate_sram.tcl +++ /dev/null @@ -1,27 +0,0 @@ - -set ipName xilinx_mem_gen_0 - -create_ip -name blk_mem_gen -vendor xilinx.com -library ip -version 8.4 -module_name $ipName - -set_property -dict [list CONFIG.Enable_32bit_Address {false} \ - CONFIG.Use_Byte_Write_Enable {true} \ - CONFIG.Byte_Size {8} \ - CONFIG.Algorithm {Minimum_Area} \ - CONFIG.Primitive {2kx9} \ - CONFIG.Write_Width_A {32} \ - CONFIG.Write_Depth_A {8192} \ - CONFIG.Read_Width_A {32} \ - CONFIG.Enable_A {Use_ENA_Pin} \ - CONFIG.Write_Width_B {32} \ - CONFIG.Read_Width_B {32} \ - CONFIG.Register_PortA_Output_of_Memory_Primitives {false} \ - CONFIG.Use_RSTA_Pin {false} \ - CONFIG.EN_SAFETY_CKT {false}] [get_ips $ipName] - -#generate_target {instantiation_template} [get_ips $ipName] - -#export_ip_user_files -of_objects [get_ips $ipName] -no_script -sync -force -quiet - -create_ip_run [get_ips $ipName] -launch_run -jobs 8 ${ipName}_synth_1 
-wait_on_run ${ipName}_synth_1 diff --git a/hw/fpga/scripts/generate_sram.tcl.tpl b/hw/fpga/scripts/generate_sram.tcl.tpl new file mode 100644 index 000000000..e8e6cbb40 --- /dev/null +++ b/hw/fpga/scripts/generate_sram.tcl.tpl @@ -0,0 +1,50 @@ +% for num_words in xheep.iter_bank_numwords(): + +set ipName xilinx_mem_gen_${num_words} + +create_ip -name blk_mem_gen -vendor xilinx.com -library ip -version 8.4 -module_name $ipName + +set_property -dict [list CONFIG.Enable_32bit_Address {false} \\ + + CONFIG.Use_Byte_Write_Enable {true} \\ + + CONFIG.Byte_Size {8} \\ + + CONFIG.Algorithm {Minimum_Area} \\ + + CONFIG.Primitive {2kx9} \\ + + CONFIG.Write_Width_A {32} \\ + + CONFIG.Write_Depth_A {${num_words}} \\ + + CONFIG.Read_Width_A {32} \\ + + CONFIG.Enable_A {Use_ENA_Pin} \\ + + CONFIG.Write_Width_B {32} \\ + + CONFIG.Read_Width_B {32} \\ + + CONFIG.Register_PortA_Output_of_Memory_Primitives {false} \\ + + CONFIG.Use_RSTA_Pin {false} \\ + + CONFIG.EN_SAFETY_CKT {false}] [get_ips $ipName] + +#generate_target {instantiation_template} [get_ips $ipName] + +#export_ip_user_files -of_objects [get_ips $ipName] -no_script -sync -force -quiet + +create_ip_run [get_ips $ipName] + +% endfor + +<%ips = "" +for num_words in xheep.iter_bank_numwords(): + ips += f" xilinx_mem_gen_{num_words}_synth_1"%> +launch_runs -jobs 8 ${ips} + +% for num_words in xheep.iter_bank_numwords(): +wait_on_run xilinx_mem_gen_${num_words}_synth_1 +% endfor diff --git a/hw/fpga/scripts/nexys/set_board.tcl b/hw/fpga/scripts/nexys/set_board.tcl deleted file mode 100644 index 544cd11bd..000000000 --- a/hw/fpga/scripts/nexys/set_board.tcl +++ /dev/null @@ -1,3 +0,0 @@ -# Select board -set_property -name "board_part_repo_paths" -value "[file normalize "../../../hw/fpga/board_files/"]" -objects [current_project] -set_property -name "board_part" -value "digilentinc.com:nexys-a7-100t:part0:1.3" -objects [current_project] diff --git a/hw/fpga/scripts/pynq-z2/set_board.tcl b/hw/fpga/scripts/pynq-z2/set_board.tcl 
deleted file mode 100644 index f16f754ff..000000000 --- a/hw/fpga/scripts/pynq-z2/set_board.tcl +++ /dev/null @@ -1,3 +0,0 @@ -# Select board -set_property -name "board_part_repo_paths" -value "[file normalize "../../../hw/fpga/board_files/"]" -objects [current_project] -set_property -name "board_part" -value "tul.com.tw:pynq-z2:part0:1.0" -objects [current_project] diff --git a/hw/fpga/scripts/zcu104/xilinx_generate_clk_wizard.tcl b/hw/fpga/scripts/zcu104/xilinx_generate_clk_wizard.tcl new file mode 100644 index 000000000..d67e40541 --- /dev/null +++ b/hw/fpga/scripts/zcu104/xilinx_generate_clk_wizard.tcl @@ -0,0 +1,40 @@ +# Copyright 2022 EPFL +# Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +# Define design macros + +set design_name xilinx_clk_wizard + +# Create block design +create_bd_design $design_name + +# Create instance and set properties +create_bd_cell -type ip -vlnv xilinx.com:ip:clk_wiz:6.0 clk_wiz_0 +set_property -dict [list \ + CONFIG.CLKIN1_JITTER_PS {33.330000000000005} \ + CONFIG.CLKOUT1_JITTER {282.792} \ + CONFIG.CLKOUT1_PHASE_ERROR {207.545} \ + CONFIG.CLKOUT1_REQUESTED_OUT_FREQ {15} \ + CONFIG.CLK_IN1_BOARD_INTERFACE {clk_300mhz} \ + CONFIG.MMCM_CLKFBOUT_MULT_F {32.875} \ + CONFIG.MMCM_CLKIN1_PERIOD {3.333} \ + CONFIG.MMCM_CLKIN2_PERIOD {10.0} \ + CONFIG.MMCM_CLKOUT0_DIVIDE_F {65.750} \ + CONFIG.MMCM_DIVCLK_DIVIDE {10} \ + CONFIG.OPTIMIZE_CLOCKING_STRUCTURE_EN {true} \ + CONFIG.PRIM_SOURCE {Differential_clock_capable_pin} \ + CONFIG.USE_LOCKED {false} \ + CONFIG.USE_RESET {true} \ +] [get_bd_cells clk_wiz_0] + +# Create ports +make_bd_pins_external [get_bd_cells clk_wiz_0] +make_bd_intf_pins_external [get_bd_cells clk_wiz_0] + +# Save and close block design +save_bd_design +close_bd_design $design_name + +# Create wrapper +set wrapper_path [ make_wrapper -fileset sources_1 -files [ get_files -norecurse xilinx_clk_wizard.bd ] -top ] +add_files -norecurse -fileset 
sources_1 $wrapper_path diff --git a/hw/fpga/sram_wrapper.sv b/hw/fpga/sram_wrapper.sv.tpl similarity index 75% rename from hw/fpga/sram_wrapper.sv rename to hw/fpga/sram_wrapper.sv.tpl index 44fb56878..b317c3621 100644 --- a/hw/fpga/sram_wrapper.sv +++ b/hw/fpga/sram_wrapper.sv.tpl @@ -26,15 +26,22 @@ module sram_wrapper #( // output ports output logic [31:0] rdata_o ); - - xilinx_mem_gen_0 tc_ram_i ( - .clka (clk_i), - .ena (req_i), - .wea ({4{req_i & we_i}} & be_i), - .addra(addr_i), - .dina (wdata_i), - // output ports - .douta(rdata_o) - ); - +<%el = ""%> +% for num_words in xheep.iter_bank_numwords(): + ${el}if (NumWords == 32'd${num_words}) begin + xilinx_mem_gen_${num_words} tc_ram_i ( + .clka (clk_i), + .ena (req_i), + .wea ({4{req_i & we_i}} & be_i), + .addra(addr_i), + .dina (wdata_i), + // output ports + .douta(rdata_o) + ); + end +<%el = "else "%> +% endfor + else begin + $error("Bank size not generated."); + end endmodule diff --git a/hw/fpga/xilinx_core_v_mini_mcu_wrapper.sv b/hw/fpga/xilinx_core_v_mini_mcu_wrapper.sv index ff9d62923..fc9c53fb0 100644 --- a/hw/fpga/xilinx_core_v_mini_mcu_wrapper.sv +++ b/hw/fpga/xilinx_core_v_mini_mcu_wrapper.sv @@ -13,13 +13,16 @@ module xilinx_core_v_mini_mcu_wrapper parameter CLK_LED_COUNT_LENGTH = 27 ) ( +`ifdef FPGA_ZCU104 + inout logic clk_300mhz_n, + inout logic clk_300mhz_p, +`else inout logic clk_i, +`endif inout logic rst_i, - //visibility signals - output logic rst_led, - output logic clk_led, - output logic clk_out, + output logic rst_led_o, + output logic clk_led_o, inout logic boot_select_i, inout logic execute_from_flash_i, @@ -75,10 +78,10 @@ module xilinx_core_v_mini_mcu_wrapper `endif // reset LED for debugging - assign rst_led = rst_n; + assign rst_led_o = rst_n; // counter to blink an LED - assign clk_led = clk_count[CLK_LED_COUNT_LENGTH-1]; + assign clk_led_o = clk_count[CLK_LED_COUNT_LENGTH-1]; always_ff @(posedge clk_gen or negedge rst_n) begin : clk_count_process if (!rst_n) begin @@ -91,17 
+94,23 @@ module xilinx_core_v_mini_mcu_wrapper // eXtension Interface if_xif #() ext_if (); - // clock output for debugging - assign clk_out = clk_gen; - +`ifdef FPGA_ZCU104 + xilinx_clk_wizard_wrapper xilinx_clk_wizard_wrapper_i ( + .CLK_IN1_D_0_clk_n(clk_300mhz_n), + .CLK_IN1_D_0_clk_p(clk_300mhz_p), + .clk_out1_0(clk_gen) + ); +`elsif FPGA_NEXYS xilinx_clk_wizard_wrapper xilinx_clk_wizard_wrapper_i ( -`ifdef FPGA_NEXYS .clk_100MHz(clk_i), -`else + .clk_out1_0(clk_gen) + ); +`else // FPGA PYNQ-Z2 + xilinx_clk_wizard_wrapper xilinx_clk_wizard_wrapper_i ( .clk_125MHz(clk_i), -`endif .clk_out1_0(clk_gen) ); +`endif x_heep_system #( .X_EXT(X_EXT), diff --git a/hw/ip/dma/data/dma.hjson b/hw/ip/dma/data/dma.hjson index d37d92f71..6be440323 100644 --- a/hw/ip/dma/data/dma.hjson +++ b/hw/ip/dma/data/dma.hjson @@ -33,13 +33,24 @@ { bits: "31:0", name: "PTR_ADDR", desc: "Address data pointer (word aligned) - used only in Address mode" } ] }, - { name: "SIZE", - desc: "Number of bytes to copy - Once a value is written, the copy starts", + { name: "SIZE_D1", + desc: "Number of bytes to copy from, defined with respect to the first dimension - Once a value is written, the copy starts", swaccess: "rw", hwaccess: "hro", hwqe: "true", // enable `qe` latched signal of software write pulse + // Dimensioned to 16 bits to allow for 64kB transfers on 1D fields: [ - { bits: "31:0", name: "SIZE", desc: "DMA counter and start" } + { bits: "15:0", name: "SIZE", desc: "DMA counter D1 and start" } + ] + }, + { name: "SIZE_D2", + desc: "Number of bytes to copy from, defined with respect to the second dimension", + swaccess: "rw", + hwaccess: "hro", + hwqe: "true", // enable `qe` latched signal of software write pulse + // Dimensioned to 16 bits to allow for 64kB transfers on 2D + fields: [ + { bits: "15:0", name: "SIZE", desc: "DMA counter D2" } ] }, { name: "STATUS", @@ -50,23 +61,56 @@ hwre: "true", // enable `re` latched signal of software read pulse resval: 1, fields: [ - { bits: "0", 
name: "READY", desc: "Transaction iss done"}, + { bits: "0", name: "READY", desc: "Transaction is done"}, { bits: "1", name: "WINDOW_DONE", desc: "set if DMA is copying second half"}, ] }, - { name: "PTR_INC", - desc: "Increment number of src/dst pointer every time a word is copied", + { name: "SRC_PTR_INC_D1", + desc: "Increment the D1 source pointer every time a word is copied", swaccess: "rw", hwaccess: "hro", + // Dimensioned to allow a maximum of a 15 element stride for a data_type_word case fields: [ - { bits: "7:0", - name: "SRC_PTR_INC", - desc: "Source pointer increment", + { bits: "5:0", + name: "INC", + desc: "Source pointer d1 increment", resval:4 - }, - { bits: "15:8", - name: "DST_PTR_INC", - desc: "Destination pointer increment", + } + ] + }, + { name: "SRC_PTR_INC_D2", + desc: "Increment the D2 source pointer every time a word is copied", + swaccess: "rw", + hwaccess: "hro", + // Dimensioned to allow a maximum of 15 element stride for a data_type_word + fields: [ + { bits: "22:0", + name: "INC", + desc: "Source pointer d2 increment", + resval:4 + } + ] + }, + { name: "DST_PTR_INC_D1", + desc: "Increment the D1 destination pointer every time a word is copied", + swaccess: "rw", + hwaccess: "hro", + fields: [ + { bits: "5:0", + name: "INC", + desc: "Destination pointer d1 increment", + resval:4 + } + ] + }, + { name: "DST_PTR_INC_D2", + desc: "Increment the D2 destination pointer every time a word is copied", + swaccess: "rw", + hwaccess: "hro", + fields: [ + { bits: "22:0", + name: "INC", + desc: "Destination pointer d2 increment", resval:4 } ] @@ -87,8 +131,25 @@ } ] }, - { name: "DATA_TYPE", - desc: '''Width/type of the data to transfer''', + { name: "SRC_DATA_TYPE", + desc: '''Width/type of the source data to transfer''', + swaccess: "rw", + hwaccess: "hro", + resval: 0, + fields: [ + { bits: "1:0", name: "DATA_TYPE", + desc: "Data type", + enum: [ + { value: "0", name: "DMA_32BIT_WORD", desc: "Transfers 32 bits"}, + { value: "1", name: 
"DMA_16BIT_WORD", desc: "Transfers 16 bits"}, + { value: "2", name: "DMA_8BIT_WORD" , desc: "Transfers 8 bits"}, + { value: "3", name: "DMA_8BIT_WORD_2",desc: "Transfers 8 bits"}, + ] + } + ] + }, + { name: "DST_DATA_TYPE", + desc: '''Width/type of the destination data to transfer''', swaccess: "rw", hwaccess: "hro", resval: 0, @@ -104,6 +165,22 @@ } ] }, + { + name: "SIGN_EXT", + desc: '''Is the data to be sign extended? (Checked only if the dst data type is wider than the src data type)''', + swaccess: "rw", + hwaccess: "hro", + resval: 0, + fields: [ + { bits: "0", name: "SIGNED", + desc: "Extend the sign to the destination data", + enum: [ + { value: "0", name: "NO_EXTEND", desc: "Does not extend the sign"}, + { value: "1", name: "EXTEND", desc: "Extends the sign"}, + ] + } + ] + }, { name: "MODE", desc: '''Set the operational mode of the DMA''', swaccess: "rw", @@ -119,13 +196,71 @@ } ] }, + { name: "DIM_CONFIG", + desc: '''Set the dimensionality of the DMA''', + swaccess: "rw", + hwaccess: "hro", + resval: 0, + fields: [ + { bits: "0", name: "DMA_DIM", desc: "DMA transfer dimensionality"} + ] + }, + { name: "DIM_INV", + desc: '''DMA dimensionality inversion selector''', + swaccess: "rw", + hwaccess: "hro", + resval: 0, + fields: [ + { bits: "0", name: "SEL", desc: "DMA dimensionality inversion, used to perform transposition"} + ] + }, + { name: "PAD_TOP", + desc: '''Set the top padding''', + swaccess: "rw", + hwaccess: "hro", + hwqe: "true", // enable `qe` latched signal of software write pulse: used to trigger the padding + resval: 0, + fields: [ + { bits: "5:0", name: "PAD", desc: "Top margin padding (2D)"} + ] + }, + { name: "PAD_BOTTOM", + desc: '''Set the bottom padding''', + swaccess: "rw", + hwaccess: "hro", + hwqe: "true", // enable `qe` latched signal of software write pulse: used to trigger the padding + resval: 0, + fields: [ + { bits: "5:0", name: "PAD", desc: "Bottom margin padding (2D)"} + ] + }, + { name: "PAD_RIGHT", + desc: '''Set the right 
padding''', + swaccess: "rw", + hwaccess: "hro", + hwqe: "true", // enable `qe` latched signal of software write pulse: used to trigger the padding + resval: 0, + fields: [ + { bits: "5:0", name: "PAD", desc: "Right margin padding (1D/2D)"} + ] + }, + { name: "PAD_LEFT", + desc: '''Set the left padding''', + swaccess: "rw", + hwaccess: "hro", + hwqe: "true", // enable `qe` latched signal of software write pulse: used to trigger the padding + resval: 0, + fields: [ + { bits: "5:0", name: "PAD", desc: "Left margin padding (1D/2D)"} + ] + }, { name: "WINDOW_SIZE", desc: '''Will trigger a every "WINDOW_SIZE" writes Set to 0 to disable.''', swaccess: "rw", hwaccess: "hro", fields: [ - { bits: "31:0", name: "WINDOW_SIZE", desc: ""} + { bits: "12:0", name: "WINDOW_SIZE", desc: ""} ] }, { name: "WINDOW_COUNT", @@ -136,7 +271,7 @@ hwaccess: "hrw", resval: 0, fields: [ - { bits: "31:0", name: "WINDOW_COUNT", desc: "Number of windows transferred in the transaction" } + { bits: "7:0", name: "WINDOW_COUNT", desc: "Number of windows transferred in the transaction" } ] }, { name: "INTERRUPT_EN", diff --git a/hw/ip/dma/rtl/dma.sv b/hw/ip/dma/rtl/dma.sv index c4f5e2eac..56b5b4ded 100644 --- a/hw/ip/dma/rtl/dma.sv +++ b/hw/ip/dma/rtl/dma.sv @@ -1,8 +1,10 @@ -// Copyright 2022 EPFL -// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +/* + * Copyright 2022 EPFL + * Solderpad Hardware License, Version 2.1, see LICENSE.md for details. 
+ * SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + */ -// DMA assume a read request is not granted before previous request rvalid is asserted +/* DMA assume a read request is not granted before previous request rvalid is asserted */ module dma #( parameter int unsigned FIFO_DEPTH = 4, @@ -41,14 +43,14 @@ module dma #( dma_reg2hw_t reg2hw; dma_hw2reg_t hw2reg; + logic [ 31:0] src_ptr_reg; logic [ 31:0] read_ptr_reg; logic [ 31:0] addr_ptr_reg; logic [ 31:0] read_ptr_valid_reg; logic [ 31:0] write_ptr_reg; logic [ 31:0] write_address; - logic [ 31:0] dma_cnt; logic [ 31:0] dma_addr_cnt; - logic [ 2:0] dma_cnt_dec; + logic [ 2:0] dma_cnt_du; logic dma_start; logic dma_done; logic dma_window_event; @@ -86,6 +88,62 @@ module dma #( logic data_out_rvalid; logic [ 31:0] data_out_rdata; + /* Sign extension signals */ + logic sign_extend; + + /* 2D signals */ + + /* Dimensionality configuration */ + logic dma_conf_1d; // High when dim_config is 0 (1D transfer) + logic dma_conf_2d; // High when dim_config is 1 (2D transfer) + + /* Counters */ + logic [ 16:0] dma_src_cnt_d1; // d1 src counter + logic [ 16:0] dma_src_cnt_d2; // d2 src counter + logic [ 16:0] dma_dst_cnt_d1; // d1 dst counter + + /* Increments */ + logic [ 5:0] dma_src_d1_inc; // d1 source increment + logic [ 22:0] dma_src_d2_inc; // d2 source increment + logic [ 5:0] dma_dst_d1_inc; // d1 destination increment + logic [ 22:0] dma_dst_d2_inc; // d2 destination increment + + /* Flags */ + logic pad_fifo_on; // Padding flag for FIFO + logic pad_cnt_on; // Padding flag for counters + logic read_ptr_update_sel; // Select the read pointer update source + + /* Padding FSM conditions */ + logic idle_to_left_ex; + logic idle_to_top_ex; + logic idle_to_right_ex; + logic idle_to_bottom_ex; + logic top_ex_to_top_dn; + logic top_ex_to_left_ex; + logic top_dn_to_right_ex; + logic top_dn_to_bottom_ex; + logic top_dn_to_idle; + logic left_ex_to_left_dn; + logic left_dn_to_left_ex; + logic left_dn_to_right_ex; + 
logic left_dn_to_bottom_ex; + logic left_dn_to_idle; + logic right_ex_to_right_dn; + logic right_ex_to_left_ex; + logic right_dn_to_right_ex; + logic right_dn_to_idle; + logic right_ex_to_bottom_ex; + logic bottom_ex_to_idle; + + /* Padding synchronization signals */ + logic data_in_rvalid_virt; + logic data_in_rvalid_virt_n; + logic data_in_rvalid_virt_n_n; + logic data_in_gnt_virt; + logic data_in_gnt_virt_n; + logic data_in_gnt_virt_n_n; + + /* FIFO signals */ logic fifo_flush; logic fifo_full; logic fifo_empty; @@ -94,22 +152,30 @@ module dma #( logic fifo_addr_full; logic fifo_addr_empty, fifo_addr_empty_check; - logic wait_for_rx; - logic wait_for_tx; + logic wait_for_rx; + logic wait_for_tx; - logic [ 1:0] data_type; + typedef enum logic [1:0] { + DMA_DATA_TYPE_WORD, + DMA_DATA_TYPE_HALF_WORD, + DMA_DATA_TYPE_BYTE, + DMA_DATA_TYPE_BYTE_ + } dma_data_type_t; - logic [31:0] fifo_input; - logic [31:0] fifo_addr_input; - logic [31:0] fifo_output; - logic [31:0] fifo_addr_output; + dma_data_type_t dst_data_type; + dma_data_type_t src_data_type; - logic [ 3:0] byte_enable_out; + logic [31:0] fifo_input; + logic [31:0] fifo_addr_input; + logic [31:0] fifo_output; + logic [31:0] fifo_addr_output; - logic circular_mode; - logic address_mode; + logic [ 3:0] byte_enable_out; - logic dma_start_pending; + logic circular_mode; + logic address_mode; + + logic dma_start_pending; enum { DMA_READY, @@ -118,7 +184,20 @@ module dma #( } dma_state_q, dma_state_d; - logic [Addr_Fifo_Depth-1:0] outstanding_req, outstanding_addr_req; + /* Padding FSM states */ + + enum { + PAD_IDLE, + TOP_PAD_EXEC, + LEFT_PAD_EXEC, + RIGHT_PAD_EXEC, + BOTTOM_PAD_EXEC, + TOP_PAD_DONE, + LEFT_PAD_DONE, + RIGHT_PAD_DONE, + BOTTOM_PAD_DONE + } + pad_state_q, pad_state_d, pad_state_x; enum logic { DMA_READ_FSM_IDLE, @@ -132,14 +211,17 @@ module dma #( } dma_write_fsm_state, dma_write_fsm_n_state; - assign dma_read_ch0_req_o.req = data_in_req; + logic [Addr_Fifo_Depth-1:0] outstanding_req, 
outstanding_addr_req; + logic [31:0] window_counter; + + assign dma_read_ch0_req_o.req = data_in_req && ~pad_fifo_on; assign dma_read_ch0_req_o.we = data_in_we; assign dma_read_ch0_req_o.be = data_in_be; assign dma_read_ch0_req_o.addr = data_in_addr; assign dma_read_ch0_req_o.wdata = 32'h0; - assign data_in_gnt = dma_read_ch0_resp_i.gnt; - assign data_in_rvalid = dma_read_ch0_resp_i.rvalid; + assign data_in_gnt = dma_read_ch0_resp_i.gnt || (data_in_gnt_virt & pad_fifo_on); + assign data_in_rvalid = dma_read_ch0_resp_i.rvalid || (data_in_rvalid_virt & pad_fifo_on); assign data_in_rdata = dma_read_ch0_resp_i.rdata; assign dma_addr_ch0_req_o.req = data_addr_in_req; @@ -165,11 +247,8 @@ module dma #( assign dma_done_intr_o = dma_done & reg2hw.interrupt_en.transaction_done.q; assign dma_window_intr_o = dma_window_event & reg2hw.interrupt_en.window_done.q; - - logic [31:0] window_counter; - - - assign data_type = reg2hw.data_type.q; + assign dst_data_type = dma_data_type_t'(reg2hw.dst_data_type.q); + assign src_data_type = dma_data_type_t'(reg2hw.src_data_type.q); assign hw2reg.status.ready.d = (dma_state_q == DMA_READY); @@ -178,6 +257,84 @@ module dma #( assign circular_mode = reg2hw.mode.q == 1; assign address_mode = reg2hw.mode.q == 2; + /* DMA Dimensionality configuration flags */ + assign dma_conf_1d = reg2hw.dim_config.q == 0; + assign dma_conf_2d = reg2hw.dim_config.q == 1; + + /* DMA read pointer source selection */ + assign read_ptr_update_sel = reg2hw.dim_inv.q; + + /* DMA 2D increment */ + assign dma_src_d2_inc = reg2hw.src_ptr_inc_d2.q; + assign dma_src_d1_inc = reg2hw.src_ptr_inc_d1.q; + assign dma_dst_d2_inc = reg2hw.dst_ptr_inc_d2.q; + assign dma_dst_d1_inc = reg2hw.dst_ptr_inc_d1.q; + + /* Sign extend flag */ + + assign sign_extend = reg2hw.sign_ext.q & ( (src_data_type[1] & ~dst_data_type[1]) | ((src_data_type[1] == dst_data_type[1]) & (src_data_type[0] & ~dst_data_type[0]))); + + /* Padding FSM conditions assignments */ + + assign idle_to_top_ex = 
{|reg2hw.pad_top.q == 1'b1 && dma_start == 1'b1}; + assign idle_to_left_ex = { + |reg2hw.pad_top.q == 1'b0 && |reg2hw.pad_left.q == 1'b1 && dma_start == 1'b1 + }; + assign idle_to_right_ex = { + |reg2hw.pad_top.q == 1'b0 && |reg2hw.pad_left.q == 1'b0 && |reg2hw.pad_right.q == 1'b1 + && dma_src_cnt_d1 == ({11'h0, reg2hw.pad_right.q} + {14'h0, dma_cnt_du}) && dma_start == 1'b1 + }; + assign idle_to_bottom_ex = { + |reg2hw.pad_top.q == 1'b0 && |reg2hw.pad_left.q == 1'b0 && |reg2hw.pad_right.q == 1'b0 && |reg2hw.pad_bottom.q == 1'b1 + && dma_src_cnt_d2 == ({11'h0, reg2hw.pad_bottom.q} + {14'h0, dma_cnt_du}) && dma_src_cnt_d1 == ({14'h0, dma_cnt_du}) && dma_start == 1'b1 + }; + assign top_ex_to_top_dn = { + dma_src_cnt_d2 == ({1'h0, reg2hw.size_d2.q} + {11'h0, reg2hw.pad_bottom.q} + {14'h0, dma_cnt_du}) && dma_src_cnt_d1 == ({14'h0, dma_cnt_du}) && |reg2hw.pad_left.q == 1'b0 + }; + assign top_ex_to_left_ex = { + dma_src_cnt_d2 == ({1'h0, reg2hw.size_d2.q} + {11'h0, reg2hw.pad_bottom.q} + {14'h0, dma_cnt_du}) && dma_src_cnt_d1 == ({14'h0, dma_cnt_du}) && |reg2hw.pad_left.q == 1'b1 + }; + assign top_dn_to_right_ex = { + |reg2hw.pad_left.q == 1'b0 && |reg2hw.pad_right.q == 1'b1 && dma_src_cnt_d1 == ({11'h0, reg2hw.pad_right.q} + {14'h0, dma_cnt_du}) + }; + assign top_dn_to_bottom_ex = { + |reg2hw.pad_left.q == 1'b0 && |reg2hw.pad_right.q == 1'b0 && |reg2hw.pad_bottom.q == 1'b1 && dma_src_cnt_d2 == ({11'h0, reg2hw.pad_bottom.q} + {14'h0, dma_cnt_du}) && dma_src_cnt_d1 == ({14'h0, dma_cnt_du}) + }; + assign top_dn_to_idle = { + |reg2hw.pad_left.q == 1'b0 && |reg2hw.pad_right.q == 1'b0 && |reg2hw.pad_bottom.q == 1'b0 && |dma_src_cnt_d2 == 1'b0 + }; + assign left_ex_to_left_dn = { + dma_src_cnt_d1 == ({1'h0, reg2hw.size_d1.q} + {11'h0, reg2hw.pad_right.q} + {14'h0, dma_cnt_du}) + }; + assign left_dn_to_left_ex = { + dma_src_cnt_d1 == ({14'h0, dma_cnt_du}) && dma_src_cnt_d2 != ({14'h0, dma_cnt_du} + {11'h0, reg2hw.pad_bottom.q}) && |reg2hw.pad_right.q == 1'b0 + }; + assign 
left_dn_to_right_ex = { + |reg2hw.pad_right.q == 1'b1 && dma_src_cnt_d1 == ({11'h0, reg2hw.pad_right.q} + {14'h0, dma_cnt_du}) + }; + assign left_dn_to_bottom_ex = { + |reg2hw.pad_right.q == 1'b0 && |reg2hw.pad_bottom.q == 1'b1 && dma_src_cnt_d2 == ({11'h0, reg2hw.pad_bottom.q} + {14'h0, dma_cnt_du}) && dma_src_cnt_d1 == ({14'h0, dma_cnt_du}) + }; + assign left_dn_to_idle = { + |reg2hw.pad_right.q == 1'b0 && |reg2hw.pad_bottom.q == 1'b0 && |dma_src_cnt_d2 == 1'b0 + }; + assign right_ex_to_right_dn = { + dma_src_cnt_d1 == ({14'h0, dma_cnt_du}) && dma_src_cnt_d2 != ({11'h0, reg2hw.pad_bottom.q} + {14'h0, dma_cnt_du}) && |reg2hw.pad_left.q == 1'b0 + }; + assign right_ex_to_left_ex = { + dma_src_cnt_d1 == ({14'h0, dma_cnt_du}) && dma_src_cnt_d2 != ({11'h0, reg2hw.pad_bottom.q} + {14'h0, dma_cnt_du}) && |reg2hw.pad_left.q == 1'b1 + }; + assign right_ex_to_bottom_ex = { + |reg2hw.pad_bottom.q == 1'b1 && dma_src_cnt_d2 == ({11'h0, reg2hw.pad_bottom.q} + {14'h0, dma_cnt_du}) && dma_src_cnt_d1 == ({14'h0, dma_cnt_du}) + }; + assign right_dn_to_right_ex = { + dma_src_cnt_d1 == ({11'h0, reg2hw.pad_right.q} + {14'h0, dma_cnt_du}) && |reg2hw.pad_left.q == 1'b0 + }; + assign right_dn_to_idle = {|reg2hw.pad_bottom.q == 1'b0 && |dma_src_cnt_d2 == 1'b0}; + assign bottom_ex_to_idle = { + dma_src_cnt_d1 == {14'h0, dma_cnt_du} && dma_src_cnt_d2 == {14'h0, dma_cnt_du} + }; + assign write_address = address_mode ? 
fifo_addr_output : write_ptr_reg; assign wait_for_rx = |(reg2hw.slot.rx_trigger_slot.q[SLOT_NUM-1:0] & (~trigger_slot_i)); @@ -198,6 +355,7 @@ module dma #( // RUNNING : waiting for transaction finish // when `dma_done` rises either enter ready or restart in circular mode // + always_comb begin dma_state_d = dma_state_q; case (dma_state_q) @@ -218,7 +376,7 @@ module dma #( endcase end - // update state + /* Update DMA state */ always_ff @(posedge clk_i, negedge rst_ni) begin if (~rst_ni) begin dma_state_q <= DMA_READY; @@ -227,21 +385,20 @@ module dma #( end end - - // DMA pulse start when dma_start register is written + /* DMA pulse start when dma_start register is written */ always_ff @(posedge clk_i or negedge rst_ni) begin : proc_dma_start if (~rst_ni) begin dma_start_pending <= 1'b0; end else begin if (dma_start == 1'b1) begin dma_start_pending <= 1'b0; - end else if (reg2hw.size.qe & |reg2hw.size.q) begin + end else if ((reg2hw.size_d1.qe & |reg2hw.size_d1.q)) begin dma_start_pending <= 1'b1; end end end - // Store input data pointer and increment everytime read request is granted + /* Store input data pointer and increment it every time a read request is granted */ always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ptr_in_reg if (~rst_ni) begin read_ptr_reg <= '0; @@ -249,7 +406,44 @@ module dma #( if (dma_start == 1'b1) begin read_ptr_reg <= reg2hw.src_ptr.q; end else if (data_in_gnt == 1'b1) begin - read_ptr_reg <= read_ptr_reg + {24'h0, reg2hw.ptr_inc.src_ptr_inc.q}; + if (dma_conf_1d == 1'b1) begin + /* Increase the pointer by the d1 source increment (dma_src_d1_inc) */ + read_ptr_reg <= read_ptr_reg + {26'h0, dma_src_d1_inc}; + end else if (dma_conf_2d == 1'b1 && pad_cnt_on == 1'b0) begin + if (read_ptr_update_sel == 1'b0) begin + if (dma_src_cnt_d1 == {14'h0, dma_cnt_du} && |dma_src_cnt_d2 == 1'b1) begin + /* In this case, the d1 is almost finished, so we need to increment the pointer by sizeof(d1)*data_unit */ + read_ptr_reg <= read_ptr_reg + {9'h0, 
dma_src_d2_inc}; + end else begin + read_ptr_reg <= read_ptr_reg + {26'h0, dma_src_d1_inc}; /* Increment of the d1 increment (stride) */ + end + end else begin + if (dma_src_cnt_d1 == {14'h0, dma_cnt_du} && |dma_src_cnt_d2 == 1'b1) begin + /* In this case, the d1 is almost finished, so the pointer is reloaded from src_ptr_reg */ + read_ptr_reg <= src_ptr_reg; + end else begin + read_ptr_reg <= read_ptr_reg + {9'h0, dma_src_d2_inc}; /* Increment by the d2 increment (stride) */ + end + end + end + end + end + end + + /* + * Store input data pointer in src_ptr_reg and increment it every time a read request is granted, + * if the d1 has finished reading and the read pointer update is set to 1'b1 + */ + + always_ff @(posedge clk_i or negedge rst_ni) begin : proc_src_ptr_reg + if (~rst_ni) begin + src_ptr_reg <= '0; + end else begin + if (dma_start == 1'b1) begin + src_ptr_reg <= reg2hw.src_ptr.q + {26'h0, dma_src_d1_inc}; + end else if (data_in_gnt == 1'b1 && dma_conf_2d == 1'b1 && pad_cnt_on == 1'b0 && read_ptr_update_sel == 1'b1 && + (dma_src_cnt_d1 == {14'h0, dma_cnt_du} && |dma_src_cnt_d2 == 1'b1)) begin + src_ptr_reg <= src_ptr_reg + {26'h0, dma_src_d1_inc}; end end end @@ -267,7 +461,9 @@ module dma #( end end - // Only update read_ptr_valid_reg when the data is stored in the fifo + // Only update read_ptr_valid_reg when the data is stored in the fifo. + // Since every input grant is followed by a rvalid, the read_ptr_valid_reg is a mere sample of the read_ptr_reg + // synched with the rvalid signal. 
always_ff @(posedge clk_i or negedge rst_ni) begin : proc_ptr_valid_in_reg if (~rst_ni) begin read_ptr_valid_reg <= '0; @@ -275,7 +471,7 @@ module dma #( if (dma_start == 1'b1) begin read_ptr_valid_reg <= reg2hw.src_ptr.q; end else if (data_in_rvalid == 1'b1) begin - read_ptr_valid_reg <= read_ptr_valid_reg + {24'h0, reg2hw.ptr_inc.src_ptr_inc.q}; + read_ptr_valid_reg <= read_ptr_reg; end end end @@ -288,20 +484,74 @@ module dma #( if (dma_start == 1'b1) begin write_ptr_reg <= reg2hw.dst_ptr.q; end else if (data_out_gnt == 1'b1) begin - write_ptr_reg <= write_ptr_reg + {24'h0, reg2hw.ptr_inc.dst_ptr_inc.q}; + if (dma_conf_1d == 1'b1) begin + write_ptr_reg <= write_ptr_reg + {26'h0, dma_dst_d1_inc}; + end else if (dma_conf_2d == 1'b1) begin + if (dma_dst_cnt_d1 == {14'h0, dma_cnt_du}) begin + // In this case, the d1 is finished, so we need to increment the pointer by sizeof(d1)*data_unit*strides + write_ptr_reg <= write_ptr_reg + {9'h0, dma_dst_d2_inc}; + end else begin + write_ptr_reg <= write_ptr_reg + {26'h0, dma_dst_d1_inc}; // Increment just of one du, since we need to increase the 1d + end + end end end end - // Store dma transfer size and decrement it everytime input data rvalid is asserted - always_ff @(posedge clk_i or negedge rst_ni) begin : proc_dma_cnt_reg + // Store dma transfer size and decrement it everytime input data rvalid is asserted. 
+ // Perform additional checks for 2D DMA + always_ff @(posedge clk_i or negedge rst_ni) begin : proc_dma_src_cnt_reg if (~rst_ni) begin - dma_cnt <= '0; + dma_src_cnt_d1 <= '0; + dma_src_cnt_d2 <= '0; end else begin if (dma_start == 1'b1) begin - dma_cnt <= reg2hw.size.q; + dma_src_cnt_d1 <= {1'h0, reg2hw.size_d1.q} + {11'h0, reg2hw.pad_left.q} + {11'h0, reg2hw.pad_right.q}; + dma_src_cnt_d2 <= {1'h0, reg2hw.size_d2.q} + {11'h0, reg2hw.pad_top.q} + {11'h0, reg2hw.pad_bottom.q}; end else if (data_in_gnt == 1'b1) begin - dma_cnt <= dma_cnt - {29'h0, dma_cnt_dec}; + if (dma_conf_1d == 1'b1) begin + // 1D case + dma_src_cnt_d1 <= dma_src_cnt_d1 - {14'h0, dma_cnt_du}; + end else if (dma_conf_2d == 1'b1) begin + // 2D case + if (dma_src_cnt_d1 == {14'h0, dma_cnt_du}) begin + // In this case, the d1 is finished, so we need to decrement the d2 counter and reload the d1 counter + dma_src_cnt_d2 <= dma_src_cnt_d2 - {14'h0, dma_cnt_du}; + dma_src_cnt_d1 <= {1'h0, reg2hw.size_d1.q} + {11'h0, reg2hw.pad_left.q} + {11'h0, reg2hw.pad_right.q}; + end else begin + // In this case, the d1 isn't finished, so we need to decrement the d1 size + dma_src_cnt_d1 <= dma_src_cnt_d1 - {14'h0, dma_cnt_du}; + end + end + end + end + end + + // Store dma transfer size and decrement it everytime input data write request is granted. + // The need for two separate counters for reading and writing operations is due to the lack of synchronization between them. + // Since the check on the read side is done on the rvalid signal, we need only an additional counter, for d1. + // Performs additional checks for 2D DMA. 
+ + always_ff @(posedge clk_i or negedge rst_ni) begin : proc_dma_dst_cnt_reg + if (~rst_ni) begin + dma_dst_cnt_d1 <= '0; + end else begin + if (dma_start == 1'b1) begin + dma_dst_cnt_d1 <= {1'h0, reg2hw.size_d1.q} + {11'h0, reg2hw.pad_left.q} + {11'h0, reg2hw.pad_right.q}; + end else if (data_out_gnt == 1'b1) begin + if (dma_conf_1d == 1'b1) begin + // 1D case + dma_dst_cnt_d1 <= dma_dst_cnt_d1 - {14'h0, dma_cnt_du}; + end else if (dma_conf_2d == 1'b1) begin + // 2D case + if (dma_dst_cnt_d1 == {14'h0, dma_cnt_du}) begin + // In this case, the d1 is finished, so we need to reload the d1 counter + dma_dst_cnt_d1 <= {1'h0, reg2hw.size_d1.q} + {11'h0, reg2hw.pad_left.q} + {11'h0, reg2hw.pad_right.q}; + end else begin + // In this case, the d1 isn't finished, so we need to decrement the d1 size + dma_dst_cnt_d1 <= dma_dst_cnt_d1 - {14'h0, dma_cnt_du}; + end + end end end end @@ -312,7 +562,7 @@ module dma #( dma_addr_cnt <= '0; end else begin if (dma_start == 1'b1 && address_mode) begin - dma_addr_cnt <= reg2hw.size.q; + dma_addr_cnt <= {16'h0, reg2hw.size_d1.q}; end else if (data_addr_in_gnt == 1'b1 && address_mode) begin dma_addr_cnt <= dma_addr_cnt - 32'h4; //address always 32b end @@ -320,18 +570,18 @@ module dma #( end always_comb begin - case (data_type) - 2'b00: dma_cnt_dec = 3'h4; - 2'b01: dma_cnt_dec = 3'h2; - 2'b10, 2'b11: dma_cnt_dec = 3'h1; + case (dst_data_type) + DMA_DATA_TYPE_WORD: dma_cnt_du = 3'h4; + DMA_DATA_TYPE_HALF_WORD: dma_cnt_du = 3'h2; + DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_BYTE_: dma_cnt_du = 3'h1; endcase end always_comb begin : proc_byte_enable_out - case (data_type) // Data type 00 Word, 01 Half word, 11,10 byte - 2'b00: byte_enable_out = 4'b1111; // Writing a word (32 bits) + case (dst_data_type) // Data type 00 Word, 01 Half word, 11,10 byte + DMA_DATA_TYPE_WORD: byte_enable_out = 4'b1111; // Writing a word (32 bits) - 2'b01: begin // Writing a half-word (16 bits) + DMA_DATA_TYPE_HALF_WORD: begin // Writing a half-word (16 bits) case 
(write_address[1]) 1'b0: byte_enable_out = 4'b0011; 1'b1: byte_enable_out = 4'b1100; @@ -339,7 +589,7 @@ module dma #( ; // case(write_address[1:0]) end - 2'b10, 2'b11: begin // Writing a byte (8 bits) + DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_BYTE_: begin // Writing a byte (8 bits) case (write_address[1:0]) 2'b00: byte_enable_out = 4'b0001; 2'b01: byte_enable_out = 4'b0010; @@ -349,7 +599,7 @@ module dma #( ; // case(write_address[1:0]) end endcase - ; // case (data_type) + ; // case (dst_data_type) end // Output data shift @@ -361,41 +611,118 @@ module dma #( data_out_wdata[31:24] = fifo_output[31:24]; case (write_address[1:0]) - 2'b00: ; - - 2'b01: data_out_wdata[15:8] = fifo_output[7:0]; - - 2'b10: begin + 2'b00: begin + if (sign_extend) begin + case ({ + src_data_type, dst_data_type + }) + {DMA_DATA_TYPE_WORD, DMA_DATA_TYPE_WORD} : ; + { + DMA_DATA_TYPE_HALF_WORD, DMA_DATA_TYPE_WORD + } : + data_out_wdata[31:16] = {16{fifo_output[15]}}; + { + DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_WORD + }, { + DMA_DATA_TYPE_BYTE_, DMA_DATA_TYPE_WORD + } : + data_out_wdata[31:8] = {24{fifo_output[7]}}; + {DMA_DATA_TYPE_HALF_WORD, DMA_DATA_TYPE_HALF_WORD} : ; + { + DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_HALF_WORD + }, { + DMA_DATA_TYPE_BYTE_, DMA_DATA_TYPE_HALF_WORD + } : + data_out_wdata[15:8] = {8{fifo_output[7]}}; + default: ; + endcase + end else begin + case ({ + src_data_type, dst_data_type + }) + {DMA_DATA_TYPE_WORD, DMA_DATA_TYPE_WORD} : ; + {DMA_DATA_TYPE_HALF_WORD, DMA_DATA_TYPE_WORD} : data_out_wdata[31:16] = 16'b0; + { + DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_WORD + }, { + DMA_DATA_TYPE_BYTE_, DMA_DATA_TYPE_WORD + } : + data_out_wdata[31:8] = 24'b0; + {DMA_DATA_TYPE_HALF_WORD, DMA_DATA_TYPE_HALF_WORD} : ; + { + DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_HALF_WORD + }, { + DMA_DATA_TYPE_BYTE_, DMA_DATA_TYPE_HALF_WORD + } : + data_out_wdata[15:8] = 8'b0; + default: ; + endcase + end + end + 2'b01: data_out_wdata[15:8] = fifo_output[7:0]; // Writing a byte, no need for sign extension + 2'b10: begin 
// Writing a half-word or a byte data_out_wdata[23:16] = fifo_output[7:0]; data_out_wdata[31:24] = fifo_output[15:8]; - end - 2'b11: data_out_wdata[31:24] = fifo_output[7:0]; + if (sign_extend) begin + case ({ + src_data_type, dst_data_type + }) + {DMA_DATA_TYPE_HALF_WORD, DMA_DATA_TYPE_HALF_WORD} : ; + { + DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_HALF_WORD + }, { + DMA_DATA_TYPE_BYTE_, DMA_DATA_TYPE_HALF_WORD + } : + data_out_wdata[31:24] = {8{fifo_output[7]}}; + default: ; + endcase + end else begin + case ({ + src_data_type, dst_data_type + }) + {DMA_DATA_TYPE_HALF_WORD, DMA_DATA_TYPE_HALF_WORD} : ; + { + DMA_DATA_TYPE_BYTE, DMA_DATA_TYPE_HALF_WORD + }, { + DMA_DATA_TYPE_BYTE_, DMA_DATA_TYPE_HALF_WORD + } : + data_out_wdata[31:24] = 8'b0; + default: ; + endcase + end + end + 2'b11: + data_out_wdata[31:24] = fifo_output[7:0]; // Writing a byte, no need for sign extension endcase end + assign fifo_addr_input = data_addr_in_rdata; //never misaligned, always 32b // Input data shift: shift the input data to be on the LSB of the fifo always_comb begin : proc_input_data - fifo_input[7:0] = data_in_rdata[7:0]; - fifo_input[15:8] = data_in_rdata[15:8]; - fifo_input[23:16] = data_in_rdata[23:16]; - fifo_input[31:24] = data_in_rdata[31:24]; - - case (read_ptr_valid_reg[1:0]) - 2'b00: ; - - 2'b01: fifo_input[7:0] = data_in_rdata[15:8]; - - 2'b10: begin - fifo_input[7:0] = data_in_rdata[23:16]; - fifo_input[15:8] = data_in_rdata[31:24]; - end + if (pad_fifo_on) begin + fifo_input = 32'h0; + end else begin + fifo_input[7:0] = data_in_rdata[7:0]; + fifo_input[15:8] = data_in_rdata[15:8]; + fifo_input[23:16] = data_in_rdata[23:16]; + fifo_input[31:24] = data_in_rdata[31:24]; + + case (read_ptr_valid_reg[1:0]) + 2'b00: ; + 2'b01: fifo_input[7:0] = data_in_rdata[15:8]; + + 2'b10: begin + fifo_input[7:0] = data_in_rdata[23:16]; + fifo_input[15:8] = data_in_rdata[31:24]; + end - 2'b11: fifo_input[7:0] = data_in_rdata[31:24]; - endcase + 2'b11: fifo_input[7:0] = data_in_rdata[31:24]; + 
endcase + end end // FSM state update @@ -410,6 +737,7 @@ module dma #( dma_read_fsm_state <= dma_read_fsm_n_state; dma_write_fsm_state <= dma_write_fsm_n_state; dma_read_addr_fsm_state <= dma_read_addr_fsm_n_state; + outstanding_req <= outstanding_req + (data_in_req && data_in_gnt) - data_in_rvalid; if (address_mode) @@ -418,6 +746,192 @@ module dma #( end end + /* Padding synchronization signal generation + * When the pad_fifo_on is asserted, this logic mimics the behaviour of the data_in_rvalid and data_in_gnt signals + * coming from the memory. This is done in order to keep the read/write operations working even without an + * actual response from memory, reducing power consumption by avoiding unnecessary memory accesses. + */ + always_ff @(posedge clk_i or negedge rst_ni) begin : proc_pad_sync_signal + if (~rst_ni) begin + data_in_rvalid_virt <= 1'b0; + data_in_rvalid_virt_n <= 1'b1; + data_in_rvalid_virt_n_n <= 1'b0; + data_in_gnt_virt <= 1'b1; + data_in_gnt_virt_n <= 1'b0; + data_in_gnt_virt_n_n <= 1'b0; + end else begin + if (data_in_req == 1'b1 && pad_fifo_on == 1'b1) begin + data_in_rvalid_virt <= data_in_rvalid_virt_n; + data_in_rvalid_virt_n <= data_in_rvalid_virt_n_n; + data_in_rvalid_virt_n_n <= data_in_rvalid; + data_in_gnt_virt <= data_in_gnt_virt_n; + data_in_gnt_virt_n <= data_in_gnt_virt_n_n; + data_in_gnt_virt_n_n <= data_in_gnt; + end else begin + data_in_rvalid_virt <= 1'b0; + data_in_rvalid_virt_n <= 1'b1; + data_in_rvalid_virt_n_n <= 1'b0; + data_in_gnt_virt <= 1'b1; + data_in_gnt_virt_n <= 1'b0; + data_in_gnt_virt_n_n <= 1'b0; + end + end + end + + // Padding FSM state update + always_ff @(posedge clk_i or negedge rst_ni) begin : proc_pad_state + if (~rst_ni) begin + pad_state_q <= PAD_IDLE; + pad_state_x <= PAD_IDLE; + end else if (dma_conf_2d == 1'b1) begin + if (dma_start == 1'b1 && |reg2hw.pad_top.q == 1'b1) begin + pad_state_q <= TOP_PAD_EXEC; + pad_state_x <= TOP_PAD_EXEC; + end else if (dma_start == 1'b1 && |reg2hw.pad_left.q ==
1'b1) begin + pad_state_q <= LEFT_PAD_EXEC; + pad_state_x <= LEFT_PAD_EXEC; + end else begin + pad_state_x <= pad_state_d; + if (data_in_rvalid == 1'b1) begin + pad_state_q <= pad_state_x; + end + end + end + end + + // Pad fifo flag logic + always_comb begin : proc_pad_fifo_on + if (dma_conf_2d == 1'b1) begin + case (pad_state_q) + TOP_PAD_EXEC, LEFT_PAD_EXEC, RIGHT_PAD_EXEC, BOTTOM_PAD_EXEC: pad_fifo_on = 1'b1; + + default: pad_fifo_on = 1'b0; + endcase + end + end + + // Pad counter flag logic + always_comb begin : proc_pad_cnt_on + case (pad_state_q) + TOP_PAD_DONE: begin + if (top_dn_to_right_ex) begin + pad_cnt_on = 1'b1; + end else begin + pad_cnt_on = pad_fifo_on; + end + end + + LEFT_PAD_DONE: begin + if (left_dn_to_right_ex) begin + pad_cnt_on = 1'b1; + end else begin + pad_cnt_on = pad_fifo_on; + end + end + + RIGHT_PAD_DONE: begin + if (right_dn_to_right_ex) begin + pad_cnt_on = 1'b1; + end else begin + pad_cnt_on = pad_fifo_on; + end + end + + RIGHT_PAD_EXEC: begin + if (right_ex_to_right_dn || right_ex_to_left_ex) begin + pad_cnt_on = 1'b0; + end else begin + pad_cnt_on = pad_fifo_on; + end + end + + default: pad_cnt_on = pad_fifo_on; + endcase + end + + // Padding FSM logic + always_comb begin : proc_pad_fsm_logic + if (dma_conf_1d == 1'b1) begin + pad_state_d = PAD_IDLE; + end else begin + if (dma_start == 1'b1 && |reg2hw.pad_top.q == 1'b1) begin + pad_state_d = TOP_PAD_EXEC; + end else if (dma_start == 1'b1 && |reg2hw.pad_left.q == 1'b1) begin + pad_state_d = LEFT_PAD_EXEC; + end else begin + pad_state_d = pad_state_x; + end + end + + unique case (pad_state_x) + PAD_IDLE: begin + if (idle_to_top_ex) begin + pad_state_d = TOP_PAD_EXEC; + end else if (idle_to_left_ex) begin + pad_state_d = LEFT_PAD_EXEC; + end else if (idle_to_right_ex) begin + pad_state_d = RIGHT_PAD_EXEC; + end else if (idle_to_bottom_ex) begin + pad_state_d = BOTTOM_PAD_EXEC; + end + end + + TOP_PAD_EXEC: begin + if (top_ex_to_left_ex) begin + pad_state_d = LEFT_PAD_EXEC; + end 
else if (top_ex_to_top_dn) begin + pad_state_d = TOP_PAD_DONE; + end + end + TOP_PAD_DONE: begin + if (top_dn_to_right_ex) begin + pad_state_d = RIGHT_PAD_EXEC; + end else if (top_dn_to_bottom_ex) begin + pad_state_d = BOTTOM_PAD_EXEC; + end else if (top_dn_to_idle) begin + pad_state_d = PAD_IDLE; + end + end + LEFT_PAD_EXEC: begin + if (left_ex_to_left_dn) begin + pad_state_d = LEFT_PAD_DONE; + end + end + LEFT_PAD_DONE: begin + if (left_dn_to_right_ex) begin + pad_state_d = RIGHT_PAD_EXEC; + end else if (left_dn_to_bottom_ex) begin + pad_state_d = BOTTOM_PAD_EXEC; + end else if (left_dn_to_left_ex) begin + pad_state_d = LEFT_PAD_EXEC; + end else if (left_dn_to_idle) begin + pad_state_d = PAD_IDLE; + end + end + RIGHT_PAD_EXEC: begin + if (right_ex_to_right_dn) begin + pad_state_d = RIGHT_PAD_DONE; + end else if (right_ex_to_left_ex) begin + pad_state_d = LEFT_PAD_EXEC; + end else if (right_ex_to_bottom_ex) begin + pad_state_d = BOTTOM_PAD_EXEC; + end + end + RIGHT_PAD_DONE: begin + if (right_dn_to_idle) begin + pad_state_d = PAD_IDLE; + end else if (right_dn_to_right_ex) begin + pad_state_d = RIGHT_PAD_EXEC; + end + end + BOTTOM_PAD_EXEC: begin + if (bottom_ex_to_idle) begin + pad_state_d = PAD_IDLE; + end + end + endcase + end + // Read master FSM always_comb begin : proc_dma_read_fsm_logic @@ -444,16 +958,34 @@ module dma #( // Read one word DMA_READ_FSM_ON: begin // If all input data read exit - if (|dma_cnt == 1'b0) begin - dma_read_fsm_n_state = DMA_READ_FSM_IDLE; - end else begin - dma_read_fsm_n_state = DMA_READ_FSM_ON; - // Wait if fifo is full, almost full (last data), or if the SPI RX does not have valid data (only in SPI mode 1). 
- if (fifo_full == 1'b0 && fifo_alm_full == 1'b0 && wait_for_rx == 1'b0) begin - data_in_req = 1'b1; - data_in_we = 1'b0; - data_in_be = 4'b1111; // always read all bytes - data_in_addr = read_ptr_reg; + if (dma_conf_1d == 1'b1) begin + // 1D DMA case + if (|dma_src_cnt_d1 == 1'b0) begin + dma_read_fsm_n_state = DMA_READ_FSM_IDLE; + end else begin + dma_read_fsm_n_state = DMA_READ_FSM_ON; + // Wait if fifo is full, almost full (last data), or if the SPI RX does not have valid data (only in SPI mode 1). + if (fifo_full == 1'b0 && fifo_alm_full == 1'b0 && wait_for_rx == 1'b0) begin + data_in_req = 1'b1; + data_in_we = 1'b0; + data_in_be = 4'b1111; // always read all bytes + data_in_addr = read_ptr_reg; + end + end + end else if (dma_conf_2d == 1'b1) begin + // 2D DMA case: exit only if both 1d and 2d counters are at 0 + if (dma_src_cnt_d1 == {1'h0, reg2hw.size_d1.q} + {11'h0, reg2hw.pad_left.q} + {11'h0, reg2hw.pad_right.q} && |dma_src_cnt_d2 == 1'b0) begin + dma_read_fsm_n_state = DMA_READ_FSM_IDLE; + end else begin + // The read operation is the same in both cases + dma_read_fsm_n_state = DMA_READ_FSM_ON; + // Wait if fifo is full, almost full (last data), or if the SPI RX does not have valid data (only in SPI mode 1). + if (fifo_full == 1'b0 && fifo_alm_full == 1'b0 && wait_for_rx == 1'b0) begin + data_in_req = 1'b1; + data_in_we = 1'b0; + data_in_be = 4'b1111; // always read all bytes + data_in_addr = read_ptr_reg; + end end end end @@ -528,6 +1060,8 @@ module dma #( // If all input data read exit if (fifo_empty == 1'b1 && dma_read_fsm_state == DMA_READ_FSM_IDLE) begin dma_done = outstanding_req == '0 && outstanding_addr_req == '0; + // If all input data has been read (dma_read_fsm_state == DMA_READ_FSM_IDLE, set when all data has been read) + // and all requests have been granted, (outstanding_req == 0) then we are done dma_write_fsm_n_state = dma_done ? 
DMA_WRITE_FSM_IDLE : DMA_WRITE_FSM_ON; end else begin dma_write_fsm_n_state = DMA_WRITE_FSM_ON; @@ -596,9 +1130,9 @@ module dma #( // WINDOW EVENT // Count gnt write transaction and generate event pulse if WINDOW_SIZE is reached - assign dma_window_event = |reg2hw.window_size.q & data_out_gnt & (window_counter + 'h1 >= reg2hw.window_size.q); + assign dma_window_event = |reg2hw.window_size.q & data_out_gnt & (window_counter + 'h1 >= {19'h0, reg2hw.window_size.q}); - always_ff @(posedge clk_i, negedge rst_ni) begin + always_ff @(posedge clk_i, negedge rst_ni) begin : proc_dma_window_cnt if (~rst_ni) begin window_counter <= 'h0; end else begin @@ -606,7 +1140,7 @@ module dma #( if (dma_start | dma_done) begin window_counter <= 'h0; end else if (data_out_gnt) begin - if (window_counter + 'h1 >= reg2hw.window_size.q) begin + if (window_counter + 'h1 >= {19'h0, reg2hw.window_size.q}) begin window_counter <= 'h0; end else begin window_counter <= window_counter + 'h1; @@ -617,7 +1151,7 @@ module dma #( end // Update WINDOW_COUNT register - always_comb begin + always_comb begin : proc_dma_window_cnt_reg hw2reg.window_count.d = reg2hw.window_count.q + 'h1; hw2reg.window_count.de = 1'b0; if (dma_start) begin @@ -631,7 +1165,7 @@ module dma #( // update window_done flag // set on dma_window_event // reset on read - always_ff @(posedge clk_i, negedge rst_ni) begin + always_ff @(posedge clk_i, negedge rst_ni) begin : proc_dma_window_done if (~rst_ni) begin window_done_q <= 1'b0; end else begin diff --git a/hw/ip/dma/rtl/dma_reg_pkg.sv b/hw/ip/dma/rtl/dma_reg_pkg.sv index e4e31fe25..2f57afa3e 100644 --- a/hw/ip/dma/rtl/dma_reg_pkg.sv +++ b/hw/ip/dma/rtl/dma_reg_pkg.sv @@ -7,7 +7,7 @@ package dma_reg_pkg; // Address widths within the block - parameter int BlockAw = 6; + parameter int BlockAw = 7; //////////////////////////// // Typedefs for registers // @@ -20,9 +20,14 @@ package dma_reg_pkg; typedef struct packed {logic [31:0] q;} dma_reg2hw_addr_ptr_reg_t; typedef struct packed 
{ - logic [31:0] q; + logic [15:0] q; logic qe; - } dma_reg2hw_size_reg_t; + } dma_reg2hw_size_d1_reg_t; + + typedef struct packed { + logic [15:0] q; + logic qe; + } dma_reg2hw_size_d2_reg_t; typedef struct packed { struct packed { @@ -35,23 +40,54 @@ package dma_reg_pkg; } window_done; } dma_reg2hw_status_reg_t; - typedef struct packed { - struct packed {logic [7:0] q;} src_ptr_inc; - struct packed {logic [7:0] q;} dst_ptr_inc; - } dma_reg2hw_ptr_inc_reg_t; + typedef struct packed {logic [5:0] q;} dma_reg2hw_src_ptr_inc_d1_reg_t; + + typedef struct packed {logic [22:0] q;} dma_reg2hw_src_ptr_inc_d2_reg_t; + + typedef struct packed {logic [5:0] q;} dma_reg2hw_dst_ptr_inc_d1_reg_t; + + typedef struct packed {logic [22:0] q;} dma_reg2hw_dst_ptr_inc_d2_reg_t; typedef struct packed { struct packed {logic [15:0] q;} rx_trigger_slot; struct packed {logic [15:0] q;} tx_trigger_slot; } dma_reg2hw_slot_reg_t; - typedef struct packed {logic [1:0] q;} dma_reg2hw_data_type_reg_t; + typedef struct packed {logic [1:0] q;} dma_reg2hw_src_data_type_reg_t; + + typedef struct packed {logic [1:0] q;} dma_reg2hw_dst_data_type_reg_t; + + typedef struct packed {logic q;} dma_reg2hw_sign_ext_reg_t; typedef struct packed {logic [1:0] q;} dma_reg2hw_mode_reg_t; - typedef struct packed {logic [31:0] q;} dma_reg2hw_window_size_reg_t; + typedef struct packed {logic q;} dma_reg2hw_dim_config_reg_t; + + typedef struct packed {logic q;} dma_reg2hw_dim_inv_reg_t; + + typedef struct packed { + logic [5:0] q; + logic qe; + } dma_reg2hw_pad_top_reg_t; + + typedef struct packed { + logic [5:0] q; + logic qe; + } dma_reg2hw_pad_bottom_reg_t; + + typedef struct packed { + logic [5:0] q; + logic qe; + } dma_reg2hw_pad_right_reg_t; + + typedef struct packed { + logic [5:0] q; + logic qe; + } dma_reg2hw_pad_left_reg_t; + + typedef struct packed {logic [12:0] q;} dma_reg2hw_window_size_reg_t; - typedef struct packed {logic [31:0] q;} dma_reg2hw_window_count_reg_t; + typedef struct packed {logic [7:0] q;} 
dma_reg2hw_window_count_reg_t; typedef struct packed { struct packed {logic q;} transaction_done; @@ -64,45 +100,69 @@ package dma_reg_pkg; } dma_hw2reg_status_reg_t; typedef struct packed { - logic [31:0] d; - logic de; + logic [7:0] d; + logic de; } dma_hw2reg_window_count_reg_t; // Register -> HW type typedef struct packed { - dma_reg2hw_src_ptr_reg_t src_ptr; // [250:219] - dma_reg2hw_dst_ptr_reg_t dst_ptr; // [218:187] - dma_reg2hw_addr_ptr_reg_t addr_ptr; // [186:155] - dma_reg2hw_size_reg_t size; // [154:122] - dma_reg2hw_status_reg_t status; // [121:118] - dma_reg2hw_ptr_inc_reg_t ptr_inc; // [117:102] - dma_reg2hw_slot_reg_t slot; // [101:70] - dma_reg2hw_data_type_reg_t data_type; // [69:68] - dma_reg2hw_mode_reg_t mode; // [67:66] - dma_reg2hw_window_size_reg_t window_size; // [65:34] - dma_reg2hw_window_count_reg_t window_count; // [33:2] + dma_reg2hw_src_ptr_reg_t src_ptr; // [283:252] + dma_reg2hw_dst_ptr_reg_t dst_ptr; // [251:220] + dma_reg2hw_addr_ptr_reg_t addr_ptr; // [219:188] + dma_reg2hw_size_d1_reg_t size_d1; // [187:171] + dma_reg2hw_size_d2_reg_t size_d2; // [170:154] + dma_reg2hw_status_reg_t status; // [153:150] + dma_reg2hw_src_ptr_inc_d1_reg_t src_ptr_inc_d1; // [149:144] + dma_reg2hw_src_ptr_inc_d2_reg_t src_ptr_inc_d2; // [143:121] + dma_reg2hw_dst_ptr_inc_d1_reg_t dst_ptr_inc_d1; // [120:115] + dma_reg2hw_dst_ptr_inc_d2_reg_t dst_ptr_inc_d2; // [114:92] + dma_reg2hw_slot_reg_t slot; // [91:60] + dma_reg2hw_src_data_type_reg_t src_data_type; // [59:58] + dma_reg2hw_dst_data_type_reg_t dst_data_type; // [57:56] + dma_reg2hw_sign_ext_reg_t sign_ext; // [55:55] + dma_reg2hw_mode_reg_t mode; // [54:53] + dma_reg2hw_dim_config_reg_t dim_config; // [52:52] + dma_reg2hw_dim_inv_reg_t dim_inv; // [51:51] + dma_reg2hw_pad_top_reg_t pad_top; // [50:44] + dma_reg2hw_pad_bottom_reg_t pad_bottom; // [43:37] + dma_reg2hw_pad_right_reg_t pad_right; // [36:30] + dma_reg2hw_pad_left_reg_t pad_left; // [29:23] + dma_reg2hw_window_size_reg_t 
window_size; // [22:10] + dma_reg2hw_window_count_reg_t window_count; // [9:2] dma_reg2hw_interrupt_en_reg_t interrupt_en; // [1:0] } dma_reg2hw_t; // HW -> register type typedef struct packed { - dma_hw2reg_status_reg_t status; // [34:33] - dma_hw2reg_window_count_reg_t window_count; // [32:0] + dma_hw2reg_status_reg_t status; // [10:9] + dma_hw2reg_window_count_reg_t window_count; // [8:0] } dma_hw2reg_t; // Register offsets - parameter logic [BlockAw-1:0] DMA_SRC_PTR_OFFSET = 6'h0; - parameter logic [BlockAw-1:0] DMA_DST_PTR_OFFSET = 6'h4; - parameter logic [BlockAw-1:0] DMA_ADDR_PTR_OFFSET = 6'h8; - parameter logic [BlockAw-1:0] DMA_SIZE_OFFSET = 6'hc; - parameter logic [BlockAw-1:0] DMA_STATUS_OFFSET = 6'h10; - parameter logic [BlockAw-1:0] DMA_PTR_INC_OFFSET = 6'h14; - parameter logic [BlockAw-1:0] DMA_SLOT_OFFSET = 6'h18; - parameter logic [BlockAw-1:0] DMA_DATA_TYPE_OFFSET = 6'h1c; - parameter logic [BlockAw-1:0] DMA_MODE_OFFSET = 6'h20; - parameter logic [BlockAw-1:0] DMA_WINDOW_SIZE_OFFSET = 6'h24; - parameter logic [BlockAw-1:0] DMA_WINDOW_COUNT_OFFSET = 6'h28; - parameter logic [BlockAw-1:0] DMA_INTERRUPT_EN_OFFSET = 6'h2c; + parameter logic [BlockAw-1:0] DMA_SRC_PTR_OFFSET = 7'h0; + parameter logic [BlockAw-1:0] DMA_DST_PTR_OFFSET = 7'h4; + parameter logic [BlockAw-1:0] DMA_ADDR_PTR_OFFSET = 7'h8; + parameter logic [BlockAw-1:0] DMA_SIZE_D1_OFFSET = 7'hc; + parameter logic [BlockAw-1:0] DMA_SIZE_D2_OFFSET = 7'h10; + parameter logic [BlockAw-1:0] DMA_STATUS_OFFSET = 7'h14; + parameter logic [BlockAw-1:0] DMA_SRC_PTR_INC_D1_OFFSET = 7'h18; + parameter logic [BlockAw-1:0] DMA_SRC_PTR_INC_D2_OFFSET = 7'h1c; + parameter logic [BlockAw-1:0] DMA_DST_PTR_INC_D1_OFFSET = 7'h20; + parameter logic [BlockAw-1:0] DMA_DST_PTR_INC_D2_OFFSET = 7'h24; + parameter logic [BlockAw-1:0] DMA_SLOT_OFFSET = 7'h28; + parameter logic [BlockAw-1:0] DMA_SRC_DATA_TYPE_OFFSET = 7'h2c; + parameter logic [BlockAw-1:0] DMA_DST_DATA_TYPE_OFFSET = 7'h30; + parameter logic [BlockAw-1:0] 
DMA_SIGN_EXT_OFFSET = 7'h34; + parameter logic [BlockAw-1:0] DMA_MODE_OFFSET = 7'h38; + parameter logic [BlockAw-1:0] DMA_DIM_CONFIG_OFFSET = 7'h3c; + parameter logic [BlockAw-1:0] DMA_DIM_INV_OFFSET = 7'h40; + parameter logic [BlockAw-1:0] DMA_PAD_TOP_OFFSET = 7'h44; + parameter logic [BlockAw-1:0] DMA_PAD_BOTTOM_OFFSET = 7'h48; + parameter logic [BlockAw-1:0] DMA_PAD_RIGHT_OFFSET = 7'h4c; + parameter logic [BlockAw-1:0] DMA_PAD_LEFT_OFFSET = 7'h50; + parameter logic [BlockAw-1:0] DMA_WINDOW_SIZE_OFFSET = 7'h54; + parameter logic [BlockAw-1:0] DMA_WINDOW_COUNT_OFFSET = 7'h58; + parameter logic [BlockAw-1:0] DMA_INTERRUPT_EN_OFFSET = 7'h5c; // Reset values for hwext registers and their fields parameter logic [1:0] DMA_STATUS_RESVAL = 2'h1; @@ -114,31 +174,55 @@ package dma_reg_pkg; DMA_SRC_PTR, DMA_DST_PTR, DMA_ADDR_PTR, - DMA_SIZE, + DMA_SIZE_D1, + DMA_SIZE_D2, DMA_STATUS, - DMA_PTR_INC, + DMA_SRC_PTR_INC_D1, + DMA_SRC_PTR_INC_D2, + DMA_DST_PTR_INC_D1, + DMA_DST_PTR_INC_D2, DMA_SLOT, - DMA_DATA_TYPE, + DMA_SRC_DATA_TYPE, + DMA_DST_DATA_TYPE, + DMA_SIGN_EXT, DMA_MODE, + DMA_DIM_CONFIG, + DMA_DIM_INV, + DMA_PAD_TOP, + DMA_PAD_BOTTOM, + DMA_PAD_RIGHT, + DMA_PAD_LEFT, DMA_WINDOW_SIZE, DMA_WINDOW_COUNT, DMA_INTERRUPT_EN } dma_id_e; // Register width information to check illegal writes - parameter logic [3:0] DMA_PERMIT[12] = '{ + parameter logic [3:0] DMA_PERMIT[24] = '{ 4'b1111, // index[ 0] DMA_SRC_PTR 4'b1111, // index[ 1] DMA_DST_PTR 4'b1111, // index[ 2] DMA_ADDR_PTR - 4'b1111, // index[ 3] DMA_SIZE - 4'b0001, // index[ 4] DMA_STATUS - 4'b0011, // index[ 5] DMA_PTR_INC - 4'b1111, // index[ 6] DMA_SLOT - 4'b0001, // index[ 7] DMA_DATA_TYPE - 4'b0001, // index[ 8] DMA_MODE - 4'b1111, // index[ 9] DMA_WINDOW_SIZE - 4'b1111, // index[10] DMA_WINDOW_COUNT - 4'b0001 // index[11] DMA_INTERRUPT_EN + 4'b0011, // index[ 3] DMA_SIZE_D1 + 4'b0011, // index[ 4] DMA_SIZE_D2 + 4'b0001, // index[ 5] DMA_STATUS + 4'b0001, // index[ 6] DMA_SRC_PTR_INC_D1 + 4'b0111, // index[ 7] 
DMA_SRC_PTR_INC_D2 + 4'b0001, // index[ 8] DMA_DST_PTR_INC_D1 + 4'b0111, // index[ 9] DMA_DST_PTR_INC_D2 + 4'b1111, // index[10] DMA_SLOT + 4'b0001, // index[11] DMA_SRC_DATA_TYPE + 4'b0001, // index[12] DMA_DST_DATA_TYPE + 4'b0001, // index[13] DMA_SIGN_EXT + 4'b0001, // index[14] DMA_MODE + 4'b0001, // index[15] DMA_DIM_CONFIG + 4'b0001, // index[16] DMA_DIM_INV + 4'b0001, // index[17] DMA_PAD_TOP + 4'b0001, // index[18] DMA_PAD_BOTTOM + 4'b0001, // index[19] DMA_PAD_RIGHT + 4'b0001, // index[20] DMA_PAD_LEFT + 4'b0011, // index[21] DMA_WINDOW_SIZE + 4'b0001, // index[22] DMA_WINDOW_COUNT + 4'b0001 // index[23] DMA_INTERRUPT_EN }; endpackage diff --git a/hw/ip/dma/rtl/dma_reg_top.sv b/hw/ip/dma/rtl/dma_reg_top.sv index 4a3ff6d02..a57f9059e 100644 --- a/hw/ip/dma/rtl/dma_reg_top.sv +++ b/hw/ip/dma/rtl/dma_reg_top.sv @@ -10,7 +10,7 @@ module dma_reg_top #( parameter type reg_req_t = logic, parameter type reg_rsp_t = logic, - parameter int AW = 6 + parameter int AW = 7 ) ( input logic clk_i, input logic rst_ni, @@ -77,35 +77,68 @@ module dma_reg_top #( logic [31:0] addr_ptr_qs; logic [31:0] addr_ptr_wd; logic addr_ptr_we; - logic [31:0] size_qs; - logic [31:0] size_wd; - logic size_we; + logic [15:0] size_d1_qs; + logic [15:0] size_d1_wd; + logic size_d1_we; + logic [15:0] size_d2_qs; + logic [15:0] size_d2_wd; + logic size_d2_we; logic status_ready_qs; logic status_ready_re; logic status_window_done_qs; logic status_window_done_re; - logic [7:0] ptr_inc_src_ptr_inc_qs; - logic [7:0] ptr_inc_src_ptr_inc_wd; - logic ptr_inc_src_ptr_inc_we; - logic [7:0] ptr_inc_dst_ptr_inc_qs; - logic [7:0] ptr_inc_dst_ptr_inc_wd; - logic ptr_inc_dst_ptr_inc_we; + logic [5:0] src_ptr_inc_d1_qs; + logic [5:0] src_ptr_inc_d1_wd; + logic src_ptr_inc_d1_we; + logic [22:0] src_ptr_inc_d2_qs; + logic [22:0] src_ptr_inc_d2_wd; + logic src_ptr_inc_d2_we; + logic [5:0] dst_ptr_inc_d1_qs; + logic [5:0] dst_ptr_inc_d1_wd; + logic dst_ptr_inc_d1_we; + logic [22:0] dst_ptr_inc_d2_qs; + logic 
[22:0] dst_ptr_inc_d2_wd; + logic dst_ptr_inc_d2_we; logic [15:0] slot_rx_trigger_slot_qs; logic [15:0] slot_rx_trigger_slot_wd; logic slot_rx_trigger_slot_we; logic [15:0] slot_tx_trigger_slot_qs; logic [15:0] slot_tx_trigger_slot_wd; logic slot_tx_trigger_slot_we; - logic [1:0] data_type_qs; - logic [1:0] data_type_wd; - logic data_type_we; + logic [1:0] src_data_type_qs; + logic [1:0] src_data_type_wd; + logic src_data_type_we; + logic [1:0] dst_data_type_qs; + logic [1:0] dst_data_type_wd; + logic dst_data_type_we; + logic sign_ext_qs; + logic sign_ext_wd; + logic sign_ext_we; logic [1:0] mode_qs; logic [1:0] mode_wd; logic mode_we; - logic [31:0] window_size_qs; - logic [31:0] window_size_wd; + logic dim_config_qs; + logic dim_config_wd; + logic dim_config_we; + logic dim_inv_qs; + logic dim_inv_wd; + logic dim_inv_we; + logic [5:0] pad_top_qs; + logic [5:0] pad_top_wd; + logic pad_top_we; + logic [5:0] pad_bottom_qs; + logic [5:0] pad_bottom_wd; + logic pad_bottom_we; + logic [5:0] pad_right_qs; + logic [5:0] pad_right_wd; + logic pad_right_we; + logic [5:0] pad_left_qs; + logic [5:0] pad_left_wd; + logic pad_left_we; + logic [12:0] window_size_qs; + logic [12:0] window_size_wd; logic window_size_we; - logic [31:0] window_count_qs; + logic [7:0] window_count_qs; logic interrupt_en_transaction_done_qs; logic interrupt_en_transaction_done_wd; logic interrupt_en_transaction_done_we; @@ -195,30 +228,57 @@ module dma_reg_top #( ); - // R[size]: V(False) + // R[size_d1]: V(False) prim_subreg #( - .DW (32), + .DW (16), .SWACCESS("RW"), - .RESVAL (32'h0) - ) u_size ( + .RESVAL (16'h0) + ) u_size_d1 ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(size_d1_we), + .wd(size_d1_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(reg2hw.size_d1.qe), + .q (reg2hw.size_d1.q), + + // to register interface (read) + .qs(size_d1_qs) + ); + + + // R[size_d2]: V(False) + + prim_subreg #( + .DW (16), + 
.SWACCESS("RW"), + .RESVAL (16'h0) + ) u_size_d2 ( .clk_i (clk_i), .rst_ni(rst_ni), // from register interface - .we(size_we), - .wd(size_wd), + .we(size_d2_we), + .wd(size_d2_wd), // from internal hardware .de(1'b0), .d ('0), // to internal hardware - .qe(reg2hw.size.qe), - .q (reg2hw.size.q), + .qe(reg2hw.size_d2.qe), + .q (reg2hw.size_d2.q), // to register interface (read) - .qs(size_qs) + .qs(size_d2_qs) ); @@ -254,20 +314,19 @@ module dma_reg_top #( ); - // R[ptr_inc]: V(False) + // R[src_ptr_inc_d1]: V(False) - // F[src_ptr_inc]: 7:0 prim_subreg #( - .DW (8), + .DW (6), .SWACCESS("RW"), - .RESVAL (8'h4) - ) u_ptr_inc_src_ptr_inc ( + .RESVAL (6'h4) + ) u_src_ptr_inc_d1 ( .clk_i (clk_i), .rst_ni(rst_ni), // from register interface - .we(ptr_inc_src_ptr_inc_we), - .wd(ptr_inc_src_ptr_inc_wd), + .we(src_ptr_inc_d1_we), + .wd(src_ptr_inc_d1_wd), // from internal hardware .de(1'b0), @@ -275,25 +334,80 @@ module dma_reg_top #( // to internal hardware .qe(), - .q (reg2hw.ptr_inc.src_ptr_inc.q), + .q (reg2hw.src_ptr_inc_d1.q), // to register interface (read) - .qs(ptr_inc_src_ptr_inc_qs) + .qs(src_ptr_inc_d1_qs) ); - // F[dst_ptr_inc]: 15:8 + // R[src_ptr_inc_d2]: V(False) + prim_subreg #( - .DW (8), + .DW (23), + .SWACCESS("RW"), + .RESVAL (23'h4) + ) u_src_ptr_inc_d2 ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(src_ptr_inc_d2_we), + .wd(src_ptr_inc_d2_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.src_ptr_inc_d2.q), + + // to register interface (read) + .qs(src_ptr_inc_d2_qs) + ); + + + // R[dst_ptr_inc_d1]: V(False) + + prim_subreg #( + .DW (6), + .SWACCESS("RW"), + .RESVAL (6'h4) + ) u_dst_ptr_inc_d1 ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(dst_ptr_inc_d1_we), + .wd(dst_ptr_inc_d1_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.dst_ptr_inc_d1.q), + + // to register interface 
(read) + .qs(dst_ptr_inc_d1_qs) + ); + + + // R[dst_ptr_inc_d2]: V(False) + + prim_subreg #( + .DW (23), .SWACCESS("RW"), - .RESVAL (8'h4) - ) u_ptr_inc_dst_ptr_inc ( + .RESVAL (23'h4) + ) u_dst_ptr_inc_d2 ( .clk_i (clk_i), .rst_ni(rst_ni), // from register interface - .we(ptr_inc_dst_ptr_inc_we), - .wd(ptr_inc_dst_ptr_inc_wd), + .we(dst_ptr_inc_d2_we), + .wd(dst_ptr_inc_d2_wd), // from internal hardware .de(1'b0), @@ -301,10 +415,10 @@ module dma_reg_top #( // to internal hardware .qe(), - .q (reg2hw.ptr_inc.dst_ptr_inc.q), + .q (reg2hw.dst_ptr_inc_d2.q), // to register interface (read) - .qs(ptr_inc_dst_ptr_inc_qs) + .qs(dst_ptr_inc_d2_qs) ); @@ -362,19 +476,73 @@ module dma_reg_top #( ); - // R[data_type]: V(False) + // R[src_data_type]: V(False) + + prim_subreg #( + .DW (2), + .SWACCESS("RW"), + .RESVAL (2'h0) + ) u_src_data_type ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(src_data_type_we), + .wd(src_data_type_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.src_data_type.q), + + // to register interface (read) + .qs(src_data_type_qs) + ); + + + // R[dst_data_type]: V(False) prim_subreg #( .DW (2), .SWACCESS("RW"), .RESVAL (2'h0) - ) u_data_type ( + ) u_dst_data_type ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(dst_data_type_we), + .wd(dst_data_type_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.dst_data_type.q), + + // to register interface (read) + .qs(dst_data_type_qs) + ); + + + // R[sign_ext]: V(False) + + prim_subreg #( + .DW (1), + .SWACCESS("RW"), + .RESVAL (1'h0) + ) u_sign_ext ( .clk_i (clk_i), .rst_ni(rst_ni), // from register interface - .we(data_type_we), - .wd(data_type_wd), + .we(sign_ext_we), + .wd(sign_ext_wd), // from internal hardware .de(1'b0), @@ -382,10 +550,10 @@ module dma_reg_top #( // to internal hardware .qe(), - .q (reg2hw.data_type.q), + .q 
(reg2hw.sign_ext.q), // to register interface (read) - .qs(data_type_qs) + .qs(sign_ext_qs) ); @@ -416,12 +584,174 @@ module dma_reg_top #( ); + // R[dim_config]: V(False) + + prim_subreg #( + .DW (1), + .SWACCESS("RW"), + .RESVAL (1'h0) + ) u_dim_config ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(dim_config_we), + .wd(dim_config_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.dim_config.q), + + // to register interface (read) + .qs(dim_config_qs) + ); + + + // R[dim_inv]: V(False) + + prim_subreg #( + .DW (1), + .SWACCESS("RW"), + .RESVAL (1'h0) + ) u_dim_inv ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(dim_inv_we), + .wd(dim_inv_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(), + .q (reg2hw.dim_inv.q), + + // to register interface (read) + .qs(dim_inv_qs) + ); + + + // R[pad_top]: V(False) + + prim_subreg #( + .DW (6), + .SWACCESS("RW"), + .RESVAL (6'h0) + ) u_pad_top ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(pad_top_we), + .wd(pad_top_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(reg2hw.pad_top.qe), + .q (reg2hw.pad_top.q), + + // to register interface (read) + .qs(pad_top_qs) + ); + + + // R[pad_bottom]: V(False) + + prim_subreg #( + .DW (6), + .SWACCESS("RW"), + .RESVAL (6'h0) + ) u_pad_bottom ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(pad_bottom_we), + .wd(pad_bottom_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(reg2hw.pad_bottom.qe), + .q (reg2hw.pad_bottom.q), + + // to register interface (read) + .qs(pad_bottom_qs) + ); + + + // R[pad_right]: V(False) + + prim_subreg #( + .DW (6), + .SWACCESS("RW"), + .RESVAL (6'h0) + ) u_pad_right ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(pad_right_we), + 
.wd(pad_right_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(reg2hw.pad_right.qe), + .q (reg2hw.pad_right.q), + + // to register interface (read) + .qs(pad_right_qs) + ); + + + // R[pad_left]: V(False) + + prim_subreg #( + .DW (6), + .SWACCESS("RW"), + .RESVAL (6'h0) + ) u_pad_left ( + .clk_i (clk_i), + .rst_ni(rst_ni), + + // from register interface + .we(pad_left_we), + .wd(pad_left_wd), + + // from internal hardware + .de(1'b0), + .d ('0), + + // to internal hardware + .qe(reg2hw.pad_left.qe), + .q (reg2hw.pad_left.q), + + // to register interface (read) + .qs(pad_left_qs) + ); + + // R[window_size]: V(False) prim_subreg #( - .DW (32), + .DW (13), .SWACCESS("RW"), - .RESVAL (32'h0) + .RESVAL (13'h0) ) u_window_size ( .clk_i (clk_i), .rst_ni(rst_ni), @@ -446,9 +776,9 @@ module dma_reg_top #( // R[window_count]: V(False) prim_subreg #( - .DW (32), + .DW (8), .SWACCESS("RO"), - .RESVAL (32'h0) + .RESVAL (8'h0) ) u_window_count ( .clk_i (clk_i), .rst_ni(rst_ni), @@ -525,21 +855,33 @@ module dma_reg_top #( - logic [11:0] addr_hit; + logic [23:0] addr_hit; always_comb begin addr_hit = '0; addr_hit[0] = (reg_addr == DMA_SRC_PTR_OFFSET); addr_hit[1] = (reg_addr == DMA_DST_PTR_OFFSET); addr_hit[2] = (reg_addr == DMA_ADDR_PTR_OFFSET); - addr_hit[3] = (reg_addr == DMA_SIZE_OFFSET); - addr_hit[4] = (reg_addr == DMA_STATUS_OFFSET); - addr_hit[5] = (reg_addr == DMA_PTR_INC_OFFSET); - addr_hit[6] = (reg_addr == DMA_SLOT_OFFSET); - addr_hit[7] = (reg_addr == DMA_DATA_TYPE_OFFSET); - addr_hit[8] = (reg_addr == DMA_MODE_OFFSET); - addr_hit[9] = (reg_addr == DMA_WINDOW_SIZE_OFFSET); - addr_hit[10] = (reg_addr == DMA_WINDOW_COUNT_OFFSET); - addr_hit[11] = (reg_addr == DMA_INTERRUPT_EN_OFFSET); + addr_hit[3] = (reg_addr == DMA_SIZE_D1_OFFSET); + addr_hit[4] = (reg_addr == DMA_SIZE_D2_OFFSET); + addr_hit[5] = (reg_addr == DMA_STATUS_OFFSET); + addr_hit[6] = (reg_addr == DMA_SRC_PTR_INC_D1_OFFSET); + addr_hit[7] = (reg_addr == 
DMA_SRC_PTR_INC_D2_OFFSET); + addr_hit[8] = (reg_addr == DMA_DST_PTR_INC_D1_OFFSET); + addr_hit[9] = (reg_addr == DMA_DST_PTR_INC_D2_OFFSET); + addr_hit[10] = (reg_addr == DMA_SLOT_OFFSET); + addr_hit[11] = (reg_addr == DMA_SRC_DATA_TYPE_OFFSET); + addr_hit[12] = (reg_addr == DMA_DST_DATA_TYPE_OFFSET); + addr_hit[13] = (reg_addr == DMA_SIGN_EXT_OFFSET); + addr_hit[14] = (reg_addr == DMA_MODE_OFFSET); + addr_hit[15] = (reg_addr == DMA_DIM_CONFIG_OFFSET); + addr_hit[16] = (reg_addr == DMA_DIM_INV_OFFSET); + addr_hit[17] = (reg_addr == DMA_PAD_TOP_OFFSET); + addr_hit[18] = (reg_addr == DMA_PAD_BOTTOM_OFFSET); + addr_hit[19] = (reg_addr == DMA_PAD_RIGHT_OFFSET); + addr_hit[20] = (reg_addr == DMA_PAD_LEFT_OFFSET); + addr_hit[21] = (reg_addr == DMA_WINDOW_SIZE_OFFSET); + addr_hit[22] = (reg_addr == DMA_WINDOW_COUNT_OFFSET); + addr_hit[23] = (reg_addr == DMA_INTERRUPT_EN_OFFSET); end assign addrmiss = (reg_re || reg_we) ? ~|addr_hit : 1'b0; @@ -558,7 +900,19 @@ module dma_reg_top #( (addr_hit[ 8] & (|(DMA_PERMIT[ 8] & ~reg_be))) | (addr_hit[ 9] & (|(DMA_PERMIT[ 9] & ~reg_be))) | (addr_hit[10] & (|(DMA_PERMIT[10] & ~reg_be))) | - (addr_hit[11] & (|(DMA_PERMIT[11] & ~reg_be))))); + (addr_hit[11] & (|(DMA_PERMIT[11] & ~reg_be))) | + (addr_hit[12] & (|(DMA_PERMIT[12] & ~reg_be))) | + (addr_hit[13] & (|(DMA_PERMIT[13] & ~reg_be))) | + (addr_hit[14] & (|(DMA_PERMIT[14] & ~reg_be))) | + (addr_hit[15] & (|(DMA_PERMIT[15] & ~reg_be))) | + (addr_hit[16] & (|(DMA_PERMIT[16] & ~reg_be))) | + (addr_hit[17] & (|(DMA_PERMIT[17] & ~reg_be))) | + (addr_hit[18] & (|(DMA_PERMIT[18] & ~reg_be))) | + (addr_hit[19] & (|(DMA_PERMIT[19] & ~reg_be))) | + (addr_hit[20] & (|(DMA_PERMIT[20] & ~reg_be))) | + (addr_hit[21] & (|(DMA_PERMIT[21] & ~reg_be))) | + (addr_hit[22] & (|(DMA_PERMIT[22] & ~reg_be))) | + (addr_hit[23] & (|(DMA_PERMIT[23] & ~reg_be))))); end assign src_ptr_we = addr_hit[0] & reg_we & !reg_error; @@ -570,38 +924,71 @@ module dma_reg_top #( assign addr_ptr_we = addr_hit[2] & reg_we 
& !reg_error; assign addr_ptr_wd = reg_wdata[31:0]; - assign size_we = addr_hit[3] & reg_we & !reg_error; - assign size_wd = reg_wdata[31:0]; + assign size_d1_we = addr_hit[3] & reg_we & !reg_error; + assign size_d1_wd = reg_wdata[15:0]; - assign status_ready_re = addr_hit[4] & reg_re & !reg_error; + assign size_d2_we = addr_hit[4] & reg_we & !reg_error; + assign size_d2_wd = reg_wdata[15:0]; - assign status_window_done_re = addr_hit[4] & reg_re & !reg_error; + assign status_ready_re = addr_hit[5] & reg_re & !reg_error; - assign ptr_inc_src_ptr_inc_we = addr_hit[5] & reg_we & !reg_error; - assign ptr_inc_src_ptr_inc_wd = reg_wdata[7:0]; + assign status_window_done_re = addr_hit[5] & reg_re & !reg_error; - assign ptr_inc_dst_ptr_inc_we = addr_hit[5] & reg_we & !reg_error; - assign ptr_inc_dst_ptr_inc_wd = reg_wdata[15:8]; + assign src_ptr_inc_d1_we = addr_hit[6] & reg_we & !reg_error; + assign src_ptr_inc_d1_wd = reg_wdata[5:0]; - assign slot_rx_trigger_slot_we = addr_hit[6] & reg_we & !reg_error; + assign src_ptr_inc_d2_we = addr_hit[7] & reg_we & !reg_error; + assign src_ptr_inc_d2_wd = reg_wdata[22:0]; + + assign dst_ptr_inc_d1_we = addr_hit[8] & reg_we & !reg_error; + assign dst_ptr_inc_d1_wd = reg_wdata[5:0]; + + assign dst_ptr_inc_d2_we = addr_hit[9] & reg_we & !reg_error; + assign dst_ptr_inc_d2_wd = reg_wdata[22:0]; + + assign slot_rx_trigger_slot_we = addr_hit[10] & reg_we & !reg_error; assign slot_rx_trigger_slot_wd = reg_wdata[15:0]; - assign slot_tx_trigger_slot_we = addr_hit[6] & reg_we & !reg_error; + assign slot_tx_trigger_slot_we = addr_hit[10] & reg_we & !reg_error; assign slot_tx_trigger_slot_wd = reg_wdata[31:16]; - assign data_type_we = addr_hit[7] & reg_we & !reg_error; - assign data_type_wd = reg_wdata[1:0]; + assign src_data_type_we = addr_hit[11] & reg_we & !reg_error; + assign src_data_type_wd = reg_wdata[1:0]; + + assign dst_data_type_we = addr_hit[12] & reg_we & !reg_error; + assign dst_data_type_wd = reg_wdata[1:0]; - assign mode_we = 
addr_hit[8] & reg_we & !reg_error; + assign sign_ext_we = addr_hit[13] & reg_we & !reg_error; + assign sign_ext_wd = reg_wdata[0]; + + assign mode_we = addr_hit[14] & reg_we & !reg_error; assign mode_wd = reg_wdata[1:0]; - assign window_size_we = addr_hit[9] & reg_we & !reg_error; - assign window_size_wd = reg_wdata[31:0]; + assign dim_config_we = addr_hit[15] & reg_we & !reg_error; + assign dim_config_wd = reg_wdata[0]; + + assign dim_inv_we = addr_hit[16] & reg_we & !reg_error; + assign dim_inv_wd = reg_wdata[0]; + + assign pad_top_we = addr_hit[17] & reg_we & !reg_error; + assign pad_top_wd = reg_wdata[5:0]; + + assign pad_bottom_we = addr_hit[18] & reg_we & !reg_error; + assign pad_bottom_wd = reg_wdata[5:0]; - assign interrupt_en_transaction_done_we = addr_hit[11] & reg_we & !reg_error; + assign pad_right_we = addr_hit[19] & reg_we & !reg_error; + assign pad_right_wd = reg_wdata[5:0]; + + assign pad_left_we = addr_hit[20] & reg_we & !reg_error; + assign pad_left_wd = reg_wdata[5:0]; + + assign window_size_we = addr_hit[21] & reg_we & !reg_error; + assign window_size_wd = reg_wdata[12:0]; + + assign interrupt_en_transaction_done_we = addr_hit[23] & reg_we & !reg_error; assign interrupt_en_transaction_done_wd = reg_wdata[0]; - assign interrupt_en_window_done_we = addr_hit[11] & reg_we & !reg_error; + assign interrupt_en_window_done_we = addr_hit[23] & reg_we & !reg_error; assign interrupt_en_window_done_wd = reg_wdata[1]; // Read data return @@ -621,41 +1008,88 @@ module dma_reg_top #( end addr_hit[3]: begin - reg_rdata_next[31:0] = size_qs; + reg_rdata_next[15:0] = size_d1_qs; end addr_hit[4]: begin - reg_rdata_next[0] = status_ready_qs; - reg_rdata_next[1] = status_window_done_qs; + reg_rdata_next[15:0] = size_d2_qs; end addr_hit[5]: begin - reg_rdata_next[7:0] = ptr_inc_src_ptr_inc_qs; - reg_rdata_next[15:8] = ptr_inc_dst_ptr_inc_qs; + reg_rdata_next[0] = status_ready_qs; + reg_rdata_next[1] = status_window_done_qs; end addr_hit[6]: begin - 
reg_rdata_next[15:0] = slot_rx_trigger_slot_qs; - reg_rdata_next[31:16] = slot_tx_trigger_slot_qs; + reg_rdata_next[5:0] = src_ptr_inc_d1_qs; end addr_hit[7]: begin - reg_rdata_next[1:0] = data_type_qs; + reg_rdata_next[22:0] = src_ptr_inc_d2_qs; end addr_hit[8]: begin - reg_rdata_next[1:0] = mode_qs; + reg_rdata_next[5:0] = dst_ptr_inc_d1_qs; end addr_hit[9]: begin - reg_rdata_next[31:0] = window_size_qs; + reg_rdata_next[22:0] = dst_ptr_inc_d2_qs; end addr_hit[10]: begin - reg_rdata_next[31:0] = window_count_qs; + reg_rdata_next[15:0] = slot_rx_trigger_slot_qs; + reg_rdata_next[31:16] = slot_tx_trigger_slot_qs; end addr_hit[11]: begin + reg_rdata_next[1:0] = src_data_type_qs; + end + + addr_hit[12]: begin + reg_rdata_next[1:0] = dst_data_type_qs; + end + + addr_hit[13]: begin + reg_rdata_next[0] = sign_ext_qs; + end + + addr_hit[14]: begin + reg_rdata_next[1:0] = mode_qs; + end + + addr_hit[15]: begin + reg_rdata_next[0] = dim_config_qs; + end + + addr_hit[16]: begin + reg_rdata_next[0] = dim_inv_qs; + end + + addr_hit[17]: begin + reg_rdata_next[5:0] = pad_top_qs; + end + + addr_hit[18]: begin + reg_rdata_next[5:0] = pad_bottom_qs; + end + + addr_hit[19]: begin + reg_rdata_next[5:0] = pad_right_qs; + end + + addr_hit[20]: begin + reg_rdata_next[5:0] = pad_left_qs; + end + + addr_hit[21]: begin + reg_rdata_next[12:0] = window_size_qs; + end + + addr_hit[22]: begin + reg_rdata_next[7:0] = window_count_qs; + end + + addr_hit[23]: begin reg_rdata_next[0] = interrupt_en_transaction_done_qs; reg_rdata_next[1] = interrupt_en_window_done_qs; end @@ -681,7 +1115,7 @@ module dma_reg_top #( endmodule module dma_reg_top_intf #( - parameter int AW = 6, + parameter int AW = 7, localparam int DW = 32 ) ( input logic clk_i, diff --git a/hw/ip/power_manager/data/power_manager.hjson.tpl b/hw/ip/power_manager/data/power_manager.hjson.tpl index fb544a591..cdfd6b2dc 100644 --- a/hw/ip/power_manager/data/power_manager.hjson.tpl +++ b/hw/ip/power_manager/data/power_manager.hjson.tpl 
@@ -214,64 +214,64 @@ } -% for bank in range(ram_numbanks): - { name: "RAM_${bank}_CLK_GATE", - desc: "Clock-gates the RAM_${bank} domain", +% for bank in xheep.iter_ram_banks(): + { name: "RAM_${bank.name()}_CLK_GATE", + desc: "Clock-gates the RAM_${bank.name()} domain", resval: "0x00000000" swaccess: "rw", hwaccess: "hro", fields: [ - { bits: "0", name: "RAM_${bank}_CLK_GATE", desc: "Clock-gates the RAM_${bank} domain" } + { bits: "0", name: "RAM_${bank.name()}_CLK_GATE", desc: "Clock-gates the RAM_${bank.name()} domain" } ] } - { name: "POWER_GATE_RAM_BLOCK_${bank}_ACK", - desc: "Used by the ram ${bank} switch to ack the power manager", + { name: "POWER_GATE_RAM_BLOCK_${bank.name()}_ACK", + desc: "Used by the ram ${bank.name()} switch to ack the power manager", resval: "0x00000000" swaccess: "ro", hwaccess: "hrw", fields: [ - { bits: "0", name: "POWER_GATE_RAM_BLOCK_${bank}_ACK", desc: "Power Gate Ram Block ${bank} Ack Reg" } + { bits: "0", name: "POWER_GATE_RAM_BLOCK_${bank.name()}_ACK", desc: "Power Gate Ram Block ${bank.name()} Ack Reg" } ] } - { name: "RAM_${bank}_SWITCH", - desc: "Switch off the RAM_${bank} domain", + { name: "RAM_${bank.name()}_SWITCH", + desc: "Switch off the RAM_${bank.name()} domain", resval: "0x00000000" swaccess: "rw", hwaccess: "hro", fields: [ - { bits: "0", name: "RAM_${bank}_SWITCH", desc: "Switch off RAM_${bank} domain" } + { bits: "0", name: "RAM_${bank.name()}_SWITCH", desc: "Switch off RAM_${bank.name()} domain" } ] } - { name: "RAM_${bank}_WAIT_ACK_SWITCH_ON", - desc: "Wait for the RAM_${bank} domain switch ack", + { name: "RAM_${bank.name()}_WAIT_ACK_SWITCH_ON", + desc: "Wait for the RAM_${bank.name()} domain switch ack", resval: "0x00000000" swaccess: "rw", hwaccess: "hro", fields: [ - { bits: "0", name: "RAM_${bank}_WAIT_ACK_SWITCH_ON", desc: "Wait RAM_${bank} domain switch ack" } + { bits: "0", name: "RAM_${bank.name()}_WAIT_ACK_SWITCH_ON", desc: "Wait RAM_${bank.name()} domain switch ack" } ] } - { name: 
"RAM_${bank}_ISO", - desc: "Set on the isolation of the RAM_${bank} domain", + { name: "RAM_${bank.name()}_ISO", + desc: "Set on the isolation of the RAM_${bank.name()} domain", resval: "0x00000000" swaccess: "rw", hwaccess: "hro", fields: [ - { bits: "0", name: "RAM_${bank}_ISO", desc: "Set on isolation of RAM_${bank} domain" } + { bits: "0", name: "RAM_${bank.name()}_ISO", desc: "Set on isolation of RAM_${bank.name()} domain" } ] } - { name: "RAM_${bank}_RETENTIVE", - desc: "Set on retentive mode for the RAM_${bank} domain", + { name: "RAM_${bank.name()}_RETENTIVE", + desc: "Set on retentive mode for the RAM_${bank.name()} domain", resval: "0x00000000" swaccess: "rw", hwaccess: "hro", fields: [ - { bits: "0", name: "RAM_${bank}_RETENTIVE", desc: "Set on retentive mode for RAM_${bank} domain" } + { bits: "0", name: "RAM_${bank.name()}_RETENTIVE", desc: "Set on retentive mode for RAM_${bank.name()} domain" } ] } @@ -368,14 +368,14 @@ ] } -% for bank in range(ram_numbanks): - { name: "MONITOR_POWER_GATE_RAM_BLOCK_${bank}", - desc: "Used to monitor the signals to power gate ram block ${bank}", +% for bank in xheep.iter_ram_banks(): + { name: "MONITOR_POWER_GATE_RAM_BLOCK_${bank.name()}", + desc: "Used to monitor the signals to power gate ram block ${bank.name()}", resval: "0x00000000" swaccess: "ro", hwaccess: "hwo", fields: [ - { bits: "1:0", name: "MONITOR_POWER_GATE_RAM_BLOCK_${bank}", desc: "Monitor Signals Power Gate Ram Block ${bank} Reg" } + { bits: "1:0", name: "MONITOR_POWER_GATE_RAM_BLOCK_${bank.name()}", desc: "Monitor Signals Power Gate Ram Block ${bank.name()} Reg" } ] } diff --git a/hw/ip/power_manager/data/power_manager.sv.tpl b/hw/ip/power_manager/data/power_manager.sv.tpl index 4b8ba22b9..c37b0b802 100644 --- a/hw/ip/power_manager/data/power_manager.sv.tpl +++ b/hw/ip/power_manager/data/power_manager.sv.tpl @@ -147,8 +147,8 @@ module power_manager #( assign peripheral_subsystem_clkgate_en_no = ~reg2hw.periph_clk_gate.q; -% for bank in 
range(ram_numbanks): - assign memory_subsystem_clkgate_en_no[${bank}] = ~reg2hw.ram_${bank}_clk_gate.q; +% for bank in xheep.iter_ram_banks(): + assign memory_subsystem_clkgate_en_no[${bank.name()}] = ~reg2hw.ram_${bank.name()}_clk_gate.q; % endfor % for ext in range(external_domains): @@ -417,75 +417,75 @@ module power_manager #( .switch_onoff_signal_o(peripheral_subsystem_powergate_iso_n) ); -% for bank in range(ram_numbanks): +% for bank in xheep.iter_ram_banks(): // -------------------------------------------------------------------------------------- - // RAM_${bank} DOMAIN + // RAM_${bank.name()} DOMAIN // -------------------------------------------------------------------------------------- - logic ram_${bank}_subsystem_powergate_switch_ack_sync; + logic ram_${bank.name()}_subsystem_powergate_switch_ack_sync; sync #( .ResetValue(1'b0) - ) sync_ram_${bank}_ack_i ( + ) sync_ram_${bank.name()}_ack_i ( .clk_i, .rst_ni, - .serial_i(memory_subsystem_banks_powergate_switch_ack_ni[${bank}]), - .serial_o(ram_${bank}_subsystem_powergate_switch_ack_sync) + .serial_i(memory_subsystem_banks_powergate_switch_ack_ni[${bank.name()}]), + .serial_o(ram_${bank.name()}_subsystem_powergate_switch_ack_sync) ); - assign hw2reg.power_gate_ram_block_${bank}_ack.de = 1'b1; - assign hw2reg.power_gate_ram_block_${bank}_ack.d = ram_${bank}_subsystem_powergate_switch_ack_sync; + assign hw2reg.power_gate_ram_block_${bank.name()}_ack.de = 1'b1; + assign hw2reg.power_gate_ram_block_${bank.name()}_ack.d = ram_${bank.name()}_subsystem_powergate_switch_ack_sync; //if you want to wait for ACK, or just bypass it - logic ram_${bank}_switch_wait_ack; - assign ram_${bank}_switch_wait_ack = reg2hw.ram_${bank}_wait_ack_switch_on.q ? reg2hw.power_gate_ram_block_${bank}_ack.q == SWITCH_IDLE_VALUE : 1'b1; + logic ram_${bank.name()}_switch_wait_ack; + assign ram_${bank.name()}_switch_wait_ack = reg2hw.ram_${bank.name()}_wait_ack_switch_on.q ? 
reg2hw.power_gate_ram_block_${bank.name()}_ack.q == SWITCH_IDLE_VALUE : 1'b1; power_manager_sequence #( .IDLE_VALUE(SWITCH_IDLE_VALUE), .ONOFF_AT_RESET(SWITCH_VALUE_AT_RESET) - ) power_manager_sequence_ram_${bank}_switch_i ( + ) power_manager_sequence_ram_${bank.name()}_switch_i ( .clk_i, .rst_ni, // trigger to start the sequence - .start_off_sequence_i(reg2hw.ram_${bank}_switch.q), - .start_on_sequence_i (~reg2hw.ram_${bank}_switch.q), + .start_off_sequence_i(reg2hw.ram_${bank.name()}_switch.q), + .start_on_sequence_i (~reg2hw.ram_${bank.name()}_switch.q), .switch_ack_i (1'b1), // switch on and off signal, 1 means on - .switch_onoff_signal_o(memory_subsystem_banks_powergate_switch_n[${bank}]) + .switch_onoff_signal_o(memory_subsystem_banks_powergate_switch_n[${bank.name()}]) ); power_manager_sequence #( .IDLE_VALUE(ISO_IDLE_VALUE), .ONOFF_AT_RESET(ISO_VALUE_AT_RESET) - ) power_manager_sequence_ram_${bank}_iso_i ( + ) power_manager_sequence_ram_${bank.name()}_iso_i ( .clk_i, .rst_ni, // trigger to start the sequence - .start_off_sequence_i(reg2hw.ram_${bank}_iso.q), - .start_on_sequence_i (~reg2hw.ram_${bank}_iso.q), - .switch_ack_i (ram_${bank}_switch_wait_ack), + .start_off_sequence_i(reg2hw.ram_${bank.name()}_iso.q), + .start_on_sequence_i (~reg2hw.ram_${bank.name()}_iso.q), + .switch_ack_i (ram_${bank.name()}_switch_wait_ack), // switch on and off signal, 1 means on - .switch_onoff_signal_o(memory_subsystem_banks_powergate_iso_n[${bank}]) + .switch_onoff_signal_o(memory_subsystem_banks_powergate_iso_n[${bank.name()}]) ); power_manager_sequence #( .IDLE_VALUE(ISO_IDLE_VALUE), .ONOFF_AT_RESET(ISO_VALUE_AT_RESET) - ) power_manager_sequence_ram_${bank}_retentive_i ( + ) power_manager_sequence_ram_${bank.name()}_retentive_i ( .clk_i, .rst_ni, // trigger to start the sequence - .start_off_sequence_i(reg2hw.ram_${bank}_retentive.q), - .start_on_sequence_i (~reg2hw.ram_${bank}_retentive.q), + .start_off_sequence_i(reg2hw.ram_${bank.name()}_retentive.q), + 
.start_on_sequence_i (~reg2hw.ram_${bank.name()}_retentive.q), .switch_ack_i (1'b1), // switch on and off signal, 1 means on - .switch_onoff_signal_o(memory_subsystem_banks_set_retentive_no[${bank}]) + .switch_onoff_signal_o(memory_subsystem_banks_set_retentive_no[${bank.name()}]) ); % endfor @@ -587,9 +587,9 @@ module power_manager #( assign hw2reg.monitor_power_gate_periph.de = 1'b1; assign hw2reg.monitor_power_gate_periph.d = {peripheral_subsystem_rst_n, peripheral_subsystem_powergate_iso_n, peripheral_subsystem_powergate_switch_n}; -% for bank in range(ram_numbanks): - assign hw2reg.monitor_power_gate_ram_block_${bank}.de = 1'b1; - assign hw2reg.monitor_power_gate_ram_block_${bank}.d = {memory_subsystem_banks_powergate_iso_n[${bank}], memory_subsystem_banks_powergate_switch_n[${bank}]}; +% for bank in xheep.iter_ram_banks(): + assign hw2reg.monitor_power_gate_ram_block_${bank.name()}.de = 1'b1; + assign hw2reg.monitor_power_gate_ram_block_${bank.name()}.d = {memory_subsystem_banks_powergate_iso_n[${bank.name()}], memory_subsystem_banks_powergate_switch_n[${bank.name()}]}; % endfor diff --git a/hw/ip_examples/slow_memory/rtl/slow_memory.sv b/hw/ip_examples/slow_memory/rtl/slow_memory.sv index 785ddd467..6cedd80ae 100644 --- a/hw/ip_examples/slow_memory/rtl/slow_memory.sv +++ b/hw/ip_examples/slow_memory/rtl/slow_memory.sv @@ -79,12 +79,21 @@ module slow_memory #( always_comb begin - gnt_o = 1'b0; - rvalid_o = rvalid_q; - state_n = state_q; - counter_n = counter_q - 1; - rvalid_n = rvalid_q; - + gnt_o = 1'b0; + rvalid_o = rvalid_q; + state_n = state_q; + counter_n = counter_q - 1; + rvalid_n = rvalid_q; + mem_req = '0; + mem_we = '0; + mem_addr = '0; + mem_wdata = '0; + mem_be = '0; + mem_req_n = mem_req_q; + mem_we_n = mem_we_q; + mem_addr_n = mem_addr_q; + mem_wdata_n = mem_wdata_q; + mem_be_n = mem_be_q; unique case (state_q) READY: begin @@ -92,13 +101,13 @@ module slow_memory #( if (req_i) begin gnt_o = random1[0]; if (gnt_o) begin - state_n = WAIT_RVALID; - 
counter_n = random2[4:0] + 1; - mem_req_n <= req_i; - mem_we_n <= we_i; - mem_addr_n <= addr_i; - mem_wdata_n <= wdata_i; - mem_be_n <= be_i; + state_n = WAIT_RVALID; + counter_n = random2[4:0] + 1; + mem_req_n = req_i; + mem_we_n = we_i; + mem_addr_n = addr_i; + mem_wdata_n = wdata_i; + mem_be_n = be_i; end end end diff --git a/hw/simulation/pad_cell_inout.sv b/hw/simulation/pad_cell_inout.sv index 5dcc54493..3529b2570 100644 --- a/hw/simulation/pad_cell_inout.sv +++ b/hw/simulation/pad_cell_inout.sv @@ -5,6 +5,7 @@ /* verilator lint_off UNUSED */ module pad_cell_inout #( parameter PADATTR = 16, + parameter core_v_mini_mcu_pkg::pad_side_e SIDE = core_v_mini_mcu_pkg::TOP, //do not touch these parameters parameter PADATTR_RND = PADATTR == 0 ? 1 : PADATTR ) ( diff --git a/hw/simulation/pad_cell_input.sv b/hw/simulation/pad_cell_input.sv index cfd9c4261..5a7b053ad 100644 --- a/hw/simulation/pad_cell_input.sv +++ b/hw/simulation/pad_cell_input.sv @@ -5,6 +5,7 @@ /* verilator lint_off UNUSED */ module pad_cell_input #( parameter PADATTR = 16, + parameter core_v_mini_mcu_pkg::pad_side_e SIDE = core_v_mini_mcu_pkg::TOP, //do not touch these parameters parameter PADATTR_RND = PADATTR == 0 ? 1 : PADATTR ) ( diff --git a/hw/simulation/pad_cell_output.sv b/hw/simulation/pad_cell_output.sv index cc4960eaf..794e177d0 100644 --- a/hw/simulation/pad_cell_output.sv +++ b/hw/simulation/pad_cell_output.sv @@ -5,6 +5,7 @@ /* verilator lint_off UNUSED */ module pad_cell_output #( parameter PADATTR = 16, + parameter core_v_mini_mcu_pkg::pad_side_e SIDE = core_v_mini_mcu_pkg::TOP, //do not touch these parameters parameter PADATTR_RND = PADATTR == 0 ? 1 : PADATTR ) ( diff --git a/hw/vendor/esl_epfl_cv32e40px.core b/hw/vendor/esl_epfl_cv32e40px.core index 56a485a41..a31337d17 100644 --- a/hw/vendor/esl_epfl_cv32e40px.core +++ b/hw/vendor/esl_epfl_cv32e40px.core @@ -60,3 +60,4 @@ targets: - files_rtl - ff_regfile - target_sim? (files_clk_gate) + - target_sim_sc? 
(files_clk_gate) diff --git a/hw/vendor/esl_epfl_cv32e40px.lock.hjson b/hw/vendor/esl_epfl_cv32e40px.lock.hjson index 7248758f0..2871b8ed4 100644 --- a/hw/vendor/esl_epfl_cv32e40px.lock.hjson +++ b/hw/vendor/esl_epfl_cv32e40px.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/esl-epfl/cv32e40px.git - rev: 49770e7dd5d569f440810866f4f33ce6a4f7ef1f + rev: 15b9dd6077513342cf44e6853a5fc33098f2e73b } } diff --git a/hw/vendor/esl_epfl_cv32e40px.vendor.hjson b/hw/vendor/esl_epfl_cv32e40px.vendor.hjson index 547959fe2..d0e289ae0 100644 --- a/hw/vendor/esl_epfl_cv32e40px.vendor.hjson +++ b/hw/vendor/esl_epfl_cv32e40px.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/esl-epfl/cv32e40px.git", - rev: "49770e7dd5d569f440810866f4f33ce6a4f7ef1f", + rev: "15b9dd6077513342cf44e6853a5fc33098f2e73b", }, exclude_from_upstream: [ diff --git a/hw/vendor/esl_epfl_cv32e40px/.gitignore b/hw/vendor/esl_epfl_cv32e40px/.gitignore index ef351b194..6c2cd7917 100644 --- a/hw/vendor/esl_epfl_cv32e40px/.gitignore +++ b/hw/vendor/esl_epfl_cv32e40px/.gitignore @@ -18,3 +18,11 @@ TAGS /build /Bender.lock /Bender.local +golden_reference_design +ref_design +golden.src +revised.src +cadence_conformal +synopsys_formality +questa_autocheck +reports diff --git a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_instr_trace.svh b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_instr_trace.svh index a89ed4e45..355bc7382 100644 --- a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_instr_trace.svh +++ b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_instr_trace.svh @@ -1,23 +1,37 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// This file, and derivatives thereof are licensed under the +// Solderpad License, Version 2.0 (the "License"). 
// -// https://solderpad.org/licenses/ +// Use of this file means you agree to the terms and conditions +// of the license and are in full compliance with the License. +// +// You may obtain a copy of the License at: +// +// https://solderpad.org/licenses/SHL-2.0/ // // Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// and hardware implementations thereof distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, EITHER EXPRESSED OR IMPLIED. +// // See the License for the specific language governing permissions and // limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// Tracer data structures and functions -// -// Contributors: Steve Richmond, Silicon Labs -// Pascal Gouedo, Dolphin Design +//////////////////////////////////////////////////////////////////////////////// +// Engineer: Steve Richmond - steve.richmond@silabs.com // +// // +// Design Name: cv32e40p_tracer data structures // +// Project Name: CV32E40P // +// Language: SystemVerilog // +// // +// Description: Moves the class definition for instr_trace_t out of the // +// tracer module for readability and code partitioning // +// // +// Includes various enhancements to make the instr_trace_t // +// class more comprehensive // +// // +//////////////////////////////////////////////////////////////////////////////// typedef struct { logic [5:0] addr; @@ -35,9 +49,13 @@ typedef struct { class instr_trace_t; time simtime; + time stoptime; + bit external_time; int cycles; + int stopcycles; logic [31:0] pc; logic [31:0] instr; + string ctx; //Used to add context in the trace log file (Canceled, debug, interrput,....) 
bit compressed; bit wb_bypass; bit misaligned; @@ -56,10 +74,15 @@ class instr_trace_t; regs_read = {}; regs_write = {}; mem_access = {}; + external_time = 0; + stoptime = 0; + stopcycles = 0; endfunction function void init(int unsigned cycles, bit [31:0] pc, bit compressed, bit [31:0] instr); - this.simtime = $time; + if(!this.external_time) begin + this.simtime = $time; + end this.cycles = cycles; this.pc = pc; this.compressed = compressed; @@ -308,7 +331,23 @@ class instr_trace_t; begin string insn_str; // Accumulate writes into a single string to enable single $fwrite - insn_str = $sformatf("%t %15d %h %h %-36s", simtime, cycles, pc, instr, str); + if(simtime < 100ns) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 1us) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 10us) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 100us) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 1ms) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 10ms) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 100ms) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else begin + insn_str = $sformatf("%t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end foreach (regs_write[i]) begin if (regs_write[i].addr != 0) @@ -330,6 +369,12 @@ class instr_trace_t; insn_str = $sformatf("%s PA:%08x", insn_str, mem_acc.addr); end + casex (instr) + INSTR_FDIV: insn_str = $sformatf("%s %15d %t", insn_str, stopcycles, stoptime); + INSTR_FSQRT:insn_str = $sformatf("%s %15d %t", insn_str, stopcycles, 
stoptime); + default: ; + endcase + $fwrite(f, "%s\n", insn_str); end endfunction @@ -489,7 +534,7 @@ class instr_trace_t; begin mnemonic = {compressed ? "c." : "", mnemonic}; regs_read.push_back('{rs1, rs1_value, 0}); - str = $sformatf("%-16s %s, %0d", mnemonic, regAddrToStr(rs1), $signed(imm_sb_type)); + str = $sformatf("%-16s %s, %0d, %0d", mnemonic, regAddrToStr(rs1), $signed(imm_s2_type), $signed(imm_sb_type)); end endfunction // printSBInstr @@ -587,14 +632,14 @@ class instr_trace_t; // immediate post-incremented load regs_read.push_back('{rs1, rs1_value, 0}); regs_write.push_back('{rs1, 'x, 0}); - str = $sformatf("cv.%-13s %s, %0d(x%0d!)", mnemonic, regAddrToStr(rd), $signed(imm_i_type), rs1); + str = $sformatf("cv.%-13s %s, (x%0d), %0d", mnemonic, regAddrToStr(rd), rs1, $signed(imm_i_type)); end else if (instr[6:0] == OPCODE_CUSTOM_1) begin if (instr[27] == 1'b0) begin // reg-reg post-incremented load regs_read.push_back('{rs2, rs2_value, 0}); regs_read.push_back('{rs1, rs1_value, 0}); regs_write.push_back('{rs1, 'x, 0}); - str = $sformatf("cv.%-13s %s, %s(x%0d!)", mnemonic, regAddrToStr(rd), regAddrToStr(rs2), rs1); + str = $sformatf("cv.%-13s %s, (x%0d), %s", mnemonic, regAddrToStr(rd), rs1, regAddrToStr(rs2)); end else begin // reg-reg indexed load regs_read.push_back('{rs2, rs2_value, 0}); @@ -637,7 +682,7 @@ class instr_trace_t; regs_read.push_back('{rs2, rs2_value, 0}); regs_read.push_back('{rs1, rs1_value, 0}); regs_write.push_back('{rs1, 'x, 0}); - str = $sformatf("cv.%-14s %s, %0d(x%0d!)", mnemonic, regAddrToStr(rs2), $signed(imm_s_type), rs1); + str = $sformatf("cv.%-14s %s, (x%0d), %0d", mnemonic, regAddrToStr(rs2), rs1, $signed(imm_s_type)); end else if (instr[31:28] == 4'b0010) begin if (instr[27] == 1'b0) begin // reg-reg post-incremented store @@ -645,7 +690,7 @@ class instr_trace_t; regs_read.push_back('{rs3, rs3_value, 0}); regs_read.push_back('{rs1, rs1_value, 0}); regs_write.push_back('{rs1, 'x, 0}); - str = $sformatf("cv.%-13s %s, 
%s(x%0d!)", mnemonic, regAddrToStr(rs2), regAddrToStr(rs3), rs1); + str = $sformatf("cv.%-13s %s, (x%0d), %s", mnemonic, regAddrToStr(rs2), rs1, regAddrToStr(rs3)); end else begin // reg-reg indexed store regs_read.push_back('{rs2, rs2_value, 0}); @@ -757,238 +802,429 @@ class instr_trace_t; else str_hb = ".h"; // set mnemonic - case (instr[31:26]) - 6'b000000: begin + case (instr) + INSTR_CVADDH , + INSTR_CVADDSCH , + INSTR_CVADDSCIH, + INSTR_CVADDB , + INSTR_CVADDSCB , + INSTR_CVADDSCIB : begin mnemonic = "cv.add"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000010: begin + INSTR_CVSUBH , + INSTR_CVSUBSCH , + INSTR_CVSUBSCIH, + INSTR_CVSUBB , + INSTR_CVSUBSCB , + INSTR_CVSUBSCIB : begin mnemonic = "cv.sub"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000100: begin + INSTR_CVAVGH , + INSTR_CVAVGSCH , + INSTR_CVAVGSCIH , + INSTR_CVAVGB , + INSTR_CVAVGSCB , + INSTR_CVAVGSCIB : begin mnemonic = "cv.avg"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000110: begin + INSTR_CVAVGUH , + INSTR_CVAVGUSCH , + INSTR_CVAVGUSCIH, + INSTR_CVAVGUB , + INSTR_CVAVGUSCB , + INSTR_CVAVGUSCIB : begin mnemonic = "cv.avgu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b001000: begin + INSTR_CVMINH , + INSTR_CVMINSCH , + INSTR_CVMINSCIH, + INSTR_CVMINB , + INSTR_CVMINSCB , + INSTR_CVMINSCIB : begin mnemonic = "cv.min"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001010: begin + INSTR_CVMINUH , + INSTR_CVMINUSCH , + INSTR_CVMINUSCIH, + INSTR_CVMINUB , + INSTR_CVMINUSCB , + INSTR_CVMINUSCIB : begin mnemonic = "cv.minu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b001100: begin + INSTR_CVMAXH , + INSTR_CVMAXSCH , + INSTR_CVMAXSCIH , + INSTR_CVMAXB , + INSTR_CVMAXSCB , + INSTR_CVMAXSCIB : begin mnemonic = "cv.max"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001110: begin + INSTR_CVMAXUH , + INSTR_CVMAXUSCH , + INSTR_CVMAXUSCIH , + INSTR_CVMAXUB , + INSTR_CVMAXUSCB , + INSTR_CVMAXUSCIB : begin mnemonic = "cv.maxu"; str_imm = 
$sformatf("0x%0h", imm_vu_type); end - 6'b010000: begin + INSTR_CVSRLH , + INSTR_CVSRLSCH , + INSTR_CVSRLSCIH , + INSTR_CVSRLB , + INSTR_CVSRLSCB , + INSTR_CVSRLSCIB : begin mnemonic = "cv.srl"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b010010: begin + INSTR_CVSRAH , + INSTR_CVSRASCH , + INSTR_CVSRASCIH, + INSTR_CVSRAB , + INSTR_CVSRASCB , + INSTR_CVSRASCIB : begin mnemonic = "cv.sra"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b010100: begin + INSTR_CVSLLH , + INSTR_CVSLLSCH , + INSTR_CVSLLSCIH, + INSTR_CVSLLB , + INSTR_CVSLLSCB , + INSTR_CVSLLSCIB : begin mnemonic = "cv.sll"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b010110: begin + INSTR_CVORH , + INSTR_CVORSCH , + INSTR_CVORSCIH, + INSTR_CVORB , + INSTR_CVORSCB , + INSTR_CVORSCIB : begin mnemonic = "cv.or"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b011000: begin + INSTR_CVXORH , + INSTR_CVXORSCH , + INSTR_CVXORSCIH , + INSTR_CVXORB , + INSTR_CVXORSCB , + INSTR_CVXORSCIB : begin mnemonic = "cv.xor"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b011010: begin + INSTR_CVANDH , + INSTR_CVANDSCH , + INSTR_CVANDSCIH , + INSTR_CVANDB , + INSTR_CVANDSCB , + INSTR_CVANDSCIB : begin mnemonic = "cv.and"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b011100: begin + INSTR_CVABSH, + INSTR_CVABSB : begin mnemonic = "cv.abs"; str_imm = $sformatf("0x%0h", imm_vs_type); end // dot products - 6'b100000: begin + INSTR_CVDOTUPH , + INSTR_CVDOTUPSCH , + INSTR_CVDOTUPSCIH, + INSTR_CVDOTUPB , + INSTR_CVDOTUPSCB , + INSTR_CVDOTUPSCIB : begin mnemonic = "cv.dotup"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b100010: begin + INSTR_CVDOTUSPH , + INSTR_CVDOTUSPSCH , + INSTR_CVDOTUSPSCIH, + INSTR_CVDOTUSPB , + INSTR_CVDOTUSPSCB , + INSTR_CVDOTUSPSCIB : begin mnemonic = "cv.dotusp"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b100100: begin + INSTR_CVDOTSPH , + INSTR_CVDOTSPSCH , + INSTR_CVDOTSPSCIH, + INSTR_CVDOTSPB , + INSTR_CVDOTSPSCB , + INSTR_CVDOTSPSCIB : begin mnemonic 
= "cv.dotsp"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b100110: begin + INSTR_CVSDOTUPH , + INSTR_CVSDOTUPSCH , + INSTR_CVSDOTUPSCIH, + INSTR_CVSDOTUPB , + INSTR_CVSDOTUPSCB , + INSTR_CVSDOTUPSCIB : begin mnemonic = "cv.sdotup"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b101000: begin + INSTR_CVSDOTUSPH , + INSTR_CVSDOTUSPSCH , + INSTR_CVSDOTUSPSCIH, + INSTR_CVSDOTUSPB , + INSTR_CVSDOTUSPSCB , + INSTR_CVSDOTUSPSCIB : begin mnemonic = "cv.sdotusp"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b101010: begin + INSTR_CVSDOTSPH , + INSTR_CVSDOTSPSCH , + INSTR_CVSDOTSPSCIH, + INSTR_CVSDOTSPB , + INSTR_CVSDOTSPSCB , + INSTR_CVSDOTSPSCIB : begin mnemonic = "cv.sdotsp"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b101110: begin - case (instr[14:13]) - 2'b00 : begin - mnemonic = "cv.extract"; - str_imm = $sformatf("0x%0h", imm_vs_type); - end - 2'b01 : begin - mnemonic = "cv.extractu"; - str_imm = $sformatf("0x%0h", imm_vu_type); - end - 2'b10 : begin - mnemonic = "cv.insert"; - str_imm = $sformatf("0x%0h", imm_vs_type); - end - endcase - str_sci = ""; + INSTR_CVEXTRACTH, + INSTR_CVEXTRACTB : begin + mnemonic = "cv.extract"; + str_imm = $sformatf("0x%0h", imm_vs_type); + str_sci = ""; + end + INSTR_CVEXTRACTUH, + INSTR_CVEXTRACTUB : begin + mnemonic = "cv.extractu"; + str_imm = $sformatf("0x%0h", imm_vu_type); + str_sci = ""; + end + INSTR_CVINSERTH, + INSTR_CVINSERTB : begin + mnemonic = "cv.insert"; + str_imm = $sformatf("0x%0h", imm_vs_type); + str_sci = ""; end // shuffle/pack - 6'b110000: begin - if (instr[14:12] == 3'b111) begin - mnemonic = "cv.shuffleI0"; - str_imm = $sformatf("0x%8h", imm_shuffle_type); - end else begin + INSTR_CVSHUFFLEH , + INSTR_CVSHUFFLESCIH, + INSTR_CVSHUFFLEB : begin mnemonic = "cv.shuffle"; if (instr[14:12] == 3'b110) begin str_imm = $sformatf("0x%8h", imm_shuffle_type); end - end end - 6'b110010: begin + + INSTR_CVSHUFFLEL0SCIB : begin + mnemonic = "cv.shuffleI0"; + str_imm = $sformatf("0x%8h", 
imm_shuffle_type); + end + INSTR_CVSHUFFLEL1SCIB : begin mnemonic = "cv.shuffleI1"; str_imm = $sformatf("0x%8h", imm_shuffle_type); end - 6'b110100: begin + INSTR_CVSHUFFLEL2SCIB : begin mnemonic = "cv.shuffleI2"; str_imm = $sformatf("0x%8h", imm_shuffle_type); end - 6'b110110: begin + INSTR_CVSHUFFLEL3SCIB : begin mnemonic = "cv.shuffleI3"; str_imm = $sformatf("0x%8h", imm_shuffle_type); end - 6'b111000: begin + INSTR_CVSHUFFLE2H, + INSTR_CVSHUFFLE2B : begin mnemonic = "cv.shuffle2"; end - 6'b111100: begin + INSTR_CVPACK, + INSTR_CVPACKH : begin mnemonic = "cv.pack"; if (instr[25] == 1'b0) begin str_hb = ""; end end - 6'b111110: begin - mnemonic = instr[25] ? "cv.packhi" : "cv.packlo"; - end + INSTR_CVPACKHIB : mnemonic = "cv.packhi"; + INSTR_CVPACKLOB : mnemonic = "cv.packlo"; // comparisons - 6'b000001: begin + INSTR_CVCMPEQH , + INSTR_CVCMPEQSCH , + INSTR_CVCMPEQSCIH, + INSTR_CVCMPEQB , + INSTR_CVCMPEQSCB , + INSTR_CVCMPEQSCIB : begin mnemonic = "cv.cmpeq"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000011: begin + INSTR_CVCMPNEH , + INSTR_CVCMPNESCH , + INSTR_CVCMPNESCIH, + INSTR_CVCMPNEB , + INSTR_CVCMPNESCB , + INSTR_CVCMPNESCIB : begin mnemonic = "cv.cmpne"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000101: begin + INSTR_CVCMPGTH , + INSTR_CVCMPGTSCH , + INSTR_CVCMPGTSCIH, + INSTR_CVCMPGTB , + INSTR_CVCMPGTSCB , + INSTR_CVCMPGTSCIB : begin mnemonic = "cv.cmpgt"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000111: begin + INSTR_CVCMPGEH , + INSTR_CVCMPGESCH , + INSTR_CVCMPGESCIH, + INSTR_CVCMPGEB , + INSTR_CVCMPGESCB , + INSTR_CVCMPGESCIB : begin mnemonic = "cv.cmpge"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001001: begin + INSTR_CVCMPLTH , + INSTR_CVCMPLTSCH , + INSTR_CVCMPLTSCIH, + INSTR_CVCMPLTB , + INSTR_CVCMPLTSCB , + INSTR_CVCMPLTSCIB : begin mnemonic = "cv.cmplt"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001011: begin + INSTR_CVCMPLEH , + INSTR_CVCMPLESCH , + INSTR_CVCMPLESCIH, + INSTR_CVCMPLEB , + 
INSTR_CVCMPLESCB , + INSTR_CVCMPLESCIB : begin mnemonic = "cv.cmple"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001101: begin + INSTR_CVCMPGTUH , + INSTR_CVCMPGTUSCH , + INSTR_CVCMPGTUSCIH, + INSTR_CVCMPGTUB , + INSTR_CVCMPGTUSCB , + INSTR_CVCMPGTUSCIB : begin mnemonic = "cv.cmpgtu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b001111: begin + INSTR_CVCMPGEUH , + INSTR_CVCMPGEUSCH , + INSTR_CVCMPGEUSCIH, + INSTR_CVCMPGEUB , + INSTR_CVCMPGEUSCB , + INSTR_CVCMPGEUSCIB : begin mnemonic = "cv.cmpgeu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b010001: begin + INSTR_CVCMPLTUH , + INSTR_CVCMPLTUSCH , + INSTR_CVCMPLTUSCIH, + INSTR_CVCMPLTUB , + INSTR_CVCMPLTUSCB , + INSTR_CVCMPLTUSCIB : begin mnemonic = "cv.cmpltu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b010011: begin + INSTR_CVCMPLEUH , + INSTR_CVCMPLEUSCH , + INSTR_CVCMPLEUSCIH, + INSTR_CVCMPLEUB , + INSTR_CVCMPLEUSCB , + INSTR_CVCMPLEUSCIB : begin mnemonic = "cv.cmpleu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b010101: begin - unique case (instr[14:13]) - 2'b00: mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r" : "cv.cplxmul.i"; - 2'b01: mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div2" : "cv.cplxmul.i.div2"; - 2'b10: mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div4" : "cv.cplxmul.i.div4"; - 2'b11: mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div8" : "cv.cplxmul.i.div8"; - endcase + INSTR_CVCPLXMULR, + INSTR_CVCPLXMULI : begin + mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r" : "cv.cplxmul.i"; str_sci = ""; str_hb = ""; end - - 6'b010111: begin - mnemonic = "cv.cplxconj"; - str_sci = ""; + INSTR_CVCPLXMULRDIV2, + INSTR_CVCPLXMULIDIV2 : begin + mnemonic = instr[25] == 1'b0 ? 
"cv.cplxmul.r.div2" : "cv.cplxmul.i.div2"; + str_sci = ""; str_hb = ""; end - - 6'b011001: begin - unique case (instr[14:13]) - 2'b00: mnemonic = "cv.subrotmj"; - 2'b01: mnemonic = "cv.subrotmj.div2"; - 2'b10: mnemonic = "cv.subrotmj.div4"; - 2'b11: mnemonic = "cv.subrotmj.div8"; - endcase + INSTR_CVCPLXMULRDIV4, + INSTR_CVCPLXMULIDIV4 : begin + mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div4" : "cv.cplxmul.i.div4"; str_sci = ""; str_hb = ""; end - - 6'b011011: begin - unique case (instr[14:13]) - 2'b01: mnemonic = "cv.add.div2"; - 2'b10: mnemonic = "cv.add.div4"; - 2'b11: mnemonic = "cv.add.div8"; - endcase + INSTR_CVCPLXMULRDIV8, + INSTR_CVCPLXMULIDIV8 : begin + mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div8" : "cv.cplxmul.i.div8"; str_sci = ""; str_hb = ""; end - 6'b011101: begin - unique case (instr[14:13]) - 2'b01: mnemonic = "cv.sub.div2"; - 2'b10: mnemonic = "cv.sub.div4"; - 2'b11: mnemonic = "cv.sub.div8"; - endcase - str_sci = ""; + INSTR_CVCPLXCONJ : begin + mnemonic = "cv.cplxconj"; + str_sci = ""; str_hb = ""; end + INSTR_CVSUBROTMJ : begin + mnemonic = "cv.subrotmj"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBROTMJDIV2 : begin + mnemonic = "cv.subrotmj.div2"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBROTMJDIV4 : begin + mnemonic = "cv.subrotmj.div4"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBROTMJDIV8 : begin + mnemonic = "cv.subrotmj.div8"; + str_sci = ""; + str_hb = ""; + end + + INSTR_CVADDIV2 : begin + mnemonic = "cv.add.div2"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVADDIV4 : begin + mnemonic = "cv.add.div4"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVADDIV8 : begin + mnemonic = "cv.add.div8"; + str_sci = ""; + str_hb = ""; + end + + INSTR_CVSUBIV2 : begin + mnemonic = "cv.sub.div2"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBIV4 : begin + mnemonic = "cv.sub.div4"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBIV8 : begin + mnemonic = "cv.sub.div8"; + str_sci = ""; + str_hb = ""; + end + default: 
begin printMnemonic("INVALID"); return; diff --git a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_rvfi.sv b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_rvfi.sv index 4effb2b86..3675a950d 100644 --- a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_rvfi.sv +++ b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_rvfi.sv @@ -1,24 +1,28 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. // You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// CV32E40P RVFI interface -// -// Contributors: Davide Schiavone, OpenHW Group -// Halfdan Bechmann, Silicon Labs -// Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Davide Schiavone, OpenHW Group // +// Halfdan Bechmann, Silicon Labs // +// Yoann Pruvost, Dolphin Design // +// // +// Description: CV32E40P RVFI interface // +// // +//////////////////////////////////////////////////////////////////////////////////// `include "cv32e40px_rvfi_pkg.sv" @@ -27,7 +31,8 @@ module cv32e40px_rvfi import cv32e40px_rvfi_pkg::*; #( parameter FPU = 0, - parameter ZFINX = 0 + parameter ZFINX = 0, + parameter NUM_MHPMCOUNTERS = 1 ) ( input logic clk_i, input logic rst_ni, @@ -290,7 +295,7 @@ module cv32e40px_rvfi // performance counters // cycle, instret, hpcounter, cycleh, instreth, hpcounterh // mcycle, minstret, mhpcounter, mcycleh, minstreth, mhpcounterh - input logic [31:0][MHPMCOUNTER_WIDTH-1:0] csr_mhpmcounter_q_i, + input logic [63:0][MHPMCOUNTER_WIDTH-1:0] csr_mhpmcounter_q_i, input logic [31:0] csr_mhpmcounter_write_lower_i, input logic [31:0] csr_mhpmcounter_write_upper_i, @@ -327,6 +332,10 @@ module cv32e40px_rvfi // the convention of RISC-V Formal Interface Specification. 
output logic [ 0:0] rvfi_valid, output logic [63:0] rvfi_order, + output integer rvfi_start_cycle, + output time rvfi_start_time, + output integer rvfi_stop_cycle, + output time rvfi_stop_time, output logic [31:0] rvfi_insn, output rvfi_trap_t rvfi_trap, output logic [ 0:0] rvfi_halt, @@ -346,6 +355,7 @@ module cv32e40px_rvfi output logic rvfi_frd_wvalid [1:0], output logic [ 4:0] rvfi_frd_addr [1:0], output logic [31:0] rvfi_frd_wdata [1:0], + output logic rvfi_2_rd, output logic [ 4:0] rvfi_rs1_addr, output logic [ 4:0] rvfi_rs2_addr, output logic [ 4:0] rvfi_rs3_addr, @@ -366,8 +376,8 @@ module cv32e40px_rvfi output logic [31:0] rvfi_pc_wdata, output logic [31:0] rvfi_mem_addr, - output logic [ 3:0] rvfi_mem_rmask, - output logic [ 3:0] rvfi_mem_wmask, + output logic [31:0] rvfi_mem_rmask, + output logic [31:0] rvfi_mem_wmask, output logic [31:0] rvfi_mem_rdata, output logic [31:0] rvfi_mem_wdata, @@ -618,6 +628,13 @@ module cv32e40px_rvfi bit clk_i_d; assign #0.01 clk_i_d = clk_i; + integer cycles; + // cycle counter + always_ff @(posedge clk_i_d, negedge rst_ni) begin + if (rst_ni == 1'b0) cycles <= 0; + else cycles <= cycles + 1; + end + logic pc_mux_debug; logic pc_mux_dret; logic pc_mux_exception; @@ -626,6 +643,9 @@ module cv32e40px_rvfi logic pc_mux_nmi; localparam logic [31:0] MSTATUS_WRITE_MASK = 32'h0000_6088; + localparam logic [31:0] MCOUNTINHIBIT_WRITE_MASK = {{(29-NUM_MHPMCOUNTERS){1'b0}}, {(NUM_MHPMCOUNTERS){1'b1}}, 3'b101}; + localparam NUM_HPM_EVENTS = 16; + localparam logic [31:0] MHPMEVENT_WRITE_MASK = {{(31-NUM_HPM_EVENTS){1'b0}}, {(NUM_HPM_EVENTS){1'b1}}}; `include "pipe_freeze_trace.sv" @@ -747,6 +767,10 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end rvfi_order = new_rvfi_trace.m_order; + rvfi_start_cycle = new_rvfi_trace.m_start_cycle; + rvfi_start_time = new_rvfi_trace.m_start_time; + rvfi_stop_cycle = new_rvfi_trace.m_stop_cycle; + rvfi_stop_time = new_rvfi_trace.m_stop_time; rvfi_pc_rdata = 
new_rvfi_trace.m_pc_rdata; rvfi_insn = new_rvfi_trace.m_insn; @@ -801,6 +825,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; rvfi_frd_addr[1] = '0; rvfi_frd_wdata[1] = '0; + rvfi_2_rd = new_rvfi_trace.m_2_rd_insn; if (new_rvfi_trace.m_rd_addr[0][5] == 1'b0) begin rvfi_rd_addr[0] = new_rvfi_trace.m_rd_addr[0][4:0]; rvfi_rd_wdata[0] = new_rvfi_trace.m_rd_wdata[0]; @@ -905,15 +930,50 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `SET_RVFI_CSR_FROM_INSN(misa) `SET_RVFI_CSR_FROM_INSN(mie) `SET_RVFI_CSR_FROM_INSN(mtvec) - `SET_RVFI_CSR_FROM_INSN(mcountinhibit) + + rvfi_csr_mcountinhibit_rdata = new_rvfi_trace.m_csr.mcountinhibit_rdata; + rvfi_csr_mcountinhibit_rmask = new_rvfi_trace.m_csr.mcountinhibit_rmask; + rvfi_csr_mcountinhibit_wdata = new_rvfi_trace.m_csr.mcountinhibit_wdata; + rvfi_csr_mcountinhibit_wmask = new_rvfi_trace.m_csr.mcountinhibit_wmask & MCOUNTINHIBIT_WRITE_MASK; + `SET_RVFI_CSR_FROM_INSN(mscratch) `SET_RVFI_CSR_FROM_INSN(mepc) `SET_RVFI_CSR_FROM_INSN(mcause) + `SET_RVFI_CSR_FROM_INSN(mcycle) `SET_RVFI_CSR_FROM_INSN(minstret) + `SET_RVFI_CSR_FROM_INSN(minstreth) + + // `SET_RVFI_CSR_FROM_INSN(cycle) + // `SET_RVFI_CSR_FROM_INSN(instret) + rvfi_csr_instret_rdata = new_rvfi_trace.m_csr.minstret_rdata; + rvfi_csr_instret_rmask = new_rvfi_trace.m_csr.minstret_rmask; + rvfi_csr_instret_wdata = new_rvfi_trace.m_csr.minstret_wdata; + rvfi_csr_instret_wmask = new_rvfi_trace.m_csr.minstret_wmask; + + for(int idx=3; idx<32; idx++) begin + rvfi_csr_mhpmcounter_rmask[idx] = new_rvfi_trace.m_csr.mhpmcounter_rmask[idx][31:0]; + rvfi_csr_mhpmcounter_wmask[idx] = new_rvfi_trace.m_csr.mhpmcounter_wmask[idx][31:0]; + rvfi_csr_mhpmcounter_rdata[idx] = new_rvfi_trace.m_csr.mhpmcounter_rdata[idx][31:0]; + rvfi_csr_mhpmcounter_wdata[idx] = new_rvfi_trace.m_csr.mhpmcounter_wdata[idx][31:0]; + + rvfi_csr_mhpmcounterh_rmask[idx] = new_rvfi_trace.m_csr.mhpmcounter_rmask[idx][63:32]; + rvfi_csr_mhpmcounterh_wmask[idx] = 
new_rvfi_trace.m_csr.mhpmcounter_wmask[idx][63:32]; + rvfi_csr_mhpmcounterh_rdata[idx] = new_rvfi_trace.m_csr.mhpmcounter_rdata[idx][63:32]; + rvfi_csr_mhpmcounterh_wdata[idx] = new_rvfi_trace.m_csr.mhpmcounter_wdata[idx][63:32]; + + rvfi_csr_mhpmevent_rmask[idx] = new_rvfi_trace.m_csr.mhpmevent_rmask[idx]; + rvfi_csr_mhpmevent_wmask[idx] = new_rvfi_trace.m_csr.mhpmevent_wmask[idx] & MHPMEVENT_WRITE_MASK; + rvfi_csr_mhpmevent_rdata[idx] = new_rvfi_trace.m_csr.mhpmevent_rdata[idx]; + rvfi_csr_mhpmevent_wdata[idx] = new_rvfi_trace.m_csr.mhpmevent_wdata[idx]; + end + // `SET_RVFI_CSR_FROM_INSN(instreth) + rvfi_csr_instreth_rdata = new_rvfi_trace.m_csr.minstreth_rdata; + rvfi_csr_instreth_rmask = new_rvfi_trace.m_csr.minstreth_rmask; + rvfi_csr_instreth_wdata = new_rvfi_trace.m_csr.minstreth_wdata; + rvfi_csr_instreth_wmask = new_rvfi_trace.m_csr.minstreth_wmask; + `SET_RVFI_CSR_FROM_INSN(mip) - // if(rvfi_order == 64'h00000000_00000167) begin - // rvfi_csr_mip_rdata = 32'h0010_0000; - // end + rvfi_csr_tdata_rdata[0] = 'Z; rvfi_csr_tdata_rmask[0] = '0; // Does not exist rvfi_csr_tdata_wdata[0] = 'Z; // Does not exist @@ -959,36 +1019,134 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; endfunction // set_rvfi - function void minstret_to_id(); - trace_id.m_csr.minstret_we = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]; - trace_id.m_csr.minstret_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2]; - trace_id.m_csr.minstret_rmask = '1; - trace_id.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q; - trace_id.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? 
'1 : '0; + function void sample_perf_counter_to_id(int idx); + trace_id.m_csr.mhpmcounter_rdata[idx][31:0] = r_pipe_freeze_trace.csr.mhpmcounter_q[idx][31:0]; + trace_id.m_csr.mhpmcounter_rmask[idx][31:0] = '1; endfunction - function void minstret_to_ex(); - trace_ex.m_csr.minstret_we = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]; - trace_ex.m_csr.minstret_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2]; - trace_ex.m_csr.minstret_rmask = '1; - trace_ex.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q; - trace_ex.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? '1 : '0; + function void perf_counter_to_id(int idx); + if(!trace_id.m_csr.mhpmcounter_we[idx][0]) begin + trace_id.m_csr.mhpmcounter_wdata[idx][31:0] = r_pipe_freeze_trace.csr.wdata_int; + end + if(r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx]) begin + trace_id.m_csr.mhpmcounter_we[idx][0] = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx]; + trace_id.m_csr.mhpmcounter_wdata[idx][31:0] = r_pipe_freeze_trace.csr.wdata_int; + trace_id.m_csr.mhpmcounter_wmask[idx][31:0] = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx] ? 
'1 : '0; + end + sample_perf_counter_to_id(idx); endfunction - function void tinfo_to_id(); - trace_id.m_csr.tinfo_we = '0; // READ ONLY csr_tinfo_we_i; - trace_id.m_csr.tinfo_rdata = r_pipe_freeze_trace.csr.tinfo_q; - trace_id.m_csr.tinfo_rmask = '1; - trace_id.m_csr.tinfo_wdata = r_pipe_freeze_trace.csr.tinfo_n; - trace_id.m_csr.tinfo_wmask = '0; + function void sample_perf_event_to_trace(int idx, insn_trace_t m_trace); + m_trace.m_csr.mhpmevent_rdata[idx] = r_pipe_freeze_trace.csr.mhpmevent_q[idx]; + m_trace.m_csr.mhpmevent_rmask[idx] = '1; endfunction - function void tinfo_to_ex(); - trace_ex.m_csr.tinfo_we = '0; // READ ONLY csr_tinfo_we_i; - trace_ex.m_csr.tinfo_rdata = r_pipe_freeze_trace.csr.tinfo_q; - trace_ex.m_csr.tinfo_rmask = '1; - trace_ex.m_csr.tinfo_wdata = r_pipe_freeze_trace.csr.tinfo_n; - trace_ex.m_csr.tinfo_wmask = '0; + function void perf_event_to_trace(int idx, insn_trace_t m_trace); + if(!m_trace.m_csr.mhpmevent_we[idx]) begin + m_trace.m_csr.mhpmevent_wdata[idx] = r_pipe_freeze_trace.csr.wdata_int; + end + if(r_pipe_freeze_trace.csr.mhpmevent_we[idx]) begin + m_trace.m_csr.mhpmevent_we[idx] = r_pipe_freeze_trace.csr.mhpmevent_we[idx]; + m_trace.m_csr.mhpmevent_wdata[idx] = r_pipe_freeze_trace.csr.wdata_int; + m_trace.m_csr.mhpmevent_wmask[idx] = r_pipe_freeze_trace.csr.mhpmevent_we[idx] ? 
'1 : '0;
+    end
+    sample_perf_event_to_trace(idx, m_trace);
+  endfunction
+
+  function void sample_minstret_to_trace(insn_trace_t m_trace);  // snapshot lower half of counter 2 as minstret read data
+    m_trace.m_csr.minstret_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2][31:0];
+    m_trace.m_csr.minstret_rmask = '1;
+  endfunction
+
+  function void minstret_to_trace(insn_trace_t m_trace);  // record a write to minstret, then refresh its read data
+    if(!m_trace.m_csr.minstret_we) begin  // no write recorded yet: keep wdata tracking the current CSR write data
+      m_trace.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.wdata_int;
+    end
+    if(r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]) begin
+      m_trace.m_csr.minstret_we    = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2];
+      m_trace.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.wdata_int;
+      m_trace.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? '1 : '0;
+    end
+    sample_minstret_to_trace(m_trace);
+  endfunction
+
+  function void sample_perf_counter_h_to_id(int idx);  // snapshot upper half of counter idx into trace_id
+    trace_id.m_csr.mhpmcounter_rdata[idx][63:32] = r_pipe_freeze_trace.csr.mhpmcounter_q[idx][63:32];  // was [63:0], which truncated to the lower half
+    trace_id.m_csr.mhpmcounter_rmask[idx][63:32] = '1;
+  endfunction
+
+  function void perf_counter_h_to_id(int idx);  // record a write to the upper half of counter idx in trace_id
+    if(!trace_id.m_csr.mhpmcounter_we[idx][1]) begin  // we[idx][1] is the upper-half flag; [0] is used by perf_counter_to_id
+      trace_id.m_csr.mhpmcounter_wdata[idx][63:32] = r_pipe_freeze_trace.csr.wdata_int;
+    end
+    if(r_pipe_freeze_trace.csr.mhpmcounter_write_upper[idx]) begin  // upper-half strobe, consistent with minstreth_to_trace (was write_lower)
+      trace_id.m_csr.mhpmcounter_we[idx][1]        = r_pipe_freeze_trace.csr.mhpmcounter_write_upper[idx];
+      trace_id.m_csr.mhpmcounter_wdata[idx][63:32] = r_pipe_freeze_trace.csr.wdata_int;
+      trace_id.m_csr.mhpmcounter_wmask[idx][63:32] = r_pipe_freeze_trace.csr.mhpmcounter_write_upper[idx] ? '1 : '0;
+    end
+    sample_perf_counter_h_to_id(idx);
+  endfunction
+
+  function void sample_minstreth_to_trace(insn_trace_t m_trace);  // snapshot upper half of counter 2 as minstreth read data
+    m_trace.m_csr.minstreth_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2][63:32];
+    m_trace.m_csr.minstreth_rmask = '1;
+  endfunction
+
+  function void sample_mcycle_to_trace(insn_trace_t m_trace);  // snapshot lower half of counter 0 as mcycle
+    m_trace.m_csr.mcycle_we    = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[0];
+    m_trace.m_csr.mcycle_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[0][31:0];
+    m_trace.m_csr.mcycle_rmask = '1;
+    m_trace.m_csr.mcycle_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q[31:0];  // NOTE(review): no counter index here, unlike the rdata line above - confirm intent
+    m_trace.m_csr.mcycle_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[0] ? '1 : '0;
+  endfunction
+
+  function void minstreth_to_trace(insn_trace_t m_trace);  // record a write to minstreth, then refresh its read data
+    if(!m_trace.m_csr.minstreth_we) begin
+      m_trace.m_csr.minstreth_wdata = r_pipe_freeze_trace.csr.wdata_int;
+    end
+    if(r_pipe_freeze_trace.csr.mhpmcounter_write_upper[2]) begin
+      m_trace.m_csr.minstreth_we    = r_pipe_freeze_trace.csr.mhpmcounter_write_upper[2];
+      m_trace.m_csr.minstreth_wdata = r_pipe_freeze_trace.csr.wdata_int;
+      m_trace.m_csr.minstreth_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_upper[2] ? 
'1 : '0; + end + sample_minstreth_to_trace(m_trace); + endfunction + + function void sample_perf_counter_to_trace(insn_trace_t m_trace); + sample_minstret_to_trace(m_trace); + sample_minstreth_to_trace(m_trace); + sample_mcycle_to_trace(m_trace); + for(int idx=3; idx<32; idx++)begin + sample_perf_event_to_trace(idx, m_trace); //TO CHANGE + end + endfunction + + function void perf_counter_to_trace(insn_trace_t m_trace); + if(r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]) begin + minstret_to_trace(m_trace); + end + if(r_pipe_freeze_trace.csr.mhpmcounter_write_upper[2]) begin + minstreth_to_trace(m_trace); + end + for(int idx=3; idx<32; idx++) begin + if(r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx]) begin + perf_counter_to_id(idx); + end + if(r_pipe_freeze_trace.csr.mhpmcounter_write_upper[idx]) begin + perf_counter_h_to_id(idx); + end + if(r_pipe_freeze_trace.csr.mhpmevent_we[idx]) begin + perf_event_to_trace(idx, m_trace); + end + end + endfunction + + function void tinfo_to_trace(insn_trace_t m_trace); + m_trace.m_csr.tinfo_we = '0; // READ ONLY csr_tinfo_we_i; + m_trace.m_csr.tinfo_rdata = r_pipe_freeze_trace.csr.tinfo_q; + m_trace.m_csr.tinfo_rmask = '1; + m_trace.m_csr.tinfo_wdata = r_pipe_freeze_trace.csr.tinfo_n; + m_trace.m_csr.tinfo_wmask = '0; endfunction function void mtvec_to_id(); @@ -1083,8 +1241,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; lpcount1_to_id(); lpend1_to_id(); lpstart1_to_id(); - - endfunction bit s_was_flush; //debug exception is flagged as trap only if preceed by a flush @@ -1155,7 +1311,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; e_dev_commit_rf_to_ex_3, e_dev_commit_rf_to_ex_4, e_dev_commit_rf_to_ex_5; - event e_if_2_id_1, e_if_2_id_2, e_if_2_id_3; + event e_if_2_id_1, e_if_2_id_2, e_if_2_id_3, e_if_2_id_4; event e_ex_to_wb_1, e_ex_to_wb_2; event e_id_to_ex_1, e_id_to_ex_2; event e_commit_dpc; @@ -1257,6 +1413,9 @@ insn_trace_t trace_if, trace_id, trace_ex, 
trace_ex_next, trace_wb; end end csr_to_apu_resp(); + + trace_apu_resp.m_stop_cycle = cycles; + trace_apu_resp.m_stop_time = $time; send_rvfi(trace_apu_resp); ->e_send_rvfi_trace_apu_resp; end @@ -1272,35 +1431,47 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_id_done; function void if_to_id(); if (trace_id.m_valid) begin - minstret_to_id(); `CSR_FROM_PIPE(id, misa) `CSR_FROM_PIPE(id, tdata1) `CSR_FROM_PIPE(id, tdata2) - tinfo_to_id(); + tinfo_to_trace(trace_id); `CSR_FROM_PIPE(id, mip) send_rvfi(trace_id); end trace_id.init(trace_if); trace_id.m_trap = ~r_pipe_freeze_trace.minstret; - trace_id.m_is_illegal = r_pipe_freeze_trace.is_illegal; + trace_id.m_is_illegal = trace_id.m_is_illegal | r_pipe_freeze_trace.is_illegal; + `CSR_FROM_PIPE(id, dpc) s_is_pc_set = 1'b0; s_is_irq_start = 1'b0; trace_if.m_valid = 1'b0; s_id_done = 1'b0; - `CSR_FROM_PIPE(id, dpc) endfunction function logic [31:0] be_to_mask(logic [3:0] be); logic [31:0] mask; - mask[7:0] = be[0] ? 8'hFF : 8'h00; - mask[15:8] = be[0] ? 8'hFF : 8'h00; - mask[23:16] = be[0] ? 8'hFF : 8'h00; - mask[31:24] = be[0] ? 8'hFF : 8'h00; + mask[7:0] = (be[0] == 1'b1) ? 8'hFF : 8'h00; + mask[15:8] = (be[1] == 1'b1) ? 8'hFF : 8'h00; + mask[23:16] = (be[2] == 1'b1) ? 8'hFF : 8'h00; + mask[31:24] = (be[3] == 1'b1) ? 
8'hFF : 8'h00; be_to_mask = mask; return mask; endfunction + function void commit_rf_to_trace(insn_trace_t m_trace); + if (m_trace.m_got_ex_reg) begin + m_trace.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; + m_trace.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; + m_trace.m_2_rd_insn = 1'b1; + m_trace.m_got_first_data = 1'b1; + end else begin + m_trace.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; + m_trace.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; + m_trace.m_got_first_data = 1'b1; + end + endfunction + task compute_pipeline(); bit s_new_valid_insn; bit s_ex_valid_adjusted; @@ -1320,6 +1491,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_ex_reg_we_adjusted; //ex_reg_we bit s_rf_we_wb_adjusted; // + bit s_dont_override_mstatus_fs_id; + trace_if = new(); trace_id = new(); trace_ex = new(); @@ -1352,6 +1525,8 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_ex_reg_we_adjusted = 1'b0; s_rf_we_wb_adjusted = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; + forever begin wait(e_pipe_monitor_ok.triggered); // event triggered #1; @@ -1368,23 +1543,11 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end if (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_ID && r_pipe_freeze_trace.ebrk_insn_dec) begin - if (trace_wb.m_valid) begin - send_rvfi(trace_wb); - trace_wb.m_valid = 1'b0; - ->e_send_rvfi_trace_wb_1; - end - if (trace_ex.m_valid) begin - send_rvfi(trace_ex); - trace_ex.m_valid = 1'b0; - ->e_send_rvfi_trace_ex_1; - end if (trace_id.m_valid) begin - - minstret_to_id(); `CSR_FROM_PIPE(id, misa) `CSR_FROM_PIPE(id, tdata1) `CSR_FROM_PIPE(id, tdata2) - tinfo_to_id(); + tinfo_to_trace(trace_id); `CSR_FROM_PIPE(id, mip) end end @@ -1418,7 +1581,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (trace_ex.m_valid & s_wb_valid_adjusted) begin // Used flopped values in case write happened before wb_valid - minstret_to_ex(); + sample_perf_counter_to_trace(trace_ex); 
trace_ex.m_csr.got_minstret = '1; end @@ -1494,14 +1657,15 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end if (trace_ex.m_valid) begin - - if (!trace_ex.m_csr.got_minstret) begin - minstret_to_ex(); + if(trace_ex.m_instret_smaple_trigger == 1) begin //time to sample instret + sample_perf_counter_to_trace(trace_ex); end + trace_ex.m_instret_smaple_trigger = trace_ex.m_instret_smaple_trigger + 1; + `CSR_FROM_PIPE(ex, misa) `CSR_FROM_PIPE(ex, tdata1) `CSR_FROM_PIPE(ex, tdata2) - tinfo_to_ex(); + tinfo_to_trace(trace_ex); if (s_rf_we_wb_adjusted) begin ->e_dev_commit_rf_to_ex_4; @@ -1524,29 +1688,20 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_ex.m_valid = 1'b0; ->e_send_rvfi_trace_ex_2; end else begin - if (!s_ex_valid_adjusted & !trace_ex.m_csr.got_minstret) begin - minstret_to_ex(); - end if (s_rf_we_wb_adjusted) begin ->e_dev_commit_rf_to_ex_1; - if (trace_ex.m_got_ex_reg) begin - trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; - trace_ex.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_2_rd_insn = 1'b1; - trace_ex.m_got_first_data = 1'b1; - end else begin - trace_ex.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; - trace_ex.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_got_first_data = 1'b1; - end + commit_rf_to_trace(trace_ex); - if (r_pipe_freeze_trace.csr.fregs_we) begin + if (r_pipe_freeze_trace.csr.fregs_we && (r_pipe_freeze_trace.rf_we_wb && r_pipe_freeze_trace.rf_addr_wb[5])) begin //Catching mstatus_fs updates caused by flw `CSR_FROM_PIPE(ex, mstatus_fs) trace_ex.m_csr.mstatus_fs_we = 1'b1; trace_ex.m_csr.mstatus_fs_wmask = '1; if(r_pipe_freeze_trace.csr.we && r_pipe_freeze_trace.csr.mstatus_fs_we) begin //In this specific case, two writes to mstatus_fs happen at the same time. 
We need to recreate the writes caused by fregs_we trace_ex.m_csr.mstatus_fs_wdata = FS_DIRTY; + end else begin + trace_id.m_csr.mstatus_fs_rdata = trace_ex.m_csr.mstatus_fs_wdata; + s_dont_override_mstatus_fs_id = 1'b1; end ->e_fregs_dirty_3; end @@ -1559,9 +1714,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; ->e_ex_to_wb_1; trace_wb.move_down_pipe(trace_ex); end else begin - if (!trace_ex.m_csr.got_minstret) begin - minstret_to_ex(); - end send_rvfi(trace_ex); ->e_send_rvfi_trace_ex_6; end @@ -1570,31 +1722,41 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end else if (s_rf_we_wb_adjusted && !s_was_flush) begin ->e_dev_commit_rf_to_ex_2; - if (trace_ex.m_got_ex_reg) begin - trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; - trace_ex.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_2_rd_insn = 1'b1; - trace_ex.m_got_first_data = 1'b1; - end else begin - trace_ex.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; - trace_ex.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_got_first_data = 1'b1; - end + commit_rf_to_trace(trace_ex); end end // If mret, we need to keep the instruction in Id during flush_ex because mstatus update happens at that time - s_ex_valid_adjusted = (r_pipe_freeze_trace.ex_valid && r_pipe_freeze_trace.ex_ready) && (s_core_is_decoding || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF) || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_FLUSH) || ((r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_EX) && !r_pipe_freeze_trace.mret_insn_dec)) && (!r_pipe_freeze_trace.apu_rvalid || r_pipe_freeze_trace.data_req_ex); + s_ex_valid_adjusted = (r_pipe_freeze_trace.ex_valid && r_pipe_freeze_trace.ex_ready) && (s_core_is_decoding || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF) || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_ID) || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_FLUSH) || ((r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_EX) && !r_pipe_freeze_trace.mret_insn_dec)); //EX_STAGE + if 
(trace_id.m_valid) begin + if(trace_id.m_instret_smaple_trigger == 1) begin //time to sample instret + sample_perf_counter_to_trace(trace_id); + for(int idx=3; idx<32; idx++) begin + sample_perf_counter_to_id(idx); + sample_perf_counter_h_to_id(idx); + sample_perf_event_to_trace(idx, trace_id); + end + end + trace_id.m_instret_smaple_trigger = trace_id.m_instret_smaple_trigger + 1; if(trace_id.m_sample_csr_write_in_ex && !csr_is_irq && !s_is_irq_start) begin //First cycle after id_ready, csr write is asserted in this cycle `CSR_FROM_PIPE(id, mstatus) - `CSR_FROM_PIPE(id, mstatus_fs) + if(!s_dont_override_mstatus_fs_id) begin + `CSR_FROM_PIPE(id, mstatus_fs) + end `CSR_FROM_PIPE(id, mepc) `CSR_FROM_PIPE(id, mcause) `CSR_FROM_PIPE(id, dscratch0) `CSR_FROM_PIPE(id, dscratch1) + if(r_pipe_freeze_trace.csr.we && (r_pipe_freeze_trace.csr.addr == CSR_DPC)) begin + `CSR_FROM_PIPE(id, dpc) + end + + `CSR_FROM_PIPE(id, mcountinhibit) + + perf_counter_to_trace(trace_id); ->e_csr_in_ex; end @@ -1614,10 +1776,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(id, frm) `CSR_FROM_PIPE(id, fcsr) - if (r_pipe_freeze_trace.csr.we) begin - `CSR_FROM_PIPE(id, dpc) - end - if (r_pipe_freeze_trace.csr.dcsr_we) begin dcsr_to_id(); end @@ -1638,6 +1796,15 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_ex.m_csr.frm_wmask = '0; trace_ex.m_csr.fcsr_wmask = '0; + if(r_pipe_freeze_trace.ctrl_fsm_cs == XRET_JUMP) begin //xret exit pipeline + tinfo_to_trace(trace_id); + `CSR_FROM_PIPE(id, tdata1) + `CSR_FROM_PIPE(id, tdata2) + send_rvfi(trace_id); + trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; + end + if (r_pipe_freeze_trace.apu_req && r_pipe_freeze_trace.apu_gnt) begin trace_id.m_is_apu = 1'b1; trace_id.m_apu_req_id = cnt_apu_req; @@ -1647,6 +1814,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_apu_req.set_to_apu(); apu_trace_q.push_back(trace_apu_req); trace_id.m_valid = 1'b0; 
+ s_dont_override_mstatus_fs_id = 1'b0; if(r_pipe_freeze_trace.apu_rvalid && (cnt_apu_req == cnt_apu_resp)) begin//APU return in the same cycle apu_resp(); @@ -1702,6 +1870,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); // The instruction moves forward from ID to EX trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; ->e_id_to_ex_1; end else if (r_pipe_freeze_trace.ex_reg_we && r_pipe_freeze_trace.rf_alu_we_ex) begin @@ -1725,9 +1894,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (s_new_valid_insn) begin // There is a new valid instruction if (trace_id.m_valid) begin if (trace_ex.m_valid) begin - if (!trace_ex.m_csr.got_minstret) begin - minstret_to_ex(); - end if (trace_wb.m_valid) begin send_rvfi(trace_ex); ->e_send_rvfi_trace_ex_4; @@ -1769,6 +1935,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; ->e_id_to_ex_2; end if_to_id(); @@ -1786,18 +1953,24 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end //IF_STAGE - if (r_pipe_freeze_trace.if_valid && r_pipe_freeze_trace.if_ready) begin + if(trace_if.m_valid) begin + if(r_pipe_freeze_trace.is_illegal && r_pipe_freeze_trace.is_decoding) begin + trace_if.m_is_illegal = 1'b1; + end + end + + if (r_pipe_freeze_trace.if_valid && r_pipe_freeze_trace.if_ready && r_pipe_freeze_trace.instr_valid_if) begin if (trace_if.m_valid) begin if (r_pipe_freeze_trace.id_valid && r_pipe_freeze_trace.id_ready && !trace_id.m_valid && r_pipe_freeze_trace.ebrk_insn_dec) begin if_to_id(); trace_id.m_is_ebreak = '1; //trace_if.m_is_ebreak; ->e_if_2_id_2; - end else if (r_pipe_freeze_trace.is_illegal) begin + end else if (trace_if.m_is_illegal) begin if_to_id(); - trace_id.m_is_illegal = 1'b1; ->e_if_2_id_3; end else if (r_pipe_freeze_trace.ecall_insn_dec) begin 
if_to_id(); + ->e_if_2_id_4; end end diff --git a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_rvfi_trace.sv b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_rvfi_trace.sv index bfb025796..bdbe6127d 100644 --- a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_rvfi_trace.sv +++ b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_rvfi_trace.sv @@ -1,26 +1,31 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. // You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// CV32E40P RVFI interface -// -// Contributors: Halfdan Bechmann, Silicon Labs -// Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Halfdan Bechmann, Silicon Labs // +// Yoann Pruvost, Dolphin Design // +// // +// Description: CV32E40P RVFI tracer // +// // +//////////////////////////////////////////////////////////////////////////////////// module cv32e40px_rvfi_trace import cv32e40px_pkg::*; + import cv32e40px_rvfi_pkg::*; #( parameter FPU = 0, parameter ZFINX = 0 @@ -32,9 +37,14 @@ module cv32e40px_rvfi_trace input logic [31:0] imm_s3_type, - input logic rvfi_valid, - input logic [31:0] rvfi_insn, - input logic [31:0] rvfi_pc_rdata, + input logic rvfi_valid, + input logic [31:0] rvfi_insn, + input integer rvfi_start_cycle, + input time rvfi_start_time, + input integer rvfi_stop_cycle, + input time rvfi_stop_time, + input logic [31:0] rvfi_pc_rdata, + input rvfi_trap_t rvfi_trap, input logic [ 4:0] rvfi_rd_addr [1:0], input logic [31:0] rvfi_rd_wdata[1:0], @@ -42,6 +52,7 @@ module cv32e40px_rvfi_trace input logic rvfi_frd_wvalid[1:0], input logic [ 4:0] rvfi_frd_addr [1:0], input logic [31:0] rvfi_frd_wdata [1:0], + input logic rvfi_2_rd, input logic [ 4:0] rvfi_rs1_addr, input logic [ 4:0] rvfi_rs2_addr, @@ -61,8 +72,8 @@ module cv32e40px_rvfi_trace input logic [31:0] rvfi_frs3_rdata, input logic [31:0] rvfi_mem_addr, - input logic [ 3:0] rvfi_mem_rmask, - input logic [ 3:0] rvfi_mem_wmask, + input logic [31:0] rvfi_mem_rmask, + input logic [31:0] rvfi_mem_wmask, input logic [31:0] rvfi_mem_rdata, input logic [31:0] rvfi_mem_wdata ); @@ -74,7 +85,7 @@ module cv32e40px_rvfi_trace integer f; //file pointer string fn; - integer cycles; + // integer cycles; string info_tag; logic is_compressed; @@ -125,7 +136,13 @@ module cv32e40px_rvfi_trace rs3_value = rvfi_rs3_rdata; end - if (rvfi_frd_wvalid[0]) begin + if 
(rvfi_2_rd) begin + if (rvfi_frd_wvalid[1]) begin + rd = {1'b1, rvfi_frd_addr[1]}; + end else begin + rd = {1'b0, rvfi_rd_addr[1]}; + end + end else if (rvfi_frd_wvalid[0]) begin rd = {1'b1, rvfi_frd_addr[0]}; end else begin rd = {1'b0, rvfi_rd_addr[0]}; @@ -134,57 +151,69 @@ module cv32e40px_rvfi_trace assign rs4 = rs3; - assign imm_i_type = {{20{rvfi_insn[31]}}, rvfi_insn[31:20]}; - assign imm_iz_type = {20'b0, rvfi_insn[31:20]}; - assign imm_s_type = {{20{rvfi_insn[31]}}, rvfi_insn[31:25], rvfi_insn[11:7]}; + cv32e40p_compressed_decoder #( + .FPU(FPU) + ) rvfi_trace_decompress_i ( + .instr_i(rvfi_insn), + .instr_o(decomp_insn), + .is_compressed_o(is_compressed) + ); + + assign imm_i_type = {{20{decomp_insn[31]}}, decomp_insn[31:20]}; + assign imm_iz_type = {20'b0, decomp_insn[31:20]}; + assign imm_s_type = {{20{decomp_insn[31]}}, decomp_insn[31:25], decomp_insn[11:7]}; assign imm_sb_type = { - {19{rvfi_insn[31]}}, rvfi_insn[31], rvfi_insn[7], rvfi_insn[30:25], rvfi_insn[11:8], 1'b0 + {19{decomp_insn[31]}}, + decomp_insn[31], + decomp_insn[7], + decomp_insn[30:25], + decomp_insn[11:8], + 1'b0 }; - assign imm_u_type = {rvfi_insn[31:12], 12'b0}; + assign imm_u_type = {decomp_insn[31:12], 12'b0}; assign imm_uj_type = { - {12{rvfi_insn[31]}}, rvfi_insn[19:12], rvfi_insn[20], rvfi_insn[30:21], 1'b0 + {12{decomp_insn[31]}}, decomp_insn[19:12], decomp_insn[20], decomp_insn[30:21], 1'b0 }; - assign imm_z_type = '0; //{27'b0, rvfi_insn[REG_S1_MSB:REG_S1_LSB]}; + assign imm_z_type = '0; //{27'b0, decomp_insn[REG_S1_MSB:REG_S1_LSB]}; - assign imm_s2_type = {27'b0, rvfi_insn[24:20]}; + assign imm_s2_type = {27'b0, decomp_insn[24:20]}; assign imm_vs_type = '0; assign imm_vu_type = '0; assign imm_shuffle_type = '0; assign imm_clip_type = '0; - cv32e40px_compressed_decoder #( - .FPU(FPU) - ) rvfi_trace_decompress_i ( - .instr_i(rvfi_insn), - .instr_o(decomp_insn), - .is_compressed_o(is_compressed) - ); - `include "cv32e40px_instr_trace.svh" instr_trace_t trace_retire; function 
instr_trace_t trace_new_instr(); instr_trace_t trace; trace = new(); - trace.init(.cycles(cycles), .pc(rvfi_pc_rdata), .compressed(is_compressed), + trace.external_time = 1; + trace.simtime = rvfi_start_time - 1ns; + trace.stoptime = rvfi_stop_time; + trace.stopcycles = rvfi_stop_cycle; + trace.ctx = (rvfi_trap.trap) ? "(C)" : ""; + trace.init(.cycles(rvfi_start_cycle), .pc(rvfi_pc_rdata), .compressed(is_compressed), .instr(decomp_insn)); return trace; endfunction : trace_new_instr function void apply_reg_write(); foreach (trace_retire.regs_write[i]) begin - if (rvfi_frd_wvalid[0] && (trace_retire.regs_write[i].addr == {1'b1, rvfi_frd_addr[0]})) begin - trace_retire.regs_write[i].value = rvfi_frd_wdata[0]; - end else if (trace_retire.regs_write[i].addr == rvfi_rd_addr[0]) begin - trace_retire.regs_write[i].value = rvfi_rd_wdata[0]; - end if (rvfi_frd_wvalid[1] && (trace_retire.regs_write[i].addr == {1'b1, rvfi_frd_addr[1]})) begin trace_retire.regs_write[i].value = rvfi_frd_wdata[1]; end else if (trace_retire.regs_write[i].addr == rvfi_rd_addr[1]) begin trace_retire.regs_write[i].value = rvfi_rd_wdata[1]; end end + foreach (trace_retire.regs_write[i]) begin + if (rvfi_frd_wvalid[0] && (trace_retire.regs_write[i].addr == {1'b1, rvfi_frd_addr[0]})) begin + trace_retire.regs_write[i].value = rvfi_frd_wdata[0]; + end else if (trace_retire.regs_write[i].addr == rvfi_rd_addr[0]) begin + trace_retire.regs_write[i].value = rvfi_rd_wdata[0]; + end + end endfunction : apply_reg_write function void apply_mem_access(); @@ -202,11 +231,9 @@ instr_trace_t trace_retire; end endfunction : apply_mem_access - // cycle counter - always_ff @(posedge clk_i, negedge rst_ni) begin - if (rst_ni == 1'b0) cycles <= 0; - else cycles <= cycles + 1; - end + string insn_disas; + logic [31:0] insn_pc; + logic [31:0] insn_val; always @(posedge clk_i) begin if (rvfi_valid) begin @@ -214,6 +241,9 @@ instr_trace_t trace_retire; apply_reg_write(); apply_mem_access(); trace_retire.printInstrTrace(); + 
insn_disas = trace_retire.str; + insn_pc = trace_retire.pc; + insn_val = trace_retire.instr; end end @@ -223,7 +253,8 @@ instr_trace_t trace_retire; $sformat(info_tag, "CORE_TRACER %2d", hart_id_i); $display("[%s] Output filename is: %s", info_tag, fn); f = $fopen(fn, "w"); - $fwrite(f, "Time\tCycle\tPC\tInstr\tDecoded instruction\tRegister and memory contents\n"); + $fwrite(f, + " Time Cycle PC Instr Ctx Decoded instruction Register and memory contents Stop cycle Stop time\n"); end diff --git a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_tb_wrapper.sv b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_tb_wrapper.sv index d4aa94dcd..c14b131ba 100644 --- a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_tb_wrapper.sv +++ b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_tb_wrapper.sv @@ -1,45 +1,49 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. // You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// Wrapper for a cv32e40px, containing cv32e40px_top, and rvfi_tracer -// -// Contributors: Davide Schiavone, OpenHW Group -// Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Davide Schiavone, OpenHW Group // +// Yoann Pruvost, Dolphin Design // +// // +// Description: Test-bench wrapper for cv32e40px_top, tracer and and rvfi_tracer // +// // +//////////////////////////////////////////////////////////////////////////////////// -`ifdef CV32E40P_ASSERT_ON +`ifdef CV32E40PX_ASSERT_ON `include "cv32e40px_prefetch_controller_sva.sv" `endif -`ifdef CV32E40P_CORE_LOG +`ifdef CV32E40PX_CORE_LOG `include "cv32e40px_core_log.sv" `endif -`ifdef CV32E40P_APU_TRACE +`ifdef CV32E40PX_APU_TRACE `include "cv32e40px_apu_tracer.sv" `endif -`ifdef CV32E40P_TRACE_EXECUTION +`ifdef CV32E40PX_TRACE_EXECUTION `include "cv32e40px_tracer.sv" `endif -`ifdef CV32E40P_RVFI +`ifdef CV32E40PX_RVFI `include "cv32e40px_rvfi.sv" `endif -`ifdef CV32E40P_RVFI_TRACE_EXECUTION +`ifdef CV32E40PX_RVFI_TRACE_EXECUTION `include "cv32e40px_rvfi_trace.sv" `endif @@ -101,7 +105,7 @@ module cv32e40px_tb_wrapper output logic core_sleep_o ); -`ifdef CV32E40P_ASSERT_ON +`ifdef CV32E40PX_ASSERT_ON // RTL Assertions bind cv32e40px_prefetch_controller: @@ -114,9 +118,9 @@ module cv32e40px_tb_wrapper .FIFO_ADDR_DEPTH(FIFO_ADDR_DEPTH) ) prefetch_controller_sva (.*); -`endif // CV32E40P_ASSERT_ON +`endif // CV32E40PX_ASSERT_ON -`ifdef CV32E40P_CORE_LOG +`ifdef CV32E40PX_CORE_LOG cv32e40px_core_log #( .COREV_PULP (COREV_PULP), .COREV_CLUSTER (COREV_CLUSTER), @@ -130,9 +134,9 @@ module cv32e40px_tb_wrapper .hart_id_i (cv32e40px_top_i.core_i.hart_id_i), .pc_id_i (cv32e40px_top_i.core_i.pc_id) ); -`endif // CV32E40P_CORE_LOG +`endif // CV32E40PX_CORE_LOG -`ifdef CV32E40P_APU_TRACE +`ifdef CV32E40PX_APU_TRACE cv32e40px_apu_tracer apu_tracer_i ( .clk_i 
(cv32e40px_top_i.core_i.rst_ni), .rst_n (cv32e40px_top_i.core_i.clk_i), @@ -143,7 +147,7 @@ module cv32e40px_tb_wrapper ); `endif -`ifdef CV32E40P_TRACE_EXECUTION +`ifdef CV32E40PX_TRACE_EXECUTION cv32e40px_tracer #( .FPU (FPU), .ZFINX(ZFINX) @@ -210,11 +214,11 @@ module cv32e40px_tb_wrapper .apu_en_i (cv32e40px_top_i.apu_req), .apu_singlecycle_i(cv32e40px_top_i.core_i.ex_stage_i.apu_singlecycle), .apu_multicycle_i (cv32e40px_top_i.core_i.ex_stage_i.apu_multicycle), - .apu_rvalid_i (cv32e40px_top_i.apu_rvalid) + .apu_rvalid_i (cv32e40px_top_i.core_i.ex_stage_i.apu_valid) ); `endif -`ifdef CV32E40P_RVFI +`ifdef CV32E40PX_RVFI logic [1:0][31:0] hwlp_start_q; logic [1:0][31:0] hwlp_end_q; logic [1:0][31:0] hwlp_counter_q; @@ -234,8 +238,9 @@ module cv32e40px_tb_wrapper endgenerate cv32e40px_rvfi #( - .FPU (FPU), - .ZFINX(ZFINX) + .FPU(FPU), + .ZFINX(ZFINX), + .NUM_MHPMCOUNTERS(NUM_MHPMCOUNTERS) ) rvfi_i ( .clk_i (cv32e40px_top_i.core_i.clk_i), .rst_ni(cv32e40px_top_i.core_i.rst_ni), @@ -399,6 +404,9 @@ module cv32e40px_tb_wrapper .csr_mcountinhibit_n_i (cv32e40px_top_i.core_i.cs_registers_i.mcountinhibit_n), .csr_mcountinhibit_we_i(cv32e40px_top_i.core_i.cs_registers_i.mcountinhibit_we), + .csr_mhpmevent_n_i(cv32e40px_top_i.core_i.cs_registers_i.mhpmevent_n), + .csr_mhpmevent_q_i(cv32e40px_top_i.core_i.cs_registers_i.mhpmevent_q), + .csr_mhpmevent_we_i(cv32e40px_top_i.core_i.cs_registers_i.mhpmevent_we), .csr_mscratch_q_i(cv32e40px_top_i.core_i.cs_registers_i.mscratch_q), .csr_mscratch_n_i(cv32e40px_top_i.core_i.cs_registers_i.mscratch_n), .csr_mepc_q_i(cv32e40px_top_i.core_i.cs_registers_i.mepc_q), @@ -441,8 +449,7 @@ module cv32e40px_tb_wrapper ); `endif - -`ifdef CV32E40P_RVFI_TRACE_EXECUTION +`ifdef CV32E40PX_RVFI_TRACE_EXECUTION bind cv32e40px_rvfi: rvfi_i cv32e40px_rvfi_trace #( .FPU (FPU), .ZFINX(ZFINX) @@ -455,22 +462,38 @@ module cv32e40px_tb_wrapper .rvfi_valid(rvfi_valid), .rvfi_insn(rvfi_insn), + .rvfi_start_cycle(rvfi_start_cycle), + 
.rvfi_start_time(rvfi_start_time), + .rvfi_stop_cycle(rvfi_stop_cycle), + .rvfi_stop_time(rvfi_stop_time), .rvfi_pc_rdata(rvfi_pc_rdata), + .rvfi_trap(rvfi_trap), .rvfi_rd_addr(rvfi_rd_addr), .rvfi_rd_wdata(rvfi_rd_wdata), .rvfi_frd_wvalid(rvfi_frd_wvalid), .rvfi_frd_addr(rvfi_frd_addr), .rvfi_frd_wdata(rvfi_frd_wdata), + .rvfi_2_rd(rvfi_2_rd), .rvfi_rs1_addr(rvfi_rs1_addr), .rvfi_rs2_addr(rvfi_rs2_addr), + .rvfi_rs3_addr(rvfi_rs3_addr), .rvfi_rs1_rdata(rvfi_rs1_rdata), .rvfi_rs2_rdata(rvfi_rs2_rdata), + .rvfi_rs3_rdata(rvfi_rs3_rdata), .rvfi_frs1_addr(rvfi_frs1_addr), .rvfi_frs2_addr(rvfi_frs2_addr), + .rvfi_frs3_addr(rvfi_frs3_addr), .rvfi_frs1_rvalid(rvfi_frs1_rvalid), .rvfi_frs2_rvalid(rvfi_frs2_rvalid), + .rvfi_frs3_rvalid(rvfi_frs3_rvalid), .rvfi_frs1_rdata(rvfi_frs1_rdata), - .rvfi_frs2_rdata(rvfi_frs2_rdata) + .rvfi_frs2_rdata(rvfi_frs2_rdata), + .rvfi_frs3_rdata(rvfi_frs3_rdata), + .rvfi_mem_addr(rvfi_mem_addr), + .rvfi_mem_rmask(rvfi_mem_rmask), + .rvfi_mem_wmask(rvfi_mem_wmask), + .rvfi_mem_rdata(rvfi_mem_rdata), + .rvfi_mem_wdata(rvfi_mem_wdata) ); `endif // Instantiate the Core and the optinal FPU diff --git a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_tracer.sv b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_tracer.sv index c0c13315b..9254cceac 100644 --- a/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_tracer.sv +++ b/hw/vendor/esl_epfl_cv32e40px/bhv/cv32e40px_tracer.sv @@ -1,24 +1,26 @@ -// Copyright (c) 2020 OpenHW Group -// -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://solderpad.org/licenses/ -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-// See the License for the specific language governing permissions and -// limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 - -// Traces the executed instructions -// -// Contributors: Andreas Traber, ETHZ -// Davide Schiavone, OpenHW Group -// Pascal Gouedo, Dolphin Design +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + +//////////////////////////////////////////////////////////////////////////////// +// Engineer: Andreas Traber - atraber@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// Davide Schiavone - pschiavo@iis.ee.ethz.ch // +// // +// Design Name: RISC-V Tracer // +// Project Name: RI5CY // +// Language: SystemVerilog // +// // +// Description: Traces the executed instructions // +// // +//////////////////////////////////////////////////////////////////////////////// `ifdef CV32E40P_TRACE_EXECUTION @@ -183,7 +185,8 @@ module cv32e40px_tracer $sformat(info_tag, "CORE_TRACER %2d", hart_id_i); $display("[%s] Output filename is: %s", info_tag, fn); f = $fopen(fn, "w"); - $fwrite(f, "Time\tCycle\tPC\tInstr\tDecoded instruction\tRegister and memory contents\n"); + $fwrite(f, + " Time Cycle PC Instr Ctx Decoded instruction Register and memory contents\n"); end //initial begin diff --git a/hw/vendor/esl_epfl_cv32e40px/bhv/include/cv32e40px_rvfi_pkg.sv 
b/hw/vendor/esl_epfl_cv32e40px/bhv/include/cv32e40px_rvfi_pkg.sv index 9a131a68b..5a43daf64 100644 --- a/hw/vendor/esl_epfl_cv32e40px/bhv/include/cv32e40px_rvfi_pkg.sv +++ b/hw/vendor/esl_epfl_cv32e40px/bhv/include/cv32e40px_rvfi_pkg.sv @@ -1,24 +1,28 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. // You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// Includes to print info about the RVFI output -// -// Contributors: Davide Schiavone, OpenHW Group -// Halfdan Bechmann, Silicon Labs -// Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Davide Schiavone, OpenHW Group // +// Halfdan Bechmann, Silicon Labs // +// Yoann Pruvost, Dolphin Design // +// // +// Description: Package to print info on RVFI interface // +// // +//////////////////////////////////////////////////////////////////////////////////// package cv32e40px_rvfi_pkg; import cv32e40px_pkg::*; diff --git a/hw/vendor/esl_epfl_cv32e40px/bhv/include/cv32e40px_tracer_pkg.sv b/hw/vendor/esl_epfl_cv32e40px/bhv/include/cv32e40px_tracer_pkg.sv index 90ee5be9d..2046dd740 100644 --- a/hw/vendor/esl_epfl_cv32e40px/bhv/include/cv32e40px_tracer_pkg.sv +++ b/hw/vendor/esl_epfl_cv32e40px/bhv/include/cv32e40px_tracer_pkg.sv @@ -1,23 +1,13 @@ -// Copyright (c) 2020 OpenHW Group -// -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://solderpad.org/licenses/ -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 - -// Tracer package -// -// Contributors: Steve Richmond, Silicon Labs -// Pascal Gouedo, Dolphin Design +// Copyright 2018 ETH Zurich and University of Bologna. 
+// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. + package cv32e40px_tracer_pkg; import cv32e40px_pkg::*; diff --git a/hw/vendor/esl_epfl_cv32e40px/bhv/insn_trace.sv b/hw/vendor/esl_epfl_cv32e40px/bhv/insn_trace.sv index 3fe7c1848..8cdc06d9e 100644 --- a/hw/vendor/esl_epfl_cv32e40px/bhv/insn_trace.sv +++ b/hw/vendor/esl_epfl_cv32e40px/bhv/insn_trace.sv @@ -1,5 +1,26 @@ -// Copyright 2022 Dolphin Design -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// Copyright 2024 OpenHW Group and Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Yoann Pruvost, Dolphin Design // +// // +// Description: Macros and Functions to print information on RVFI interface // +// // +//////////////////////////////////////////////////////////////////////////////////// `define DEFINE_CSR(CSR_NAME) \ logic ``CSR_NAME``_we; \ @@ -23,6 +44,10 @@ class insn_trace_t; bit m_valid; logic [63:0] m_order; + integer m_start_cycle; + integer m_stop_cycle; + time m_start_time; + time m_stop_time; bit m_skip_order; //next order was used by trap; logic [31:0] m_pc_rdata; logic [31:0] m_insn; @@ -65,14 +90,15 @@ bit m_move_down_pipe; int m_instret_cnt; + int m_instret_smaple_trigger; //We need to sample minstret from csr 2 cycle after id is doen bit m_sample_csr_write_in_ex; struct { logic [31:0] addr ; - logic [ 3:0] rmask; + logic [31:0] rmask; logic [31:0] rdata; - logic [ 3:0] wmask; + logic [31:0] wmask; logic [31:0] wdata; } m_mem; @@ -102,9 +128,28 @@ `DEFINE_CSR(mscratch) `DEFINE_CSR(mepc) `DEFINE_CSR(mcause) + `DEFINE_CSR(mcycle) `DEFINE_CSR(minstret) bit got_minstret; - + `DEFINE_CSR(mcycleh) + `DEFINE_CSR(minstreth) + `DEFINE_CSR(cycle) + `DEFINE_CSR(instret) + // bit got_minstret; + `DEFINE_CSR(cycleh) + `DEFINE_CSR(instreth) + + logic [31:0][ 1:0] mhpmcounter_we; + logic [31:0][63:0] mhpmcounter_rdata; + logic [31:0][63:0] mhpmcounter_rmask; + logic [31:0][63:0] mhpmcounter_wdata; + logic [31:0][63:0] mhpmcounter_wmask; + + logic [31:0] mhpmevent_we; + logic [31:0][31:0] mhpmevent_rdata; + logic [31:0][31:0] mhpmevent_rmask; + logic [31:0][31:0] mhpmevent_wdata; + logic [31:0][31:0] mhpmevent_wmask; `DEFINE_CSR(mip) //mnxti //mintstatus @@ -148,6 +193,10 @@ function new(); this.m_order = 0; + this.m_start_cycle = 0; + this.m_stop_cycle = 0; + this.m_start_time = 0; + this.m_stop_time = 0; this.m_skip_order = 1'b0; this.m_valid = 1'b0; this.m_move_down_pipe = 1'b0; @@ -173,6 +222,7 @@ this.m_frm_we_non_apu = 
1'b0; this.m_fcsr_we_non_apu = 1'b0; this.m_instret_cnt = 0; + this.m_instret_smaple_trigger = 0; this.m_sample_csr_write_in_ex = 1'b1; endfunction @@ -613,12 +663,12 @@ INSTR_CVCMPLEB : this.m_mnemonic = "cv.cmple.b"; INSTR_CVCMPLESCB : this.m_mnemonic = "cv.cmple.sc.b"; INSTR_CVCMPLESCIB : this.m_mnemonic = "cv.cmple.sci.b"; - INSTR_CVCMPGTUH : this.m_mnemonic = "cv.cmptu.h"; - INSTR_CVCMPGTUSCH : this.m_mnemonic = "cv.cmptu.sc.h"; - INSTR_CVCMPGTUSCIH : this.m_mnemonic = "cv.cmptu.sci.h"; - INSTR_CVCMPGTUB : this.m_mnemonic = "cv.cmptu.b"; - INSTR_CVCMPGTUSCB : this.m_mnemonic = "cv.cmptu.sc.b"; - INSTR_CVCMPGTUSCIB : this.m_mnemonic = "cv.cmptu.sci.b"; + INSTR_CVCMPGTUH : this.m_mnemonic = "cv.cmpgtu.h"; + INSTR_CVCMPGTUSCH : this.m_mnemonic = "cv.cmpgtu.sc.h"; + INSTR_CVCMPGTUSCIH : this.m_mnemonic = "cv.cmpgtu.sci.h"; + INSTR_CVCMPGTUB : this.m_mnemonic = "cv.cmpgtu.b"; + INSTR_CVCMPGTUSCB : this.m_mnemonic = "cv.cmpgtu.sc.b"; + INSTR_CVCMPGTUSCIB : this.m_mnemonic = "cv.cmpgtu.sci.b"; INSTR_CVCMPGEUH : this.m_mnemonic = "cv.cmpgeu.h"; INSTR_CVCMPGEUSCH : this.m_mnemonic = "cv.cmpgeu.sc.h"; INSTR_CVCMPGEUSCIH : this.m_mnemonic = "cv.cmpgeu.sci.h"; @@ -847,7 +897,18 @@ `INIT_CSR(mscratch) `INIT_CSR(mepc) `INIT_CSR(mcause) + `INIT_CSR(mcycle) `INIT_CSR(minstret) + `INIT_CSR(mcycleh) + `INIT_CSR(minstreth) + `INIT_CSR(cycle) + `INIT_CSR(instret) + `INIT_CSR(cycleh) + `INIT_CSR(instreth) + this.m_csr.mhpmcounter_we = '0; + this.m_csr.mhpmcounter_wmask = '0; + this.m_csr.mhpmevent_we = '0; + this.m_csr.mhpmevent_wmask = '0; `INIT_CSR(mip) `INIT_CSR(tdata1) `INIT_CSR(tdata2) @@ -875,6 +936,10 @@ this.m_valid = 1'b1; this.m_stage = ID; this.m_order = this.m_order + 64'h1; + this.m_start_cycle = cycles; + this.m_stop_cycle = 0; + this.m_start_time = $time; + this.m_stop_time = 0; if(this.m_skip_order) begin this.m_order = this.m_order + 64'h1; end @@ -896,6 +961,7 @@ this.m_got_regs_write = 1'b0; this.m_move_down_pipe = 1'b0; this.m_instret_cnt = 0; + 
this.m_instret_smaple_trigger = 0; this.m_sample_csr_write_in_ex = 1'b1; this.m_rd_addr[0] = '0; this.m_rd_addr[1] = '0; @@ -951,6 +1017,10 @@ this.m_valid = m_source.m_valid; this.m_stage = m_source.m_stage; this.m_order = m_source.m_order; + this.m_start_cycle = m_source.m_start_cycle; + this.m_stop_cycle = m_source.m_stop_cycle; + this.m_start_time = m_source.m_start_time; + this.m_stop_time = m_source.m_stop_time; this.m_pc_rdata = m_source.m_pc_rdata; this.m_insn = m_source.m_insn; this.m_mnemonic = m_source.m_mnemonic; @@ -970,6 +1040,7 @@ this.m_is_illegal = m_source.m_is_illegal; this.m_is_irq = m_source.m_is_irq; this.m_instret_cnt = m_source.m_instret_cnt; + this.m_instret_smaple_trigger = m_source.m_instret_smaple_trigger; this.m_sample_csr_write_in_ex = m_source.m_sample_csr_write_in_ex; this.m_rs1_addr = m_source.m_rs1_addr; this.m_rs2_addr = m_source.m_rs2_addr; @@ -1000,8 +1071,26 @@ `ASSIGN_CSR(mscratch) `ASSIGN_CSR(mepc) `ASSIGN_CSR(mcause) + `ASSIGN_CSR(mcycle) `ASSIGN_CSR(minstret) this.m_csr.got_minstret = m_source.m_csr.got_minstret; + `ASSIGN_CSR(mcycleh) + `ASSIGN_CSR(minstreth) + `ASSIGN_CSR(cycle) + `ASSIGN_CSR(instret) + // this.m_csr.got_minstret = m_source.m_csr.got_minstret; + `ASSIGN_CSR(cycleh) + `ASSIGN_CSR(instreth) + this.m_csr.mhpmcounter_we = m_source.m_csr.mhpmcounter_we; + this.m_csr.mhpmcounter_rdata = m_source.m_csr.mhpmcounter_rdata; + this.m_csr.mhpmcounter_rmask = m_source.m_csr.mhpmcounter_rmask; + this.m_csr.mhpmcounter_wdata = m_source.m_csr.mhpmcounter_wdata; + this.m_csr.mhpmcounter_wmask = m_source.m_csr.mhpmcounter_wmask; + this.m_csr.mhpmevent_we = m_source.m_csr.mhpmevent_we; + this.m_csr.mhpmevent_rdata = m_source.m_csr.mhpmevent_rdata; + this.m_csr.mhpmevent_rmask = m_source.m_csr.mhpmevent_rmask; + this.m_csr.mhpmevent_wdata = m_source.m_csr.mhpmevent_wdata; + this.m_csr.mhpmevent_wmask = m_source.m_csr.mhpmevent_wmask; `ASSIGN_CSR(mip) `ASSIGN_CSR(tdata1) `ASSIGN_CSR(tdata2) diff --git 
a/hw/vendor/esl_epfl_cv32e40px/bhv/pipe_freeze_trace.sv b/hw/vendor/esl_epfl_cv32e40px/bhv/pipe_freeze_trace.sv index 88d65d0b0..39a16fa62 100644 --- a/hw/vendor/esl_epfl_cv32e40px/bhv/pipe_freeze_trace.sv +++ b/hw/vendor/esl_epfl_cv32e40px/bhv/pipe_freeze_trace.sv @@ -1,27 +1,29 @@ -// Copyright (c) 2023 OpenHW Group +// Copyright 2024 OpenHW Group and Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. // You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// CV32E40P -// -// Contributors: Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Yoann Pruvost, Dolphin Design // +// // +// Description: Structures, Functions and Task used to store all information // +// coming from the core pipeline at every posedge. // +// Those information will then be processed by RVFI. 
// +// // +//////////////////////////////////////////////////////////////////////////////////// -/* - * This struct is used to store all information comming from the core at every posedge - * The information will then be processed - */ typedef struct { logic is_decoding; logic is_illegal; @@ -349,6 +351,7 @@ function compute_csr_we(); r_pipe_freeze_trace.csr.fflags_we = 1'b0; r_pipe_freeze_trace.csr.frm_we = 1'b0; r_pipe_freeze_trace.csr.fcsr_we = 1'b0; + r_pipe_freeze_trace.csr.mhpmevent_we = '0; r_pipe_freeze_trace.csr.dpc_we = csr_dpc_we_i; if (r_pipe_freeze_trace.csr.we) begin case (r_pipe_freeze_trace.csr.addr) @@ -366,7 +369,10 @@ function compute_csr_we(); r_pipe_freeze_trace.csr.fflags_we = 1'b1; r_pipe_freeze_trace.csr.mstatus_fs_we = 1'b1; end - CSR_FRM: r_pipe_freeze_trace.csr.frm_we = 1'b1; + CSR_FRM: begin + r_pipe_freeze_trace.csr.frm_we = 1'b1; + r_pipe_freeze_trace.csr.mstatus_fs_we = 1'b1; + end CSR_FCSR: begin r_pipe_freeze_trace.csr.fcsr_we = 1'b1; r_pipe_freeze_trace.csr.mstatus_fs_we = 1'b1; @@ -376,6 +382,10 @@ function compute_csr_we(); CSR_DSCRATCH1: r_pipe_freeze_trace.csr.dscratch1_we = 1'b1; endcase end + + if (csr_mhpmevent_we_i) begin + r_pipe_freeze_trace.csr.mhpmevent_we[r_pipe_freeze_trace.csr.addr[4:0]] = 1'b1; + end // CSR_MCAUSE: r_pipe_freeze_trace.csr.mcause_we = r_pipe_freeze_trace.csr.mcause_n != r_pipe_freeze_trace.csr.mcause_q; //for debug purpose endfunction /* @@ -573,7 +583,6 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.mcountinhibit_we = csr_mcountinhibit_we_i; r_pipe_freeze_trace.csr.mhpmevent_n = csr_mhpmevent_n_i; r_pipe_freeze_trace.csr.mhpmevent_q = csr_mhpmevent_q_i; - r_pipe_freeze_trace.csr.mhpmevent_we = csr_mhpmevent_we_i; r_pipe_freeze_trace.csr.mscratch_n = csr_mscratch_n_i; r_pipe_freeze_trace.csr.mscratch_q = csr_mscratch_q_i; r_pipe_freeze_trace.csr.mepc_n = csr_mepc_n_i; diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_controller.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_controller.sv 
index a3e3bac9c..e9807a381 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_controller.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_controller.sv @@ -597,7 +597,17 @@ module cv32e40px_controller import cv32e40px_pkg::*; csr_status_i: begin halt_if_o = 1'b1; - ctrl_fsm_ns = id_ready_i ? FLUSH_EX : DECODE; + if (~id_ready_i) begin + ctrl_fsm_ns = DECODE; + end else begin + ctrl_fsm_ns = FLUSH_EX; + if (hwlp_end0_eq_pc) begin + hwlp_dec_cnt_o[0] = 1'b1; + end + if (hwlp_end1_eq_pc) begin + hwlp_dec_cnt_o[1] = 1'b1; + end + end end data_load_event_i: begin @@ -617,7 +627,7 @@ module cv32e40px_controller import cv32e40px_pkg::*; ctrl_fsm_ns = hwlp_end0_eq_pc_plus4 || hwlp_end1_eq_pc_plus4 ? DECODE : DECODE_HWLOOP; // we can be at the end of HWloop due to a return from interrupt or ecall or ebreak or exceptions - if(hwlp_end0_eq_pc && hwlp_counter0_gt_1) begin + if (hwlp_end0_eq_pc && hwlp_counter0_gt_1) begin pc_mux_o = PC_HWLOOP; if (~jump_done_q) begin pc_set_o = 1'b1; @@ -791,7 +801,17 @@ module cv32e40px_controller import cv32e40px_pkg::*; csr_status_i: begin halt_if_o = 1'b1; - ctrl_fsm_ns = id_ready_i ? 
FLUSH_EX : DECODE_HWLOOP; + if (~id_ready_i) begin + ctrl_fsm_ns = DECODE_HWLOOP; + end else begin + ctrl_fsm_ns = FLUSH_EX; + if (hwlp_end0_eq_pc) begin + hwlp_dec_cnt_o[0] = 1'b1; + end + if (hwlp_end1_eq_pc) begin + hwlp_dec_cnt_o[1] = 1'b1; + end + end end data_load_event_i: begin @@ -1067,16 +1087,10 @@ module cv32e40px_controller import cv32e40px_pkg::*; end csr_status_i: begin - - if(hwlp_end0_eq_pc && hwlp_counter0_gt_1) begin - pc_mux_o = PC_HWLOOP; - pc_set_o = 1'b1; - hwlp_dec_cnt_o[0] = 1'b1; - end - if(hwlp_end1_eq_pc && hwlp_counter1_gt_1) begin - pc_mux_o = PC_HWLOOP; - pc_set_o = 1'b1; - hwlp_dec_cnt_o[1] = 1'b1; + if ((hwlp_end0_eq_pc && !hwlp_counter0_eq_0) || + (hwlp_end1_eq_pc && !hwlp_counter1_eq_0)) begin + pc_mux_o = PC_HWLOOP; + pc_set_o = 1'b1; end end @@ -1561,7 +1575,7 @@ endgenerate // HWLoop 0 and 1 having target address constraints property p_hwlp_same_target_address; - @(posedge clk) (hwlp_counter_i[1] > 1 && hwlp_counter_i[0] > 1) |-> ( hwlp_end_addr_i[1] - 4 >= hwlp_end_addr_i[0] - 4 + 8 ); + @(posedge clk) (hwlp_counter_i[1] > 1 && hwlp_counter_i[0] > 1 && pc_id_i >= hwlp_start_addr_i[0] && pc_id_i <= hwlp_end_addr_i[0] - 4) |-> ( hwlp_end_addr_i[1] - 4 >= hwlp_end_addr_i[0] - 4 + 8 ); endproperty a_hwlp_same_target_address : assert property(p_hwlp_same_target_address) else $warning("%t, HWLoops target address do not respect constraints", $time); diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_core.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_core.sv index 25aa6cc98..91c4eb8d5 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_core.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_core.sv @@ -72,7 +72,8 @@ module cv32e40px_core output logic [31:0] data_wdata_o, input logic [31:0] data_rdata_i, - // apu-interconnect + // CVFPU interface + output logic apu_busy_o, // handshake signals output logic apu_req_o, input logic apu_gnt_i, @@ -199,7 +200,6 @@ module cv32e40px_core logic ctrl_busy; logic if_busy; logic 
lsu_busy; - logic apu_busy; logic [31:0] pc_ex; // PC of last executed branch or cv.elw @@ -445,7 +445,7 @@ module cv32e40px_core .if_busy_i (if_busy), .ctrl_busy_i(ctrl_busy), .lsu_busy_i (lsu_busy), - .apu_busy_i (apu_busy), + .apu_busy_i (apu_busy_o), // PULP cluster .pulp_clock_en_i (pulp_clock_en_i), @@ -690,7 +690,7 @@ module cv32e40px_core .apu_write_regs_valid_o (apu_write_regs_valid), .apu_write_dep_i (apu_write_dep), .apu_perf_dep_o (perf_apu_dep), - .apu_busy_i (apu_busy), + .apu_busy_i (apu_busy_o), // CORE-V-XIF // Compressed Interface @@ -909,9 +909,9 @@ module cv32e40px_core .apu_perf_cont_o(perf_apu_cont), .apu_perf_wb_o (perf_apu_wb), .apu_ready_wb_o (apu_ready_wb), - .apu_busy_o (apu_busy), + .apu_busy_o (apu_busy_o), - // apu-interconnect + // CVFPU interface // handshake signals .apu_req_o (apu_req_o), .apu_gnt_i (apu_gnt_i), @@ -1160,9 +1160,9 @@ module cv32e40px_core assign csr_addr_int = csr_num_e'(csr_access_ex ? alu_operand_b_ex[11:0] : '0); // Floating-Point registers write - assign fregs_we = (FPU & !ZFINX) ? ((regfile_alu_we_fw && regfile_alu_waddr_fw[5]) || - (regfile_we_wb && regfile_waddr_fw_wb_o[5])) - : 1'b0; + assign fregs_we = (FPU == 1 & ZFINX == 0) ? ((regfile_alu_we_fw && regfile_alu_waddr_fw[5]) || + (regfile_we_wb && regfile_waddr_fw_wb_o[5])) + : 1'b0; /////////////////////////// // ____ __ __ ____ // diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_cs_registers.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_cs_registers.sv index f24b3bf9b..530057250 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_cs_registers.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_cs_registers.sv @@ -509,7 +509,7 @@ module cv32e40px_cs_registers // mimpid, Machine Implementation ID CSR_MIMPID: begin - csr_rdata_int = (FPU || COREV_PULP || COREV_CLUSTER) ? 32'h1 : 'b0; + csr_rdata_int = (FPU == 1 || COREV_PULP == 1 || COREV_CLUSTER == 1) ? 
32'h1 : 'b0; end // unimplemented, read 0 CSRs diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_decoder.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_decoder.sv index 3d55fdbc8..c8e11cc3b 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_decoder.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_decoder.sv @@ -1057,7 +1057,6 @@ module cv32e40px_decoder 5'b00000: begin fpu_op = cv32e40px_fpu_pkg::ADD; fp_op_group = ADDMUL; - apu_op_o = 2'b0; alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; end @@ -1066,7 +1065,6 @@ module cv32e40px_decoder fpu_op = cv32e40px_fpu_pkg::ADD; fpu_op_mod = 1'b1; fp_op_group = ADDMUL; - apu_op_o = 2'b1; alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; end @@ -1085,7 +1083,6 @@ module cv32e40px_decoder regb_used_o = 1'b0; fpu_op = cv32e40px_fpu_pkg::SQRT; fp_op_group = DIVSQRT; - apu_op_o = 1'b1; // rs2 must be zero if (instr_rdata_i[24:20] != 5'b00000) illegal_insn_o = 1'b1; end @@ -1213,7 +1210,6 @@ module cv32e40px_decoder fpu_op = cv32e40px_fpu_pkg::F2I; fp_op_group = CONV; fpu_op_mod = instr_rdata_i[20]; // signed/unsigned switch - apu_op_o = 2'b1; unique case (instr_rdata_i[26:25]) //fix for casting to different formats other than FP32 2'b00: begin @@ -1249,7 +1245,6 @@ module cv32e40px_decoder fpu_op = cv32e40px_fpu_pkg::I2F; fp_op_group = CONV; fpu_op_mod = instr_rdata_i[20]; // signed/unsigned switch - apu_op_o = 2'b0; // bits [21:20] used, other bits must be 0 if (instr_rdata_i[24:21]) illegal_insn_o = 1'b1; // in RV32, no casts to L allowed. 
end @@ -1323,20 +1318,20 @@ module cv32e40px_decoder // check rounding mode if (check_fprm) begin unique case (instr_rdata_i[14:12]) inside - [3'b000:3'b100]: ; //legal rounding modes + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100: ; //legal rounding modes 3'b101: begin // Alternative Half-Precsision encded as fmt=10 and rm=101 if (~C_XF16ALT || fpu_dst_fmt_o != cv32e40px_fpu_pkg::FP16ALT) illegal_insn_o = 1'b1; // actual rounding mode from frm csr unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes - default : illegal_insn_o = 1'b1; + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100 : fp_rnd_mode_o = frm_i; //legal rounding modes + default : illegal_insn_o = 1'b1; endcase end 3'b111: begin // rounding mode from frm csr unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes - default : illegal_insn_o = 1'b1; + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100 : fp_rnd_mode_o = frm_i; //legal rounding modes + default : illegal_insn_o = 1'b1; endcase end default : illegal_insn_o = 1'b1; @@ -1364,6 +1359,7 @@ module cv32e40px_decoder NONCOMP : apu_lat_o = (FPU_OTHERS_LAT<2) ? FPU_OTHERS_LAT+1 : 2'h3; // CONV uses the same latency for all formats CONV : apu_lat_o = (FPU_OTHERS_LAT<2) ? 
FPU_OTHERS_LAT+1 : 2'h3; + default: ; endcase // Set FPnew OP and OPMOD as the APU op @@ -1425,25 +1421,21 @@ module cv32e40px_decoder unique case (instr_rdata_i[6:0]) // fmadd.fmt - FP Fused multiply-add OPCODE_OP_FMADD : begin - fpu_op = cv32e40px_fpu_pkg::FMADD; - apu_op_o = 2'b00; + fpu_op = cv32e40px_fpu_pkg::FMADD; end // fmsub.fmt - FP Fused multiply-subtract OPCODE_OP_FMSUB : begin - fpu_op = cv32e40px_fpu_pkg::FMADD; - fpu_op_mod = 1'b1; - apu_op_o = 2'b01; + fpu_op = cv32e40px_fpu_pkg::FMADD; + fpu_op_mod = 1'b1; end // fnmsub.fmt - FP Negated fused multiply-subtract OPCODE_OP_FNMSUB : begin - fpu_op = cv32e40px_fpu_pkg::FNMSUB; - apu_op_o = 2'b10; + fpu_op = cv32e40px_fpu_pkg::FNMSUB; end // fnmadd.fmt - FP Negated fused multiply-add OPCODE_OP_FNMADD : begin - fpu_op = cv32e40px_fpu_pkg::FNMSUB; - fpu_op_mod = 1'b1; - apu_op_o = 2'b11; + fpu_op = cv32e40px_fpu_pkg::FNMSUB; + fpu_op_mod = 1'b1; end default : ; endcase @@ -1459,19 +1451,19 @@ module cv32e40px_decoder // check rounding mode unique case (instr_rdata_i[14:12]) inside - [3'b000:3'b100]: ; //legal rounding modes + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100: ; //legal rounding modes 3'b101: begin // Alternative Half-Precsision encded as fmt=10 and rm=101 if (~C_XF16ALT || fpu_dst_fmt_o != cv32e40px_fpu_pkg::FP16ALT) illegal_insn_o = 1'b1; // actual rounding mode from frm csr unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100 : fp_rnd_mode_o = frm_i; //legal rounding modes default : illegal_insn_o = 1'b1; endcase end 3'b111: begin // rounding mode from frm csr unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100 : fp_rnd_mode_o = frm_i; //legal rounding modes default : illegal_insn_o = 1'b1; endcase end @@ -1493,6 +1485,7 @@ module cv32e40px_decoder // Set FPnew OP and OPMOD as the APU op apu_op_o = {fpu_vec_op, fpu_op_mod, fpu_op}; + // 
No FPU or (ZFINX == 0 && MSTATUS.FS == FS_OFF) end else begin illegal_insn_o = 1'b1; @@ -1900,15 +1893,14 @@ module cv32e40px_decoder alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; unique case (instr_rdata_i[27:25]) - 3'b000: alu_operator_o = ALU_ADD; // cv.addNr - 3'b001: alu_operator_o = ALU_ADDU; // cv.adduNr - 3'b010: alu_operator_o = ALU_ADDR; // cv.addRNr - 3'b011: alu_operator_o = ALU_ADDUR; // cv.adduRNr - 3'b100: alu_operator_o = ALU_SUB; // cv.subNr - 3'b101: alu_operator_o = ALU_SUBU; // cv.subuNr - 3'b110: alu_operator_o = ALU_SUBR; // cv.subRNr - 3'b111: alu_operator_o = ALU_SUBUR; // cv.subuRNr - default: alu_operator_o = ALU_ADD; + 3'b001: alu_operator_o = ALU_ADDU; // cv.adduNr + 3'b010: alu_operator_o = ALU_ADDR; // cv.addRNr + 3'b011: alu_operator_o = ALU_ADDUR; // cv.adduRNr + 3'b100: alu_operator_o = ALU_SUB; // cv.subNr + 3'b101: alu_operator_o = ALU_SUBU; // cv.subuNr + 3'b110: alu_operator_o = ALU_SUBR; // cv.subRNr + 3'b111: alu_operator_o = ALU_SUBUR; // cv.subuRNr + default: alu_operator_o = ALU_ADD; // cv.addNr endcase end @@ -2085,7 +2077,6 @@ module cv32e40px_decoder // decide between using unsigned and rounding, and combinations unique case ({instr_rdata_i[31:30], instr_rdata_i[12]}) - {2'b00, 1'b0}: alu_operator_o = ALU_ADD; // cv.addN {2'b01, 1'b0}: alu_operator_o = ALU_ADDU; // cv.adduN {2'b10, 1'b0}: alu_operator_o = ALU_ADDR; // cv.addRN {2'b11, 1'b0}: alu_operator_o = ALU_ADDUR; // cv.adduRN @@ -2093,12 +2084,12 @@ module cv32e40px_decoder {2'b01, 1'b1}: alu_operator_o = ALU_SUBU; // cv.subuN {2'b10, 1'b1}: alu_operator_o = ALU_SUBR; // cv.subRN {2'b11, 1'b1}: alu_operator_o = ALU_SUBUR; // cv.subuRN - default : alu_operator_o = ALU_ADD; + default : alu_operator_o = ALU_ADD; // cv.addN endcase end - 2'b10, 2'b11: begin + default: begin // MUL/MAC with subword selection alu_en = 1'b0; mult_int_en = 1'b1; @@ -2126,7 +2117,6 @@ module cv32e40px_decoder mult_operator_o = MUL_I; end end - default: illegal_insn_o = 1'b1; endcase end else begin 
illegal_insn_o = 1'b1; @@ -2267,6 +2257,11 @@ module cv32e40px_decoder instr_rdata_i[25] != 1'b0) begin illegal_insn_o = 1'b1; end + // Imm6 restrictions + if ((instr_rdata_i[14:12] == 3'b110 && instr_rdata_i[24:23] != 2'b0) || + (instr_rdata_i[14:12] == 3'b111 && instr_rdata_i[24:22] != 3'b0)) begin + illegal_insn_o = 1'b1; + end end 6'b01001_0: begin // cv.sra alu_operator_o = ALU_SRA; @@ -2278,6 +2273,11 @@ module cv32e40px_decoder instr_rdata_i[25] != 1'b0) begin illegal_insn_o = 1'b1; end + // Imm6 restrictions + if ((instr_rdata_i[14:12] == 3'b110 && instr_rdata_i[24:23] != 2'b0) || + (instr_rdata_i[14:12] == 3'b111 && instr_rdata_i[24:22] != 3'b0)) begin + illegal_insn_o = 1'b1; + end end 6'b01010_0: begin // cv.sll alu_operator_o = ALU_SLL; @@ -2289,6 +2289,11 @@ module cv32e40px_decoder instr_rdata_i[25] != 1'b0) begin illegal_insn_o = 1'b1; end + // Imm6 restrictions + if ((instr_rdata_i[14:12] == 3'b110 && instr_rdata_i[24:23] != 2'b0) || + (instr_rdata_i[14:12] == 3'b111 && instr_rdata_i[24:22] != 3'b0)) begin + illegal_insn_o = 1'b1; + end end 6'b01011_0: begin // cv.or alu_operator_o = ALU_OR; @@ -2425,6 +2430,11 @@ module cv32e40px_decoder end default: illegal_insn_o = 1'b1; endcase + // Imm6 restrictions + if ((instr_rdata_i[12] == 1'b0 && instr_rdata_i[24:20] != 5'b0) || + (instr_rdata_i[12] == 1'b1 && instr_rdata_i[24:21] != 4'b0)) begin + illegal_insn_o = 1'b1; + end end 6'b11000_0: begin // cv.shuffle, cv.shuffleI0 alu_operator_o = ALU_SHUF; @@ -2439,6 +2449,10 @@ module cv32e40px_decoder instr_rdata_i[25] != 1'b0) begin illegal_insn_o = 1'b1; end + // Imm6 restriction + if (instr_rdata_i[14:12] == 3'b110 && instr_rdata_i[24:21] != 4'b0) begin + illegal_insn_o = 1'b1; + end end 6'b11001_0, 6'b11010_0, diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_ex_stage.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_ex_stage.sv index 3beaf0227..028761083 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_ex_stage.sv +++ 
b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_ex_stage.sv @@ -266,7 +266,7 @@ module cv32e40px_ex_stage if (regfile_we_lsu) begin regfile_we_wb_o = 1'b1; - regfile_we_wb_power_o = !COREV_PULP ? 1'b1 : ~data_misaligned_ex_i & wb_ready_i; + regfile_we_wb_power_o = (COREV_PULP == 0) ? 1'b1 : ~data_misaligned_ex_i & wb_ready_i; if (apu_valid & (!apu_singlecycle & !apu_multicycle)) begin wb_contention_lsu = 1'b1; end diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_fp_wrapper.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_fp_wrapper.sv index 54add99df..126a1f449 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_fp_wrapper.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_fp_wrapper.sv @@ -14,7 +14,7 @@ module cv32e40px_fp_wrapper import cv32e40px_apu_core_pkg::*; #( - parameter FPU_ADDMUL_LAT = 0, // Floating-Point ADDition/MULtiplication computing lane pipeline registers number + parameter FPU_ADDMUL_LAT = 0, // Floating-Point ADDition/MULtiplication computing lane pipeline registers number parameter FPU_OTHERS_LAT = 0 // Floating-Point COMParison/CONVersion computing lanes pipeline registers number ) ( // Clock and Reset diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv index 6e996c32f..9f1f668b5 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_id_stage.sv @@ -1772,9 +1772,13 @@ module cv32e40px_id_stage if (id_valid_o) begin // unstall the whole pipeline alu_en_ex_o <= alu_en; if (alu_en) begin - alu_operator_ex_o <= alu_operator; - alu_operand_a_ex_o <= alu_operand_a; - alu_operand_b_ex_o <= alu_operand_b; + alu_operator_ex_o <= alu_operator; + alu_operand_a_ex_o <= alu_operand_a; + if (alu_op_b_mux_sel == OP_B_REGB_OR_FWD && (alu_operator == ALU_CLIP || alu_operator == ALU_CLIPU)) begin + alu_operand_b_ex_o <= {1'b0, alu_operand_b[30:0]}; + end else begin + alu_operand_b_ex_o <= alu_operand_b; + end 
alu_operand_c_ex_o <= alu_operand_c; bmask_a_ex_o <= bmask_a_id; bmask_b_ex_o <= bmask_b_id; diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_load_store_unit.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_load_store_unit.sv index 674da6647..92194ccdd 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_load_store_unit.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_load_store_unit.sv @@ -121,18 +121,18 @@ module cv32e40px_load_store_unit #( 2'b00: begin // Writing a word if (misaligned_st == 1'b0) begin // non-misaligned case case (data_addr_int[1:0]) - 2'b00: data_be = 4'b1111; - 2'b01: data_be = 4'b1110; - 2'b10: data_be = 4'b1100; - 2'b11: data_be = 4'b1000; + 2'b00: data_be = 4'b1111; + 2'b01: data_be = 4'b1110; + 2'b10: data_be = 4'b1100; + default: data_be = 4'b1000; endcase ; // case (data_addr_int[1:0]) end else begin // misaligned case case (data_addr_int[1:0]) - 2'b00: data_be = 4'b0000; // this is not used, but included for completeness - 2'b01: data_be = 4'b0001; - 2'b10: data_be = 4'b0011; - 2'b11: data_be = 4'b0111; + 2'b01: data_be = 4'b0001; + 2'b10: data_be = 4'b0011; + 2'b11: data_be = 4'b0111; + default: data_be = 4'b0000; // this is not used, but included for completeness endcase ; // case (data_addr_int[1:0]) end @@ -141,10 +141,10 @@ module cv32e40px_load_store_unit #( 2'b01: begin // Writing a half word if (misaligned_st == 1'b0) begin // non-misaligned case case (data_addr_int[1:0]) - 2'b00: data_be = 4'b0011; - 2'b01: data_be = 4'b0110; - 2'b10: data_be = 4'b1100; - 2'b11: data_be = 4'b1000; + 2'b00: data_be = 4'b0011; + 2'b01: data_be = 4'b0110; + 2'b10: data_be = 4'b1100; + default: data_be = 4'b1000; endcase ; // case (data_addr_int[1:0]) end else begin // misaligned case @@ -154,10 +154,10 @@ module cv32e40px_load_store_unit #( 2'b10, 2'b11: begin // Writing a byte case (data_addr_int[1:0]) - 2'b00: data_be = 4'b0001; - 2'b01: data_be = 4'b0010; - 2'b10: data_be = 4'b0100; - 2'b11: data_be = 4'b1000; + 2'b00: data_be = 
4'b0001; + 2'b01: data_be = 4'b0010; + 2'b10: data_be = 4'b0100; + default: data_be = 4'b1000; endcase ; // case (data_addr_int[1:0]) end diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_register_file_latch.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_register_file_latch.sv index 217150c04..63a23228c 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_register_file_latch.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_register_file_latch.sv @@ -203,8 +203,8 @@ module cv32e40px_register_file #( mem[0] = '0; for (k = 1; k < NUM_WORDS; k++) begin : w_WordIter - if (~rst_n) mem[k] = '0; - else if (mem_clocks[k] == 1'b1) mem[k] = waddr_onehot_b_q[k] ? wdata_b_q : wdata_a_q; + if (~rst_n) mem[k] <= '0; + else if (mem_clocks[k] == 1'b1) mem[k] <= waddr_onehot_b_q[k] ? wdata_b_q : wdata_a_q; end end @@ -213,9 +213,9 @@ module cv32e40px_register_file #( always_latch begin : latch_wdata_fp if (FPU == 1) begin for (l = 0; l < NUM_FP_WORDS; l++) begin : w_WordIter - if (~rst_n) mem_fp[l] = '0; + if (~rst_n) mem_fp[l] <= '0; else if (mem_clocks[l+NUM_WORDS] == 1'b1) - mem_fp[l] = waddr_onehot_b_q[l+NUM_WORDS] ? wdata_b_q : wdata_a_q; + mem_fp[l] <= waddr_onehot_b_q[l+NUM_WORDS] ? wdata_b_q : wdata_a_q; end end end diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_top.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_top.sv index ad3dc9868..03b996266 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_top.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_top.sv @@ -1,15 +1,27 @@ -// Copyright 2018 ETH Zurich and University of Bologna. -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Top file instantiating a CV32E40P core and an optional FPU -// Contributor: Davide Schiavone +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +///////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Pascal Gouedo, Dolphin Design // +// // +// Description: Top level module of CV32E40P instantiating the Core and // +// an optional CVFPU with its clock gating cell. 
// +// // +///////////////////////////////////////////////////////////////////////////// module cv32e40px_top import cv32e40px_core_v_xif_pkg::*; @@ -105,7 +117,7 @@ module cv32e40px_top import cv32e40px_apu_core_pkg::*; // Core to FPU - logic clk; + logic apu_busy; logic apu_req; logic [ APU_NARGS_CPU-1:0][31:0] apu_operands; logic [ APU_WOP_CPU-1:0] apu_op; @@ -117,6 +129,8 @@ module cv32e40px_top logic [ 31:0] apu_rdata; logic [APU_NUSFLAGS_CPU-1:0] apu_rflags; + logic apu_clk_en, apu_clk; + // Instantiate the Core cv32e40px_core #( .COREV_X_IF (COREV_X_IF), @@ -155,6 +169,7 @@ module cv32e40px_top .data_wdata_o (data_wdata_o), .data_rdata_i (data_rdata_i), + .apu_busy_o (apu_busy), .apu_req_o (apu_req), .apu_gnt_i (apu_gnt), .apu_operands_o(apu_operands), @@ -211,12 +226,15 @@ module cv32e40px_top generate if (FPU) begin : fpu_gen + + assign apu_clk_en = apu_req | apu_busy; + // FPU clock gate cv32e40px_clock_gate core_clock_gate_i ( .clk_i (clk_i), - .en_i (!core_sleep_o), + .en_i (apu_clk_en), .scan_cg_en_i(scan_cg_en_i), - .clk_o (clk) + .clk_o (apu_clk) ); // Instantiate the FPU wrapper @@ -224,7 +242,7 @@ module cv32e40px_top .FPU_ADDMUL_LAT(FPU_ADDMUL_LAT), .FPU_OTHERS_LAT(FPU_OTHERS_LAT) ) fp_wrapper_i ( - .clk_i (clk), + .clk_i (apu_clk), .rst_ni (rst_ni), .apu_req_i (apu_req), .apu_gnt_o (apu_gnt), diff --git a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv index fe1471241..dc8907979 100644 --- a/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv +++ b/hw/vendor/esl_epfl_cv32e40px/rtl/cv32e40px_x_disp.sv @@ -8,16 +8,16 @@ // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
-//////////////////////////////////////////////////////////////////////////////// -// Engineer: Moritz Imfeld - moimfeld@student.ethz.ch // -// // -// Design Name: x-interface dispatcher // -// Project Name: cv32e40px // -// Language: SystemVerilog // -// // -// Description: Dispatcher for sending instructions to the x-interface. // -// // -//////////////////////////////////////////////////////////////////////////////// +///////////////////////////////////////////////////////////////////////////// +// Engineer: Moritz Imfeld - moimfeld@ee.ethz.ch // +// // +// Design Name: x-interface dispatcher // +// Project Name: cv32e40px // +// Language: SystemVerilog // +// // +// Description: Dispatcher for sending instructions to the x-interface. // +// // +///////////////////////////////////////////////////////////////////////////// module cv32e40px_x_disp import cv32e40px_core_v_xif_pkg::*; @@ -107,6 +107,7 @@ module cv32e40px_x_disp logic x_if_not_ready; logic x_if_memory_instr; logic illegal_forwarding_prevention; + logic x_issue_illegal; // issue interface assign x_issue_valid_o = x_illegal_insn_dec_i & ~branch_or_jump_i & ~instr_offloaded_q & instr_valid_i & ~illegal_forwarding_prevention; @@ -243,10 +244,11 @@ module cv32e40px_x_disp end end - // illegal instruction assertion + // illegal instruction assignment + assign x_issue_illegal = x_illegal_insn_dec_i & ~instr_offloaded_q & instr_valid_i; always_comb begin x_illegal_insn_o = 1'b0; - if (x_issue_valid_o & x_issue_ready_i & ~x_issue_resp_accept_i) begin + if (x_issue_illegal & x_issue_ready_i & ~x_issue_resp_accept_i) begin x_illegal_insn_o = 1'b1; end end diff --git a/hw/vendor/openhwgroup_cv32e20/cve2_top.core b/hw/vendor/openhwgroup_cv32e20/cve2_top.core index cb4e23b4c..0d9d2c5cc 100644 --- a/hw/vendor/openhwgroup_cv32e20/cve2_top.core +++ b/hw/vendor/openhwgroup_cv32e20/cve2_top.core @@ -75,6 +75,7 @@ targets: - tool_veriblelint ? (files_lint_verible) - files_rtl - target_sim ? 
(files_clk_gate) + - target_sim_sc ? (files_clk_gate) toplevel: cve2_top parameters: - tool_vivado ? (FPGA_XILINX=true) diff --git a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_core.sv b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_core.sv index 4f79b67f9..aff3d0d44 100644 --- a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_core.sv +++ b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_core.sv @@ -625,14 +625,12 @@ module cve2_core import cve2_pkg::*; #( assign outstanding_store_id = id_stage_i.instr_executing & id_stage_i.lsu_req_dec & id_stage_i.lsu_we; - begin : gen_no_wb_stage - // Without writeback stage only look into whether load or store is in ID to determine if - // a response is expected. - assign outstanding_load_resp = outstanding_load_id; - assign outstanding_store_resp = outstanding_store_id; + // Without writeback stage only look into whether load or store is in ID to determine if + // a response is expected. + assign outstanding_load_resp = outstanding_load_id; + assign outstanding_store_resp = outstanding_store_id; - `ASSERT(NoMemRFWriteWithoutPendingLoad, rf_we_lsu |-> outstanding_load_id, clk_i, !rst_ni) - end + `ASSERT(NoMemRFWriteWithoutPendingLoad, rf_we_lsu |-> outstanding_load_id, clk_i, !rst_ni) `ASSERT(NoMemResponseWithoutPendingAccess, data_rvalid_i |-> outstanding_load_resp | outstanding_store_resp, clk_i, !rst_ni) @@ -1094,6 +1092,9 @@ module cve2_core import cve2_pkg::*; #( rvfi_ext_stage_debug_req[i+1] <= rvfi_ext_stage_debug_req[i]; rvfi_ext_stage_mcycle[i] <= cs_registers_i.mcycle_counter_i.counter_val_o; end + else begin + rvfi_stage_trap[i] <= 0; + end end else begin rvfi_stage_halt[i] <= rvfi_stage_halt[i-1]; rvfi_stage_trap[i] <= rvfi_stage_trap[i-1]; diff --git a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_cs_registers.sv b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_cs_registers.sv index dfce09858..9678638d8 100644 --- a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_cs_registers.sv +++ b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_cs_registers.sv @@ -24,6 +24,7 @@ module 
cve2_cs_registers #( parameter cve2_pkg::rv32m_e RV32M = cve2_pkg::RV32MFast, parameter cve2_pkg::rv32b_e RV32B = cve2_pkg::RV32BNone ) ( + // Clock and Reset input logic clk_i, input logic rst_ni, @@ -103,7 +104,7 @@ module cve2_cs_registers #( input logic div_wait_i // core waiting for divide ); - import cve2_pkg::*; +import cve2_pkg::*; localparam int unsigned RV32BEnabled = (RV32B == RV32BNone) ? 0 : 1; localparam int unsigned RV32MEnabled = (RV32M == RV32MNone) ? 0 : 1; @@ -1444,7 +1445,83 @@ module cve2_cs_registers #( // CPU control register // ////////////////////////// - // Removed +`ifdef RVFI + logic [63:0] mstatus_extended_read; + logic [63:0] mstatus_extended_write; + + assign mstatus_extended_read[CSR_MSTATUS_MIE_BIT] = mstatus_q.mie; + assign mstatus_extended_read[CSR_MSTATUS_MPIE_BIT] = mstatus_q.mpie; + assign mstatus_extended_read[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW] = mstatus_q.mpp; + assign mstatus_extended_read[CSR_MSTATUS_MPRV_BIT] = mstatus_q.mprv; + assign mstatus_extended_read[CSR_MSTATUS_TW_BIT] = mstatus_q.tw; + + assign mstatus_extended_write[CSR_MSTATUS_MIE_BIT] = mstatus_d.mie; + assign mstatus_extended_write[CSR_MSTATUS_MPIE_BIT] = mstatus_d.mpie; + assign mstatus_extended_write[CSR_MSTATUS_MPP_BIT_HIGH:CSR_MSTATUS_MPP_BIT_LOW] = mstatus_d.mpp; + assign mstatus_extended_write[CSR_MSTATUS_MPRV_BIT] = mstatus_d.mprv; + assign mstatus_extended_write[CSR_MSTATUS_TW_BIT] = mstatus_d.tw; + + wire [63:0] rvfi_csr_bypass; + + assign rvfi_csr_bypass = csr_save_cause_i; + + bit [63:0] rvfi_csr_addr; + bit [63:0] rvfi_csr_rdata; + bit [63:0] rvfi_csr_wdata; + bit [63:0] rvfi_csr_rmask; + bit [63:0] rvfi_csr_wmask; + wire [63:0] rvfi_csr_wmask_q; + wire [63:0] rvfi_csr_rmask_q; + assign rvfi_csr_if.rvfi_csr_addr = rvfi_csr_addr; + assign rvfi_csr_if.rvfi_csr_rdata = rvfi_csr_rdata; + assign rvfi_csr_if.rvfi_csr_wdata = rvfi_csr_wdata; + assign rvfi_csr_if.rvfi_csr_rmask = rvfi_csr_rmask; + assign rvfi_csr_if.rvfi_csr_wmask = 
rvfi_csr_wmask; + assign rvfi_csr_rmask_q = ((~csr_wr & csr_op_en_i & ~illegal_csr_insn_o)) ? -1 : 0; + assign rvfi_csr_wmask_q = ((csr_wr & csr_op_en_i & ~illegal_csr_insn_o)) ? -1 : 0; + always @(posedge clknrst_if.clk) begin + rvfi_csr_addr = csr_addr_i; + rvfi_csr_rdata = csr_rdata_int; + rvfi_csr_wdata = csr_wdata_int; + rvfi_csr_rmask = (rvfi_csr_rmask_q); + rvfi_csr_wmask = (rvfi_csr_wmask_q); + end + +`define RVFI_CONNECT(CSR_ADDR, CSR_NAME, CSR_RDATA, CSR_WDATA, CSR_RMASK, CSR_WMASK) \ + bit [63:0] rvfi_``CSR_NAME``_csr_rdata;\ + bit [63:0] rvfi_``CSR_NAME``_csr_wdata;\ + bit [63:0] rvfi_``CSR_NAME``_csr_rmask;\ + bit [63:0] rvfi_``CSR_NAME``_csr_wmask;\ + wire [63:0] rvfi_``CSR_NAME``_csr_wmask_q; \ + wire [63:0] rvfi_``CSR_NAME``_csr_rmask_q; \ + assign rvfi_csr_if.rvfi_named_csr_rdata[CSR_ADDR] = (!rvfi_csr_bypass) ? rvfi_``CSR_NAME``_csr_rdata : ``CSR_RDATA``; \ + assign rvfi_csr_if.rvfi_named_csr_wdata[CSR_ADDR] = (!rvfi_csr_bypass) ? rvfi_``CSR_NAME``_csr_wdata : ``CSR_WDATA``; \ + assign rvfi_csr_if.rvfi_named_csr_rmask[CSR_ADDR] = (!rvfi_csr_bypass) ? rvfi_``CSR_NAME``_csr_rmask : rvfi_``CSR_NAME``_csr_rmask_q; \ + assign rvfi_csr_if.rvfi_named_csr_wmask[CSR_ADDR] = (!rvfi_csr_bypass) ? rvfi_``CSR_NAME``_csr_wmask : rvfi_``CSR_NAME``_csr_wmask_q; \ + assign rvfi_``CSR_NAME``_csr_rmask_q = ((~csr_wr & csr_op_en_i & ~illegal_csr_insn_o & (csr_addr_i == CSR_ADDR)) CSR_RMASK) ? -1 : 0; \ + assign rvfi_``CSR_NAME``_csr_wmask_q = ((csr_wr & csr_op_en_i & ~illegal_csr_insn_o & (csr_addr_i == CSR_ADDR)) CSR_WMASK) ? 
-1 : 0; \ + always @(posedge clknrst_if.clk) begin \ + rvfi_``CSR_NAME``_csr_rdata = ``CSR_RDATA``; \ + rvfi_``CSR_NAME``_csr_wdata = ``CSR_WDATA``; \ + rvfi_``CSR_NAME``_csr_rmask = (rvfi_``CSR_NAME``_csr_rmask_q); \ + rvfi_``CSR_NAME``_csr_wmask = (rvfi_``CSR_NAME``_csr_wmask_q); \ + end + + `RVFI_CONNECT( CSR_MSTATUS, mstatus , mstatus_extended_read , mstatus_extended_write , , || mstatus_en) + `RVFI_CONNECT( CSR_MIE, mie , mie_q , mie_d , , || mie_en ) + `RVFI_CONNECT( CSR_MIP, mip , mip , csr_wdata_i , , ) + `RVFI_CONNECT( CSR_MISA, misa , MISA_VALUE , csr_wdata_i , , ) + `RVFI_CONNECT( CSR_MTVEC, mtvec , mtvec_q , mtvec_d , , || mtvec_en ) + `RVFI_CONNECT( CSR_MEPC, mepc , mepc_q , mepc_d , , || mepc_en ) + `RVFI_CONNECT( CSR_MCAUSE, mcause , mcause_q , mcause_d , , || mcause_en ) + `RVFI_CONNECT( CSR_MTVAL, mtval , mtval_q , mtval_d , , || mtval_en ) + `RVFI_CONNECT( CSR_MSTATUSH, mstatush , 'h0 , csr_wdata_i , , ) + `RVFI_CONNECT( CSR_DCSR, dcsr , dcsr_q , dcsr_d , , || dcsr_en) + `RVFI_CONNECT( CSR_DPC, dpc , depc_q , depc_d , , || depc_en) + `RVFI_CONNECT( CSR_DSCRATCH0, dscratch0 , dscratch0_q , csr_wdata_i , , || dscratch0_en) + `RVFI_CONNECT( CSR_DSCRATCH1, dscratch1 , dscratch1_q , csr_wdata_i , , || dscratch1_en) + +`endif //////////////// // Assertions // diff --git a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_fetch_fifo.sv b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_fetch_fifo.sv index 20b2b62b3..60933a030 100644 --- a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_fetch_fifo.sv +++ b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_fetch_fifo.sv @@ -225,7 +225,6 @@ module cve2_fetch_fifo #( end for (genvar i = 0; i < DEPTH; i++) begin : g_fifo_regs - begin : g_rdata always_ff @(posedge clk_i or negedge rst_ni) begin if (!rst_ni) begin rdata_q[i] <= '0; @@ -235,7 +234,6 @@ module cve2_fetch_fifo #( err_q[i] <= err_d[i]; end end - end end //////////////// diff --git a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_id_stage.sv b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_id_stage.sv 
index e989ff295..e7ebf52c2 100644 --- a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_id_stage.sv +++ b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_id_stage.sv @@ -755,6 +755,7 @@ module cve2_id_stage #( // Signal which instructions to count as retired in minstret, all traps along with ebrk and // ecall instructions are not counted. assign instr_perf_count_id_o = ~ebrk_insn & ~ecall_insn_dec & ~illegal_insn_dec & + ~(dret_insn_dec & ~debug_mode_o) & ~illegal_csr_insn_i & ~instr_fetch_err_i; // An instruction is ready to move to the writeback diff --git a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_pkg.sv b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_pkg.sv index 6cd8f4b1e..2e5b35352 100644 --- a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_pkg.sv +++ b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_pkg.sv @@ -574,5 +574,85 @@ package cve2_pkg; // alter this to point to their system specific configuration data structure. localparam logic [31:0] CSR_MCONFIGPTR_VALUE = 32'b0; + // RVFI CSR element + typedef struct packed { + bit [63:0] rdata; + bit [63:0] rmask; + bit [63:0] wdata; + bit [63:0] wmask; + } rvfi_csr_elmt_t; + + // RVFI CSR structure + typedef struct packed { + rvfi_csr_elmt_t fflags; + rvfi_csr_elmt_t frm; + rvfi_csr_elmt_t fcsr; + rvfi_csr_elmt_t ftran; + rvfi_csr_elmt_t dcsr; + rvfi_csr_elmt_t dpc; + rvfi_csr_elmt_t dscratch0; + rvfi_csr_elmt_t dscratch1; + rvfi_csr_elmt_t sstatus; + rvfi_csr_elmt_t sie; + rvfi_csr_elmt_t sip; + rvfi_csr_elmt_t stvec; + rvfi_csr_elmt_t scounteren; + rvfi_csr_elmt_t sscratch; + rvfi_csr_elmt_t sepc; + rvfi_csr_elmt_t scause; + rvfi_csr_elmt_t stval; + rvfi_csr_elmt_t satp; + rvfi_csr_elmt_t mstatus; + rvfi_csr_elmt_t mstatush; + rvfi_csr_elmt_t misa; + rvfi_csr_elmt_t medeleg; + rvfi_csr_elmt_t mideleg; + rvfi_csr_elmt_t mie; + rvfi_csr_elmt_t mtvec; + rvfi_csr_elmt_t mcounteren; + rvfi_csr_elmt_t mscratch; + rvfi_csr_elmt_t mepc; + rvfi_csr_elmt_t mcause; + rvfi_csr_elmt_t mtval; + rvfi_csr_elmt_t mip; + rvfi_csr_elmt_t menvcfg; + rvfi_csr_elmt_t 
menvcfgh; + rvfi_csr_elmt_t mvendorid; + rvfi_csr_elmt_t marchid; + rvfi_csr_elmt_t mhartid; + rvfi_csr_elmt_t mcountinhibit; + rvfi_csr_elmt_t mcycle; + rvfi_csr_elmt_t mcycleh; + rvfi_csr_elmt_t minstret; + rvfi_csr_elmt_t minstreth; + rvfi_csr_elmt_t cycle; + rvfi_csr_elmt_t cycleh; + rvfi_csr_elmt_t instret; + rvfi_csr_elmt_t instreth; + rvfi_csr_elmt_t dcache; + rvfi_csr_elmt_t icache; + rvfi_csr_elmt_t acc_cons; + rvfi_csr_elmt_t pmpcfg0; + rvfi_csr_elmt_t pmpcfg1; + rvfi_csr_elmt_t pmpcfg2; + rvfi_csr_elmt_t pmpcfg3; + rvfi_csr_elmt_t pmpaddr0; + rvfi_csr_elmt_t pmpaddr1; + rvfi_csr_elmt_t pmpaddr2; + rvfi_csr_elmt_t pmpaddr3; + rvfi_csr_elmt_t pmpaddr4; + rvfi_csr_elmt_t pmpaddr5; + rvfi_csr_elmt_t pmpaddr6; + rvfi_csr_elmt_t pmpaddr7; + rvfi_csr_elmt_t pmpaddr8; + rvfi_csr_elmt_t pmpaddr9; + rvfi_csr_elmt_t pmpaddr10; + rvfi_csr_elmt_t pmpaddr11; + rvfi_csr_elmt_t pmpaddr12; + rvfi_csr_elmt_t pmpaddr13; + rvfi_csr_elmt_t pmpaddr14; + rvfi_csr_elmt_t pmpaddr15; + } rvfi_csr_t; + endpackage diff --git a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_top_tracing.sv b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_top_tracing.sv index 989e83d7d..7589bc3a4 100644 --- a/hw/vendor/openhwgroup_cv32e20/rtl/cve2_top_tracing.sv +++ b/hw/vendor/openhwgroup_cv32e20/rtl/cve2_top_tracing.sv @@ -11,7 +11,6 @@ module cve2_top_tracing import cve2_pkg::*; #( parameter int unsigned MHPMCounterWidth = 40, parameter bit RV32E = 1'b0, parameter rv32m_e RV32M = RV32MFast, - parameter bit BranchPredictor = 1'b0, parameter int unsigned DmHaltAddr = 32'h1A110800, parameter int unsigned DmExceptionAddr = 32'h1A110808 ) ( @@ -112,7 +111,6 @@ module cve2_top_tracing import cve2_pkg::*; #( .MHPMCounterWidth ( MHPMCounterWidth ), .RV32E ( RV32E ), .RV32M ( RV32M ), - .BranchPredictor ( BranchPredictor ), .DmHaltAddr ( DmHaltAddr ), .DmExceptionAddr ( DmExceptionAddr ) ) u_cve2_top ( diff --git a/hw/vendor/openhwgroup_cv32e40p.core b/hw/vendor/openhwgroup_cv32e40p.core index f11d35c11..1d8a39f7f 100644 
--- a/hw/vendor/openhwgroup_cv32e40p.core +++ b/hw/vendor/openhwgroup_cv32e40p.core @@ -64,3 +64,4 @@ targets: - files_rtl - ff_regfile - target_sim? (files_clk_gate) + - target_sim_sc? (files_clk_gate) diff --git a/hw/vendor/openhwgroup_cv32e40p.lock.hjson b/hw/vendor/openhwgroup_cv32e40p.lock.hjson index 00ea518e8..fcaf1619e 100644 --- a/hw/vendor/openhwgroup_cv32e40p.lock.hjson +++ b/hw/vendor/openhwgroup_cv32e40p.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/openhwgroup/cv32e40p.git - rev: c8d65849ec060c6f7bc62325b46ba0ab7eae8805 + rev: a43277c0dc64c02be3f5d713438f315ecffde2b9 } } diff --git a/hw/vendor/openhwgroup_cv32e40p.vendor.hjson b/hw/vendor/openhwgroup_cv32e40p.vendor.hjson index 462a2848b..9712edb0d 100644 --- a/hw/vendor/openhwgroup_cv32e40p.vendor.hjson +++ b/hw/vendor/openhwgroup_cv32e40p.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/openhwgroup/cv32e40p.git", - rev: "c8d65849ec060c6f7bc62325b46ba0ab7eae8805", + rev: "a43277c0dc64c02be3f5d713438f315ecffde2b9", }, patch_dir: "patches/openhwgroup_cv32e40p", diff --git a/hw/vendor/openhwgroup_cv32e40p/.gitignore b/hw/vendor/openhwgroup_cv32e40p/.gitignore index ef351b194..6c2cd7917 100644 --- a/hw/vendor/openhwgroup_cv32e40p/.gitignore +++ b/hw/vendor/openhwgroup_cv32e40p/.gitignore @@ -18,3 +18,11 @@ TAGS /build /Bender.lock /Bender.local +golden_reference_design +ref_design +golden.src +revised.src +cadence_conformal +synopsys_formality +questa_autocheck +reports diff --git a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_instr_trace.svh b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_instr_trace.svh index 8c4ae99f3..355bc7382 100644 --- a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_instr_trace.svh +++ b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_instr_trace.svh @@ -1,23 +1,37 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. 
// -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at +// This file, and derivatives thereof are licensed under the +// Solderpad License, Version 2.0 (the "License"). // -// https://solderpad.org/licenses/ +// Use of this file means you agree to the terms and conditions +// of the license and are in full compliance with the License. +// +// You may obtain a copy of the License at: +// +// https://solderpad.org/licenses/SHL-2.0/ // // Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// and hardware implementations thereof distributed under the License +// is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS +// OF ANY KIND, EITHER EXPRESSED OR IMPLIED. +// // See the License for the specific language governing permissions and // limitations under the License. 
-// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// Tracer data structures and functions -// -// Contributors: Steve Richmond, Silicon Labs -// Pascal Gouedo, Dolphin Design +//////////////////////////////////////////////////////////////////////////////// +// Engineer: Steve Richmond - steve.richmond@silabs.com // +// // +// Design Name: cv32e40p_tracer data structures // +// Project Name: CV32E40P // +// Language: SystemVerilog // +// // +// Description: Moves the class definition for instr_trace_t out of the // +// tracer module for readability and code partitioning // +// // +// Includes various enhancements to make the instr_trace_t // +// class more comprehensive // +// // +//////////////////////////////////////////////////////////////////////////////// typedef struct { logic [5:0] addr; @@ -35,9 +49,13 @@ typedef struct { class instr_trace_t; time simtime; + time stoptime; + bit external_time; int cycles; + int stopcycles; logic [31:0] pc; logic [31:0] instr; + string ctx; //Used to add context in the trace log file (Canceled, debug, interrput,....) 
bit compressed; bit wb_bypass; bit misaligned; @@ -56,10 +74,15 @@ class instr_trace_t; regs_read = {}; regs_write = {}; mem_access = {}; + external_time = 0; + stoptime = 0; + stopcycles = 0; endfunction function void init(int unsigned cycles, bit [31:0] pc, bit compressed, bit [31:0] instr); - this.simtime = $time; + if(!this.external_time) begin + this.simtime = $time; + end this.cycles = cycles; this.pc = pc; this.compressed = compressed; @@ -308,7 +331,23 @@ class instr_trace_t; begin string insn_str; // Accumulate writes into a single string to enable single $fwrite - insn_str = $sformatf("%t %15d %h %h %-36s", simtime, cycles, pc, instr, str); + if(simtime < 100ns) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 1us) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 10us) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 100us) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 1ms) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 10ms) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else if (simtime < 100ms) begin + insn_str = $sformatf(" %t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end else begin + insn_str = $sformatf("%t %15d %h %h %-3s %-36s", simtime, cycles, pc, instr, ctx, str); + end foreach (regs_write[i]) begin if (regs_write[i].addr != 0) @@ -330,6 +369,12 @@ class instr_trace_t; insn_str = $sformatf("%s PA:%08x", insn_str, mem_acc.addr); end + casex (instr) + INSTR_FDIV: insn_str = $sformatf("%s %15d %t", insn_str, stopcycles, stoptime); + INSTR_FSQRT:insn_str = $sformatf("%s %15d %t", insn_str, stopcycles, 
stoptime); + default: ; + endcase + $fwrite(f, "%s\n", insn_str); end endfunction @@ -489,7 +534,7 @@ class instr_trace_t; begin mnemonic = {compressed ? "c." : "", mnemonic}; regs_read.push_back('{rs1, rs1_value, 0}); - str = $sformatf("%-16s %s, %0d", mnemonic, regAddrToStr(rs1), $signed(imm_sb_type)); + str = $sformatf("%-16s %s, %0d, %0d", mnemonic, regAddrToStr(rs1), $signed(imm_s2_type), $signed(imm_sb_type)); end endfunction // printSBInstr @@ -587,14 +632,14 @@ class instr_trace_t; // immediate post-incremented load regs_read.push_back('{rs1, rs1_value, 0}); regs_write.push_back('{rs1, 'x, 0}); - str = $sformatf("cv.%-13s %s, %0d(x%0d!)", mnemonic, regAddrToStr(rd), $signed(imm_i_type), rs1); + str = $sformatf("cv.%-13s %s, (x%0d), %0d", mnemonic, regAddrToStr(rd), rs1, $signed(imm_i_type)); end else if (instr[6:0] == OPCODE_CUSTOM_1) begin if (instr[27] == 1'b0) begin // reg-reg post-incremented load regs_read.push_back('{rs2, rs2_value, 0}); regs_read.push_back('{rs1, rs1_value, 0}); regs_write.push_back('{rs1, 'x, 0}); - str = $sformatf("cv.%-13s %s, %s(x%0d!)", mnemonic, regAddrToStr(rd), regAddrToStr(rs2), rs1); + str = $sformatf("cv.%-13s %s, (x%0d), %s", mnemonic, regAddrToStr(rd), rs1, regAddrToStr(rs2)); end else begin // reg-reg indexed load regs_read.push_back('{rs2, rs2_value, 0}); @@ -637,7 +682,7 @@ class instr_trace_t; regs_read.push_back('{rs2, rs2_value, 0}); regs_read.push_back('{rs1, rs1_value, 0}); regs_write.push_back('{rs1, 'x, 0}); - str = $sformatf("cv.%-14s %s, %0d(x%0d!)", mnemonic, regAddrToStr(rs2), $signed(imm_s_type), rs1); + str = $sformatf("cv.%-14s %s, (x%0d), %0d", mnemonic, regAddrToStr(rs2), rs1, $signed(imm_s_type)); end else if (instr[31:28] == 4'b0010) begin if (instr[27] == 1'b0) begin // reg-reg post-incremented store @@ -645,7 +690,7 @@ class instr_trace_t; regs_read.push_back('{rs3, rs3_value, 0}); regs_read.push_back('{rs1, rs1_value, 0}); regs_write.push_back('{rs1, 'x, 0}); - str = $sformatf("cv.%-13s %s, 
%s(x%0d!)", mnemonic, regAddrToStr(rs2), regAddrToStr(rs3), rs1); + str = $sformatf("cv.%-13s %s, (x%0d), %s", mnemonic, regAddrToStr(rs2), rs1, regAddrToStr(rs3)); end else begin // reg-reg indexed store regs_read.push_back('{rs2, rs2_value, 0}); @@ -679,22 +724,22 @@ class instr_trace_t; // decode and print instruction case (instr[11:8]) // cv.starti, cv.endi - 4'b0000, 4'b0010: str = $sformatf("%-16s %d, 0x%0x", mnemonic, rd[0], imm_iz_type); + 4'b0000, 4'b0010: str = $sformatf("%-16s %d, 0x%0x", mnemonic, instr[7], imm_iz_type); // cv.counti - 4'b0100: str = $sformatf("%-16s %d, %d", mnemonic, rd[0], imm_iz_type); + 4'b0100: str = $sformatf("%-16s %d, %d", mnemonic, instr[7], imm_iz_type); // cv.start, cv.end, cv.count 4'b0001, 4'b0011, 4'b0101: begin regs_read.push_back('{rs1, rs1_value, 0}); - str = $sformatf("%-16s %d, %s", mnemonic, rd[0], regAddrToStr(rs1)); + str = $sformatf("%-16s %d, %s", mnemonic, instr[7], regAddrToStr(rs1)); end // cv.setupi 4'b0110: begin - str = $sformatf("%-16s %d, %d, 0x%0x", mnemonic, rd[0], imm_iz_type, rs1); + str = $sformatf("%-16s %d, %d, 0x%0x", mnemonic, instr[7], imm_iz_type, rs1); end // cv.setup 4'b0111: begin regs_read.push_back('{rs1, rs1_value, 0}); - str = $sformatf("%-16s %d, %s, 0x%0x", mnemonic, rd[0], regAddrToStr(rs1), imm_iz_type); + str = $sformatf("%-16s %d, %s, 0x%0x", mnemonic, instr[7], regAddrToStr(rs1), imm_iz_type); end endcase end @@ -757,238 +802,429 @@ class instr_trace_t; else str_hb = ".h"; // set mnemonic - case (instr[31:26]) - 6'b000000: begin + case (instr) + INSTR_CVADDH , + INSTR_CVADDSCH , + INSTR_CVADDSCIH, + INSTR_CVADDB , + INSTR_CVADDSCB , + INSTR_CVADDSCIB : begin mnemonic = "cv.add"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000010: begin + INSTR_CVSUBH , + INSTR_CVSUBSCH , + INSTR_CVSUBSCIH, + INSTR_CVSUBB , + INSTR_CVSUBSCB , + INSTR_CVSUBSCIB : begin mnemonic = "cv.sub"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000100: begin + INSTR_CVAVGH , + INSTR_CVAVGSCH , 
+ INSTR_CVAVGSCIH , + INSTR_CVAVGB , + INSTR_CVAVGSCB , + INSTR_CVAVGSCIB : begin mnemonic = "cv.avg"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000110: begin + INSTR_CVAVGUH , + INSTR_CVAVGUSCH , + INSTR_CVAVGUSCIH, + INSTR_CVAVGUB , + INSTR_CVAVGUSCB , + INSTR_CVAVGUSCIB : begin mnemonic = "cv.avgu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b001000: begin + INSTR_CVMINH , + INSTR_CVMINSCH , + INSTR_CVMINSCIH, + INSTR_CVMINB , + INSTR_CVMINSCB , + INSTR_CVMINSCIB : begin mnemonic = "cv.min"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001010: begin + INSTR_CVMINUH , + INSTR_CVMINUSCH , + INSTR_CVMINUSCIH, + INSTR_CVMINUB , + INSTR_CVMINUSCB , + INSTR_CVMINUSCIB : begin mnemonic = "cv.minu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b001100: begin + INSTR_CVMAXH , + INSTR_CVMAXSCH , + INSTR_CVMAXSCIH , + INSTR_CVMAXB , + INSTR_CVMAXSCB , + INSTR_CVMAXSCIB : begin mnemonic = "cv.max"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001110: begin + INSTR_CVMAXUH , + INSTR_CVMAXUSCH , + INSTR_CVMAXUSCIH , + INSTR_CVMAXUB , + INSTR_CVMAXUSCB , + INSTR_CVMAXUSCIB : begin mnemonic = "cv.maxu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b010000: begin + INSTR_CVSRLH , + INSTR_CVSRLSCH , + INSTR_CVSRLSCIH , + INSTR_CVSRLB , + INSTR_CVSRLSCB , + INSTR_CVSRLSCIB : begin mnemonic = "cv.srl"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b010010: begin + INSTR_CVSRAH , + INSTR_CVSRASCH , + INSTR_CVSRASCIH, + INSTR_CVSRAB , + INSTR_CVSRASCB , + INSTR_CVSRASCIB : begin mnemonic = "cv.sra"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b010100: begin + INSTR_CVSLLH , + INSTR_CVSLLSCH , + INSTR_CVSLLSCIH, + INSTR_CVSLLB , + INSTR_CVSLLSCB , + INSTR_CVSLLSCIB : begin mnemonic = "cv.sll"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b010110: begin + INSTR_CVORH , + INSTR_CVORSCH , + INSTR_CVORSCIH, + INSTR_CVORB , + INSTR_CVORSCB , + INSTR_CVORSCIB : begin mnemonic = "cv.or"; str_imm = $sformatf("0x%0h", imm_vs_type); 
end - 6'b011000: begin + INSTR_CVXORH , + INSTR_CVXORSCH , + INSTR_CVXORSCIH , + INSTR_CVXORB , + INSTR_CVXORSCB , + INSTR_CVXORSCIB : begin mnemonic = "cv.xor"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b011010: begin + INSTR_CVANDH , + INSTR_CVANDSCH , + INSTR_CVANDSCIH , + INSTR_CVANDB , + INSTR_CVANDSCB , + INSTR_CVANDSCIB : begin mnemonic = "cv.and"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b011100: begin + INSTR_CVABSH, + INSTR_CVABSB : begin mnemonic = "cv.abs"; str_imm = $sformatf("0x%0h", imm_vs_type); end // dot products - 6'b100000: begin + INSTR_CVDOTUPH , + INSTR_CVDOTUPSCH , + INSTR_CVDOTUPSCIH, + INSTR_CVDOTUPB , + INSTR_CVDOTUPSCB , + INSTR_CVDOTUPSCIB : begin mnemonic = "cv.dotup"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b100010: begin + INSTR_CVDOTUSPH , + INSTR_CVDOTUSPSCH , + INSTR_CVDOTUSPSCIH, + INSTR_CVDOTUSPB , + INSTR_CVDOTUSPSCB , + INSTR_CVDOTUSPSCIB : begin mnemonic = "cv.dotusp"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b100100: begin + INSTR_CVDOTSPH , + INSTR_CVDOTSPSCH , + INSTR_CVDOTSPSCIH, + INSTR_CVDOTSPB , + INSTR_CVDOTSPSCB , + INSTR_CVDOTSPSCIB : begin mnemonic = "cv.dotsp"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b100110: begin + INSTR_CVSDOTUPH , + INSTR_CVSDOTUPSCH , + INSTR_CVSDOTUPSCIH, + INSTR_CVSDOTUPB , + INSTR_CVSDOTUPSCB , + INSTR_CVSDOTUPSCIB : begin mnemonic = "cv.sdotup"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b101000: begin + INSTR_CVSDOTUSPH , + INSTR_CVSDOTUSPSCH , + INSTR_CVSDOTUSPSCIH, + INSTR_CVSDOTUSPB , + INSTR_CVSDOTUSPSCB , + INSTR_CVSDOTUSPSCIB : begin mnemonic = "cv.sdotusp"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b101010: begin + INSTR_CVSDOTSPH , + INSTR_CVSDOTSPSCH , + INSTR_CVSDOTSPSCIH, + INSTR_CVSDOTSPB , + INSTR_CVSDOTSPSCB , + INSTR_CVSDOTSPSCIB : begin mnemonic = "cv.sdotsp"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b101110: begin - case (instr[14:13]) - 2'b00 : begin - mnemonic = "cv.extract"; - str_imm = 
$sformatf("0x%0h", imm_vs_type); - end - 2'b01 : begin - mnemonic = "cv.extractu"; - str_imm = $sformatf("0x%0h", imm_vu_type); - end - 2'b10 : begin - mnemonic = "cv.insert"; - str_imm = $sformatf("0x%0h", imm_vs_type); - end - endcase - str_sci = ""; + INSTR_CVEXTRACTH, + INSTR_CVEXTRACTB : begin + mnemonic = "cv.extract"; + str_imm = $sformatf("0x%0h", imm_vs_type); + str_sci = ""; + end + INSTR_CVEXTRACTUH, + INSTR_CVEXTRACTUB : begin + mnemonic = "cv.extractu"; + str_imm = $sformatf("0x%0h", imm_vu_type); + str_sci = ""; end - + INSTR_CVINSERTH, + INSTR_CVINSERTB : begin + mnemonic = "cv.insert"; + str_imm = $sformatf("0x%0h", imm_vs_type); + str_sci = ""; + end + // shuffle/pack - 6'b110000: begin - if (instr[14:12] == 3'b111) begin - mnemonic = "cv.shuffleI0"; - str_imm = $sformatf("0x%8h", imm_shuffle_type); - end else begin + INSTR_CVSHUFFLEH , + INSTR_CVSHUFFLESCIH, + INSTR_CVSHUFFLEB : begin mnemonic = "cv.shuffle"; if (instr[14:12] == 3'b110) begin str_imm = $sformatf("0x%8h", imm_shuffle_type); end - end end - 6'b110010: begin + + INSTR_CVSHUFFLEL0SCIB : begin + mnemonic = "cv.shuffleI0"; + str_imm = $sformatf("0x%8h", imm_shuffle_type); + end + INSTR_CVSHUFFLEL1SCIB : begin mnemonic = "cv.shuffleI1"; str_imm = $sformatf("0x%8h", imm_shuffle_type); end - 6'b110100: begin + INSTR_CVSHUFFLEL2SCIB : begin mnemonic = "cv.shuffleI2"; str_imm = $sformatf("0x%8h", imm_shuffle_type); end - 6'b110110: begin + INSTR_CVSHUFFLEL3SCIB : begin mnemonic = "cv.shuffleI3"; str_imm = $sformatf("0x%8h", imm_shuffle_type); end - 6'b111000: begin + INSTR_CVSHUFFLE2H, + INSTR_CVSHUFFLE2B : begin mnemonic = "cv.shuffle2"; end - 6'b111100: begin + INSTR_CVPACK, + INSTR_CVPACKH : begin mnemonic = "cv.pack"; if (instr[25] == 1'b0) begin str_hb = ""; end end - 6'b111110: begin - mnemonic = instr[25] ? 
"cv.packhi" : "cv.packlo"; - end + INSTR_CVPACKHIB : mnemonic = "cv.packhi"; + INSTR_CVPACKLOB : mnemonic = "cv.packlo"; // comparisons - 6'b000001: begin + INSTR_CVCMPEQH , + INSTR_CVCMPEQSCH , + INSTR_CVCMPEQSCIH, + INSTR_CVCMPEQB , + INSTR_CVCMPEQSCB , + INSTR_CVCMPEQSCIB : begin mnemonic = "cv.cmpeq"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000011: begin + INSTR_CVCMPNEH , + INSTR_CVCMPNESCH , + INSTR_CVCMPNESCIH, + INSTR_CVCMPNEB , + INSTR_CVCMPNESCB , + INSTR_CVCMPNESCIB : begin mnemonic = "cv.cmpne"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000101: begin + INSTR_CVCMPGTH , + INSTR_CVCMPGTSCH , + INSTR_CVCMPGTSCIH, + INSTR_CVCMPGTB , + INSTR_CVCMPGTSCB , + INSTR_CVCMPGTSCIB : begin mnemonic = "cv.cmpgt"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b000111: begin + INSTR_CVCMPGEH , + INSTR_CVCMPGESCH , + INSTR_CVCMPGESCIH, + INSTR_CVCMPGEB , + INSTR_CVCMPGESCB , + INSTR_CVCMPGESCIB : begin mnemonic = "cv.cmpge"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001001: begin + INSTR_CVCMPLTH , + INSTR_CVCMPLTSCH , + INSTR_CVCMPLTSCIH, + INSTR_CVCMPLTB , + INSTR_CVCMPLTSCB , + INSTR_CVCMPLTSCIB : begin mnemonic = "cv.cmplt"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001011: begin + INSTR_CVCMPLEH , + INSTR_CVCMPLESCH , + INSTR_CVCMPLESCIH, + INSTR_CVCMPLEB , + INSTR_CVCMPLESCB , + INSTR_CVCMPLESCIB : begin mnemonic = "cv.cmple"; str_imm = $sformatf("0x%0h", imm_vs_type); end - 6'b001101: begin + INSTR_CVCMPGTUH , + INSTR_CVCMPGTUSCH , + INSTR_CVCMPGTUSCIH, + INSTR_CVCMPGTUB , + INSTR_CVCMPGTUSCB , + INSTR_CVCMPGTUSCIB : begin mnemonic = "cv.cmpgtu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b001111: begin + INSTR_CVCMPGEUH , + INSTR_CVCMPGEUSCH , + INSTR_CVCMPGEUSCIH, + INSTR_CVCMPGEUB , + INSTR_CVCMPGEUSCB , + INSTR_CVCMPGEUSCIB : begin mnemonic = "cv.cmpgeu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b010001: begin + INSTR_CVCMPLTUH , + INSTR_CVCMPLTUSCH , + INSTR_CVCMPLTUSCIH, + INSTR_CVCMPLTUB , + 
INSTR_CVCMPLTUSCB , + INSTR_CVCMPLTUSCIB : begin mnemonic = "cv.cmpltu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b010011: begin + INSTR_CVCMPLEUH , + INSTR_CVCMPLEUSCH , + INSTR_CVCMPLEUSCIH, + INSTR_CVCMPLEUB , + INSTR_CVCMPLEUSCB , + INSTR_CVCMPLEUSCIB : begin mnemonic = "cv.cmpleu"; str_imm = $sformatf("0x%0h", imm_vu_type); end - 6'b010101: begin - unique case (instr[14:13]) - 2'b00: mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r" : "cv.cplxmul.i"; - 2'b01: mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div2" : "cv.cplxmul.i.div2"; - 2'b10: mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div4" : "cv.cplxmul.i.div4"; - 2'b11: mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div8" : "cv.cplxmul.i.div8"; - endcase + INSTR_CVCPLXMULR, + INSTR_CVCPLXMULI : begin + mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r" : "cv.cplxmul.i"; str_sci = ""; str_hb = ""; end - - 6'b010111: begin - mnemonic = "cv.cplxconj"; - str_sci = ""; + INSTR_CVCPLXMULRDIV2, + INSTR_CVCPLXMULIDIV2 : begin + mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div2" : "cv.cplxmul.i.div2"; + str_sci = ""; str_hb = ""; end - - 6'b011001: begin - unique case (instr[14:13]) - 2'b00: mnemonic = "cv.subrotmj"; - 2'b01: mnemonic = "cv.subrotmj.div2"; - 2'b10: mnemonic = "cv.subrotmj.div4"; - 2'b11: mnemonic = "cv.subrotmj.div8"; - endcase + INSTR_CVCPLXMULRDIV4, + INSTR_CVCPLXMULIDIV4 : begin + mnemonic = instr[25] == 1'b0 ? "cv.cplxmul.r.div4" : "cv.cplxmul.i.div4"; str_sci = ""; str_hb = ""; end - - 6'b011011: begin - unique case (instr[14:13]) - 2'b01: mnemonic = "cv.add.div2"; - 2'b10: mnemonic = "cv.add.div4"; - 2'b11: mnemonic = "cv.add.div8"; - endcase + INSTR_CVCPLXMULRDIV8, + INSTR_CVCPLXMULIDIV8 : begin + mnemonic = instr[25] == 1'b0 ? 
"cv.cplxmul.r.div8" : "cv.cplxmul.i.div8"; str_sci = ""; str_hb = ""; end - 6'b011101: begin - unique case (instr[14:13]) - 2'b01: mnemonic = "cv.sub.div2"; - 2'b10: mnemonic = "cv.sub.div4"; - 2'b11: mnemonic = "cv.sub.div8"; - endcase - str_sci = ""; + INSTR_CVCPLXCONJ : begin + mnemonic = "cv.cplxconj"; + str_sci = ""; str_hb = ""; end + INSTR_CVSUBROTMJ : begin + mnemonic = "cv.subrotmj"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBROTMJDIV2 : begin + mnemonic = "cv.subrotmj.div2"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBROTMJDIV4 : begin + mnemonic = "cv.subrotmj.div4"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBROTMJDIV8 : begin + mnemonic = "cv.subrotmj.div8"; + str_sci = ""; + str_hb = ""; + end + + INSTR_CVADDIV2 : begin + mnemonic = "cv.add.div2"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVADDIV4 : begin + mnemonic = "cv.add.div4"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVADDIV8 : begin + mnemonic = "cv.add.div8"; + str_sci = ""; + str_hb = ""; + end + + INSTR_CVSUBIV2 : begin + mnemonic = "cv.sub.div2"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBIV4 : begin + mnemonic = "cv.sub.div4"; + str_sci = ""; + str_hb = ""; + end + INSTR_CVSUBIV8 : begin + mnemonic = "cv.sub.div8"; + str_sci = ""; + str_hb = ""; + end + default: begin printMnemonic("INVALID"); return; diff --git a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_rvfi.sv b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_rvfi.sv index ca158f969..99429d90d 100644 --- a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_rvfi.sv +++ b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_rvfi.sv @@ -1,24 +1,28 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
+// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. // You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// CV32E40P RVFI interface -// -// Contributors: Davide Schiavone, OpenHW Group -// Halfdan Bechmann, Silicon Labs -// Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Davide Schiavone, OpenHW Group // +// Halfdan Bechmann, Silicon Labs // +// Yoann Pruvost, Dolphin Design // +// // +// Description: CV32E40P RVFI interface // +// // +//////////////////////////////////////////////////////////////////////////////////// `include "cv32e40p_rvfi_pkg.sv" @@ -27,7 +31,8 @@ module cv32e40p_rvfi import cv32e40p_rvfi_pkg::*; #( parameter FPU = 0, - parameter ZFINX = 0 + parameter ZFINX = 0, + parameter NUM_MHPMCOUNTERS = 1 ) ( input logic clk_i, input logic rst_ni, @@ -73,6 +78,10 @@ module cv32e40p_rvfi input logic is_compressed_id_i, input logic ebrk_insn_dec_i, + input logic ecall_insn_dec_i, + + input logic mret_insn_dec_i, + input logic mret_dec_i, input logic [5:0] csr_cause_i, @@ -126,6 +135,9 @@ module cv32e40p_rvfi input logic [31:0] data_wdata_ex_i, input logic lsu_split_q_ex_i, + input logic mult_ready_i, + input logic alu_ready_i, + //// WB probes //// input logic [31:0] pc_wb_i, input logic wb_ready_i, @@ 
-202,6 +214,7 @@ module cv32e40p_rvfi input logic csr_we_i, input logic [31:0] csr_wdata_int_i, + input logic csr_fregs_we_i, input logic csr_jvt_we_i, input Status_t csr_mstatus_n_i, input Status_t csr_mstatus_q_i, @@ -282,7 +295,7 @@ module cv32e40p_rvfi // performance counters // cycle, instret, hpcounter, cycleh, instreth, hpcounterh // mcycle, minstret, mhpcounter, mcycleh, minstreth, mhpcounterh - input logic [31:0][MHPMCOUNTER_WIDTH-1:0] csr_mhpmcounter_q_i, + input logic [63:0][MHPMCOUNTER_WIDTH-1:0] csr_mhpmcounter_q_i, input logic [31:0] csr_mhpmcounter_write_lower_i, input logic [31:0] csr_mhpmcounter_write_upper_i, @@ -319,6 +332,10 @@ module cv32e40p_rvfi // the convention of RISC-V Formal Interface Specification. output logic [ 0:0] rvfi_valid, output logic [63:0] rvfi_order, + output integer rvfi_start_cycle, + output time rvfi_start_time, + output integer rvfi_stop_cycle, + output time rvfi_stop_time, output logic [31:0] rvfi_insn, output rvfi_trap_t rvfi_trap, output logic [ 0:0] rvfi_halt, @@ -338,6 +355,7 @@ module cv32e40p_rvfi output logic rvfi_frd_wvalid [1:0], output logic [ 4:0] rvfi_frd_addr [1:0], output logic [31:0] rvfi_frd_wdata [1:0], + output logic rvfi_2_rd, output logic [ 4:0] rvfi_rs1_addr, output logic [ 4:0] rvfi_rs2_addr, output logic [ 4:0] rvfi_rs3_addr, @@ -358,8 +376,8 @@ module cv32e40p_rvfi output logic [31:0] rvfi_pc_wdata, output logic [31:0] rvfi_mem_addr, - output logic [ 3:0] rvfi_mem_rmask, - output logic [ 3:0] rvfi_mem_wmask, + output logic [31:0] rvfi_mem_rmask, + output logic [31:0] rvfi_mem_wmask, output logic [31:0] rvfi_mem_rdata, output logic [31:0] rvfi_mem_wdata, @@ -610,6 +628,13 @@ module cv32e40p_rvfi bit clk_i_d; assign #0.01 clk_i_d = clk_i; + integer cycles; + // cycle counter + always_ff @(posedge clk_i_d, negedge rst_ni) begin + if (rst_ni == 1'b0) cycles <= 0; + else cycles <= cycles + 1; + end + logic pc_mux_debug; logic pc_mux_dret; logic pc_mux_exception; @@ -617,6 +642,11 @@ module 
cv32e40p_rvfi logic pc_mux_interrupt; logic pc_mux_nmi; + localparam logic [31:0] MSTATUS_WRITE_MASK = 32'h0000_6088; + localparam logic [31:0] MCOUNTINHIBIT_WRITE_MASK = {{(29-NUM_MHPMCOUNTERS){1'b0}}, {(NUM_MHPMCOUNTERS){1'b1}}, 3'b101}; + localparam NUM_HPM_EVENTS = 16; + localparam logic [31:0] MHPMEVENT_WRITE_MASK = {{(31-NUM_HPM_EVENTS){1'b0}}, {(NUM_HPM_EVENTS){1'b1}}}; + `include "pipe_freeze_trace.sv" `include "insn_trace.sv" @@ -633,6 +663,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; logic [2:0] saved_debug_cause; integer next_send; + event e_empty_queue; function void empty_fifo(); integer i, trace_q_size; trace_q_size = wb_bypass_trace_q.size(); @@ -648,6 +679,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; new_rvfi_trace.m_csr.mstatus_fs_rdata = r_pipe_freeze_trace.csr.mstatus_fs_n; rvfi_trace_q.push_back(new_rvfi_trace); next_send = next_send + 1; + ->e_empty_queue; end else begin wb_bypass_trace_q.push_back(new_rvfi_trace); end @@ -658,6 +690,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; /* * Function used to alocate a new insn and send it to the rvfi driver */ + event e_add_to_bypass; function void send_rvfi(insn_trace_t m_wb_insn); insn_trace_t new_rvfi_trace; new_rvfi_trace = new(); @@ -667,6 +700,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; next_send = next_send + 1; end else begin wb_bypass_trace_q.push_back(new_rvfi_trace); + ->e_add_to_bypass; end empty_fifo(); endfunction @@ -733,6 +767,10 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end rvfi_order = new_rvfi_trace.m_order; + rvfi_start_cycle = new_rvfi_trace.m_start_cycle; + rvfi_start_time = new_rvfi_trace.m_start_time; + rvfi_stop_cycle = new_rvfi_trace.m_stop_cycle; + rvfi_stop_time = new_rvfi_trace.m_stop_time; rvfi_pc_rdata = new_rvfi_trace.m_pc_rdata; rvfi_insn = new_rvfi_trace.m_insn; @@ -787,6 +825,7 @@ insn_trace_t trace_if, trace_id, trace_ex, 
trace_ex_next, trace_wb; rvfi_frd_addr[1] = '0; rvfi_frd_wdata[1] = '0; + rvfi_2_rd = new_rvfi_trace.m_2_rd_insn; if (new_rvfi_trace.m_rd_addr[0][5] == 1'b0) begin rvfi_rd_addr[0] = new_rvfi_trace.m_rd_addr[0][4:0]; rvfi_rd_wdata[0] = new_rvfi_trace.m_rd_wdata[0]; @@ -837,7 +876,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; //CSR rvfi_csr_mstatus_rmask = new_rvfi_trace.m_csr.mstatus_rmask | new_rvfi_trace.m_csr.mstatus_fs_rmask; - rvfi_csr_mstatus_wmask = new_rvfi_trace.m_csr.mstatus_wmask; + rvfi_csr_mstatus_wmask = new_rvfi_trace.m_csr.mstatus_wmask & MSTATUS_WRITE_MASK; rvfi_csr_mstatus_wmask[31] = new_rvfi_trace.m_csr.mstatus_fs_wmask[31]; rvfi_csr_mstatus_wmask[14:13] = new_rvfi_trace.m_csr.mstatus_fs_wmask[14:13]; @@ -870,7 +909,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end rvfi_csr_mstatus_wdata[30:18] = '0; // MPRV is not implemented in the target configuration, writes to it are ignored - rvfi_csr_mstatus_wdata[17] = 1'b0;//new_rvfi_trace.m_csr.mstatus_wdata.mprv; + rvfi_csr_mstatus_wdata[17] = 1'b0; //new_rvfi_trace.m_csr.mstatus_wdata.mprv; rvfi_csr_mstatus_wdata[16:15] = '0; if (FPU == 1 && ZFINX == 0) begin rvfi_csr_mstatus_wdata[14:13] = new_rvfi_trace.m_csr.mstatus_fs_wdata; @@ -882,24 +921,59 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; rvfi_csr_mstatus_wdata[7] = new_rvfi_trace.m_csr.mstatus_wdata.mpie; rvfi_csr_mstatus_wdata[6:5] = '0; // UPIE is not implemented in the target configuration, writes to it are ignored - rvfi_csr_mstatus_wdata[4] = 1'b0;//new_rvfi_trace.m_csr.mstatus_wdata.upie; + rvfi_csr_mstatus_wdata[4] = 1'b0; //new_rvfi_trace.m_csr.mstatus_wdata.upie; rvfi_csr_mstatus_wdata[3] = new_rvfi_trace.m_csr.mstatus_wdata.mie; rvfi_csr_mstatus_wdata[2:1] = '0; // UIE is not implemented in the target configuration, writes to it are ignored - rvfi_csr_mstatus_wdata[0] = 1'b0;//new_rvfi_trace.m_csr.mstatus_wdata.uie; + rvfi_csr_mstatus_wdata[0] = 1'b0; 
//new_rvfi_trace.m_csr.mstatus_wdata.uie; `SET_RVFI_CSR_FROM_INSN(misa) `SET_RVFI_CSR_FROM_INSN(mie) `SET_RVFI_CSR_FROM_INSN(mtvec) - `SET_RVFI_CSR_FROM_INSN(mcountinhibit) + + rvfi_csr_mcountinhibit_rdata = new_rvfi_trace.m_csr.mcountinhibit_rdata; + rvfi_csr_mcountinhibit_rmask = new_rvfi_trace.m_csr.mcountinhibit_rmask; + rvfi_csr_mcountinhibit_wdata = new_rvfi_trace.m_csr.mcountinhibit_wdata; + rvfi_csr_mcountinhibit_wmask = new_rvfi_trace.m_csr.mcountinhibit_wmask & MCOUNTINHIBIT_WRITE_MASK; + `SET_RVFI_CSR_FROM_INSN(mscratch) `SET_RVFI_CSR_FROM_INSN(mepc) `SET_RVFI_CSR_FROM_INSN(mcause) + `SET_RVFI_CSR_FROM_INSN(mcycle) `SET_RVFI_CSR_FROM_INSN(minstret) + `SET_RVFI_CSR_FROM_INSN(minstreth) + + // `SET_RVFI_CSR_FROM_INSN(cycle) + // `SET_RVFI_CSR_FROM_INSN(instret) + rvfi_csr_instret_rdata = new_rvfi_trace.m_csr.minstret_rdata; + rvfi_csr_instret_rmask = new_rvfi_trace.m_csr.minstret_rmask; + rvfi_csr_instret_wdata = new_rvfi_trace.m_csr.minstret_wdata; + rvfi_csr_instret_wmask = new_rvfi_trace.m_csr.minstret_wmask; + + for(int idx=3; idx<32; idx++) begin + rvfi_csr_mhpmcounter_rmask[idx] = new_rvfi_trace.m_csr.mhpmcounter_rmask[idx][31:0]; + rvfi_csr_mhpmcounter_wmask[idx] = new_rvfi_trace.m_csr.mhpmcounter_wmask[idx][31:0]; + rvfi_csr_mhpmcounter_rdata[idx] = new_rvfi_trace.m_csr.mhpmcounter_rdata[idx][31:0]; + rvfi_csr_mhpmcounter_wdata[idx] = new_rvfi_trace.m_csr.mhpmcounter_wdata[idx][31:0]; + + rvfi_csr_mhpmcounterh_rmask[idx] = new_rvfi_trace.m_csr.mhpmcounter_rmask[idx][63:32]; + rvfi_csr_mhpmcounterh_wmask[idx] = new_rvfi_trace.m_csr.mhpmcounter_wmask[idx][63:32]; + rvfi_csr_mhpmcounterh_rdata[idx] = new_rvfi_trace.m_csr.mhpmcounter_rdata[idx][63:32]; + rvfi_csr_mhpmcounterh_wdata[idx] = new_rvfi_trace.m_csr.mhpmcounter_wdata[idx][63:32]; + + rvfi_csr_mhpmevent_rmask[idx] = new_rvfi_trace.m_csr.mhpmevent_rmask[idx]; + rvfi_csr_mhpmevent_wmask[idx] = new_rvfi_trace.m_csr.mhpmevent_wmask[idx] & MHPMEVENT_WRITE_MASK; + rvfi_csr_mhpmevent_rdata[idx] = 
new_rvfi_trace.m_csr.mhpmevent_rdata[idx]; + rvfi_csr_mhpmevent_wdata[idx] = new_rvfi_trace.m_csr.mhpmevent_wdata[idx]; + end + // `SET_RVFI_CSR_FROM_INSN(instreth) + rvfi_csr_instreth_rdata = new_rvfi_trace.m_csr.minstreth_rdata; + rvfi_csr_instreth_rmask = new_rvfi_trace.m_csr.minstreth_rmask; + rvfi_csr_instreth_wdata = new_rvfi_trace.m_csr.minstreth_wdata; + rvfi_csr_instreth_wmask = new_rvfi_trace.m_csr.minstreth_wmask; + `SET_RVFI_CSR_FROM_INSN(mip) - // if(rvfi_order == 64'h00000000_00000167) begin - // rvfi_csr_mip_rdata = 32'h0010_0000; - // end + rvfi_csr_tdata_rdata[0] = 'Z; rvfi_csr_tdata_rmask[0] = '0; // Does not exist rvfi_csr_tdata_wdata[0] = 'Z; // Does not exist @@ -945,36 +1019,134 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; endfunction // set_rvfi - function void minstret_to_id(); - trace_id.m_csr.minstret_we = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]; - trace_id.m_csr.minstret_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2]; - trace_id.m_csr.minstret_rmask = '1; - trace_id.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q; - trace_id.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? '1 : '0; + function void sample_perf_counter_to_id(int idx); + trace_id.m_csr.mhpmcounter_rdata[idx][31:0] = r_pipe_freeze_trace.csr.mhpmcounter_q[idx][31:0]; + trace_id.m_csr.mhpmcounter_rmask[idx][31:0] = '1; + endfunction + + function void perf_counter_to_id(int idx); + if(!trace_id.m_csr.mhpmcounter_we[idx][0]) begin + trace_id.m_csr.mhpmcounter_wdata[idx][31:0] = r_pipe_freeze_trace.csr.wdata_int; + end + if(r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx]) begin + trace_id.m_csr.mhpmcounter_we[idx][0] = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx]; + trace_id.m_csr.mhpmcounter_wdata[idx][31:0] = r_pipe_freeze_trace.csr.wdata_int; + trace_id.m_csr.mhpmcounter_wmask[idx][31:0] = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx] ? 
'1 : '0; + end + sample_perf_counter_to_id(idx); + endfunction + + function void sample_perf_event_to_trace(int idx, insn_trace_t m_trace); + m_trace.m_csr.mhpmevent_rdata[idx] = r_pipe_freeze_trace.csr.mhpmevent_q[idx]; + m_trace.m_csr.mhpmevent_rmask[idx] = '1; endfunction - function void minstret_to_ex(); - trace_ex.m_csr.minstret_we = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]; - trace_ex.m_csr.minstret_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2]; - trace_ex.m_csr.minstret_rmask = '1; - trace_ex.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q; - trace_ex.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? '1 : '0; + function void perf_event_to_trace(int idx, insn_trace_t m_trace); + if(!m_trace.m_csr.mhpmevent_we[idx]) begin + m_trace.m_csr.mhpmevent_wdata[idx] = r_pipe_freeze_trace.csr.wdata_int; + end + if(r_pipe_freeze_trace.csr.mhpmevent_we[idx]) begin + m_trace.m_csr.mhpmevent_we[idx] = r_pipe_freeze_trace.csr.mhpmevent_we[idx]; + m_trace.m_csr.mhpmevent_wdata[idx] = r_pipe_freeze_trace.csr.wdata_int; + m_trace.m_csr.mhpmevent_wmask[idx] = r_pipe_freeze_trace.csr.mhpmevent_we[idx] ? 
'1 : '0; + end + sample_perf_event_to_trace(idx, m_trace); endfunction - function void tinfo_to_id(); - trace_id.m_csr.tinfo_we = '0; // READ ONLY csr_tinfo_we_i; - trace_id.m_csr.tinfo_rdata = r_pipe_freeze_trace.csr.tinfo_q; - trace_id.m_csr.tinfo_rmask = '1; - trace_id.m_csr.tinfo_wdata = r_pipe_freeze_trace.csr.tinfo_n; - trace_id.m_csr.tinfo_wmask = '0; + function void sample_minstret_to_trace(insn_trace_t m_trace); + m_trace.m_csr.minstret_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2][31:0]; + m_trace.m_csr.minstret_rmask = '1; endfunction - function void tinfo_to_ex(); - trace_ex.m_csr.tinfo_we = '0; // READ ONLY csr_tinfo_we_i; - trace_ex.m_csr.tinfo_rdata = r_pipe_freeze_trace.csr.tinfo_q; - trace_ex.m_csr.tinfo_rmask = '1; - trace_ex.m_csr.tinfo_wdata = r_pipe_freeze_trace.csr.tinfo_n; - trace_ex.m_csr.tinfo_wmask = '0; + function void minstret_to_trace(insn_trace_t m_trace); + if(!m_trace.m_csr.minstret_we) begin + m_trace.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.wdata_int; + end + if(r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]) begin + m_trace.m_csr.minstret_we = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]; + m_trace.m_csr.minstret_wdata = r_pipe_freeze_trace.csr.wdata_int; + m_trace.m_csr.minstret_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2] ? 
'1 : '0; + end + sample_minstret_to_trace(m_trace); + endfunction + + function void sample_perf_counter_h_to_id(int idx); + trace_id.m_csr.mhpmcounter_rdata[idx][63:32] = r_pipe_freeze_trace.csr.mhpmcounter_q[idx][63:0]; + trace_id.m_csr.mhpmcounter_rmask[idx][63:32] = '1; + endfunction + + function void perf_counter_h_to_id(int idx); + if(!trace_id.m_csr.mhpmcounter_we[idx][1]) begin + trace_id.m_csr.mhpmcounter_wdata[idx][63:32] = r_pipe_freeze_trace.csr.wdata_int; + end + if(r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx]) begin + trace_id.m_csr.mhpmcounter_we[idx][1] = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx]; + trace_id.m_csr.mhpmcounter_wdata[idx][63:32] = r_pipe_freeze_trace.csr.wdata_int; + trace_id.m_csr.mhpmcounter_wmask[idx][63:32] = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx] ? '1 : '0; + end + sample_perf_counter_h_to_id(idx); + endfunction + + function void sample_minstreth_to_trace(insn_trace_t m_trace); + m_trace.m_csr.minstreth_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[2][63:32]; + m_trace.m_csr.minstreth_rmask = '1; + endfunction + + function void sample_mcycle_to_trace(insn_trace_t m_trace); + m_trace.m_csr.mcycle_we = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[0]; + m_trace.m_csr.mcycle_rdata = r_pipe_freeze_trace.csr.mhpmcounter_q[0][31:0]; + m_trace.m_csr.mcycle_rmask = '1; + m_trace.m_csr.mcycle_wdata = r_pipe_freeze_trace.csr.mhpmcounter_q[31:0]; + m_trace.m_csr.mcycle_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_lower[0] ? 
'1 : '0; + endfunction + + function void minstreth_to_trace(insn_trace_t m_trace); + if(!m_trace.m_csr.minstreth_we) begin + m_trace.m_csr.minstreth_wdata = r_pipe_freeze_trace.csr.wdata_int; + end + if(r_pipe_freeze_trace.csr.mhpmcounter_write_upper[2]) begin + m_trace.m_csr.minstreth_we = r_pipe_freeze_trace.csr.mhpmcounter_write_upper[2]; + m_trace.m_csr.minstreth_wdata = r_pipe_freeze_trace.csr.wdata_int; + m_trace.m_csr.minstreth_wmask = r_pipe_freeze_trace.csr.mhpmcounter_write_upper[2] ? '1 : '0; + end + sample_minstreth_to_trace(m_trace); + endfunction + + function void sample_perf_counter_to_trace(insn_trace_t m_trace); + sample_minstret_to_trace(m_trace); + sample_minstreth_to_trace(m_trace); + sample_mcycle_to_trace(m_trace); + for(int idx=3; idx<32; idx++)begin + sample_perf_event_to_trace(idx, m_trace); //TO CHANGE + end + endfunction + + function void perf_counter_to_trace(insn_trace_t m_trace); + if(r_pipe_freeze_trace.csr.mhpmcounter_write_lower[2]) begin + minstret_to_trace(m_trace); + end + if(r_pipe_freeze_trace.csr.mhpmcounter_write_upper[2]) begin + minstreth_to_trace(m_trace); + end + for(int idx=3; idx<32; idx++) begin + if(r_pipe_freeze_trace.csr.mhpmcounter_write_lower[idx]) begin + perf_counter_to_id(idx); + end + if(r_pipe_freeze_trace.csr.mhpmcounter_write_upper[idx]) begin + perf_counter_h_to_id(idx); + end + if(r_pipe_freeze_trace.csr.mhpmevent_we[idx]) begin + perf_event_to_trace(idx, m_trace); + end + end + endfunction + + function void tinfo_to_trace(insn_trace_t m_trace); + m_trace.m_csr.tinfo_we = '0; // READ ONLY csr_tinfo_we_i; + m_trace.m_csr.tinfo_rdata = r_pipe_freeze_trace.csr.tinfo_q; + m_trace.m_csr.tinfo_rmask = '1; + m_trace.m_csr.tinfo_wdata = r_pipe_freeze_trace.csr.tinfo_n; + m_trace.m_csr.tinfo_wmask = '0; endfunction function void mtvec_to_id(); @@ -1069,8 +1241,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; lpcount1_to_id(); lpend1_to_id(); lpstart1_to_id(); - - endfunction bit 
s_was_flush; //debug exception is flagged as trap only if preceed by a flush @@ -1111,6 +1281,9 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; * The third updates the rvfi interface */ `define CSR_FROM_PIPE(TRACE_NAME, CSR_NAME) \ + if(!trace_``TRACE_NAME``.m_csr.``CSR_NAME``_we) begin \ + trace_``TRACE_NAME``.m_csr.``CSR_NAME``_wdata = r_pipe_freeze_trace.csr.``CSR_NAME``_n; \ + end\ if (r_pipe_freeze_trace.csr.``CSR_NAME``_we) begin \ trace_``TRACE_NAME``.m_csr.``CSR_NAME``_we = r_pipe_freeze_trace.csr.``CSR_NAME``_we; \ trace_``TRACE_NAME``.m_csr.``CSR_NAME``_wdata = r_pipe_freeze_trace.csr.``CSR_NAME``_n; \ @@ -1120,9 +1293,14 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_``TRACE_NAME``.m_csr.``CSR_NAME``_rmask = '1; event e_mstatus_to_id; + event e_fregs_dirty_1, e_fregs_dirty_2, e_fregs_dirty_3; function void mstatus_to_id(); `CSR_FROM_PIPE(id, mstatus) `CSR_FROM_PIPE(id, mstatus_fs) + if(r_pipe_freeze_trace.csr.fregs_we && !r_pipe_freeze_trace.csr.mstatus_fs_we && !(r_pipe_freeze_trace.csr.we && r_pipe_freeze_trace.csr.mstatus_fs_we)) begin //writes happening in ex that needs to be reported to id + trace_id.m_csr.mstatus_fs_rdata = r_pipe_freeze_trace.csr.mstatus_fs_n; + ->e_fregs_dirty_2; + end ->e_mstatus_to_id; endfunction //those event are for debug purpose @@ -1133,10 +1311,11 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; e_dev_commit_rf_to_ex_3, e_dev_commit_rf_to_ex_4, e_dev_commit_rf_to_ex_5; - event e_if_2_id_1, e_if_2_id_2; + event e_if_2_id_1, e_if_2_id_2, e_if_2_id_3, e_if_2_id_4; event e_ex_to_wb_1, e_ex_to_wb_2; event e_id_to_ex_1, e_id_to_ex_2; event e_commit_dpc; + event e_csr_in_ex, e_csr_irq; event e_send_rvfi_trace_apu_resp; event @@ -1160,12 +1339,17 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(apu_resp, fcsr) `CSR_FROM_PIPE(apu_resp, fflags) - // `CSR_FROM_PIPE(apu_resp, mstatus) `CSR_FROM_PIPE(apu_resp, mstatus_fs) - 
if (r_pipe_freeze_trace.csr.mstatus_we) begin + if (r_pipe_freeze_trace.csr.mstatus_fs_we && (trace_id.m_order > trace_apu_resp.m_order)) begin + trace_id.m_csr.mstatus_fs_rdata = r_pipe_freeze_trace.csr.mstatus_fs_n; + end + if (r_pipe_freeze_trace.csr.mstatus_fs_we && (trace_ex.m_order > trace_apu_resp.m_order)) begin trace_ex.m_csr.mstatus_fs_rdata = r_pipe_freeze_trace.csr.mstatus_fs_n; end + if (r_pipe_freeze_trace.csr.mstatus_fs_we && (trace_wb.m_order > trace_apu_resp.m_order)) begin + trace_wb.m_csr.mstatus_fs_rdata = r_pipe_freeze_trace.csr.mstatus_fs_n; + end endfunction function void csr_to_apu_req(); @@ -1229,6 +1413,9 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end end csr_to_apu_resp(); + + trace_apu_resp.m_stop_cycle = cycles; + trace_apu_resp.m_stop_time = $time; send_rvfi(trace_apu_resp); ->e_send_rvfi_trace_apu_resp; end @@ -1243,27 +1430,48 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_is_irq_start; bit s_id_done; function void if_to_id(); + if (trace_id.m_valid) begin + `CSR_FROM_PIPE(id, misa) + `CSR_FROM_PIPE(id, tdata1) + `CSR_FROM_PIPE(id, tdata2) + tinfo_to_trace(trace_id); + `CSR_FROM_PIPE(id, mip) + send_rvfi(trace_id); + end trace_id.init(trace_if); trace_id.m_trap = ~r_pipe_freeze_trace.minstret; - trace_id.m_is_illegal = r_pipe_freeze_trace.is_illegal; + trace_id.m_is_illegal = trace_id.m_is_illegal | r_pipe_freeze_trace.is_illegal; + `CSR_FROM_PIPE(id, dpc) s_is_pc_set = 1'b0; s_is_irq_start = 1'b0; trace_if.m_valid = 1'b0; s_id_done = 1'b0; - `CSR_FROM_PIPE(id, dpc) endfunction function logic [31:0] be_to_mask(logic [3:0] be); logic [31:0] mask; - mask[7:0] = be[0] ? 8'hFF : 8'h00; - mask[15:8] = be[0] ? 8'hFF : 8'h00; - mask[23:16] = be[0] ? 8'hFF : 8'h00; - mask[31:24] = be[0] ? 8'hFF : 8'h00; + mask[7:0] = (be[0] == 1'b1) ? 8'hFF : 8'h00; + mask[15:8] = (be[1] == 1'b1) ? 8'hFF : 8'h00; + mask[23:16] = (be[2] == 1'b1) ? 8'hFF : 8'h00; + mask[31:24] = (be[3] == 1'b1) ? 
8'hFF : 8'h00; be_to_mask = mask; return mask; endfunction + function void commit_rf_to_trace(insn_trace_t m_trace); + if (m_trace.m_got_ex_reg) begin + m_trace.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; + m_trace.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; + m_trace.m_2_rd_insn = 1'b1; + m_trace.m_got_first_data = 1'b1; + end else begin + m_trace.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; + m_trace.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; + m_trace.m_got_first_data = 1'b1; + end + endfunction + task compute_pipeline(); bit s_new_valid_insn; bit s_ex_valid_adjusted; @@ -1280,34 +1488,44 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; bit s_core_is_decoding; // For readability, ctrl_fsm is DECODE or DECODE_HWLOOP - trace_if = new(); - trace_id = new(); - trace_ex = new(); - trace_wb = new(); - s_new_valid_insn = 1'b0; - s_ex_valid_adjusted = 1'b0; + bit s_ex_reg_we_adjusted; //ex_reg_we + bit s_rf_we_wb_adjusted; // + + bit s_dont_override_mstatus_fs_id; + + trace_if = new(); + trace_id = new(); + trace_ex = new(); + trace_wb = new(); + s_new_valid_insn = 1'b0; + s_ex_valid_adjusted = 1'b0; + + s_id_done = 1'b0; + s_apu_wb_ok = 1'b0; + s_apu_0_cycle_reps = 1'b0; + + next_send = 1; + cnt_data_req = 0; + cnt_data_resp = 0; + cnt_apu_req = 0; + cnt_apu_resp = 0; + csr_is_irq = '0; + is_dbg_taken = '0; + s_was_flush = 1'b0; - s_id_done = 1'b0; - s_apu_wb_ok = 1'b0; - s_apu_0_cycle_reps = 1'b0; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; - next_send = 1; - cnt_data_req = 0; - cnt_data_resp = 0; - cnt_apu_req = 0; - cnt_apu_resp = 0; - csr_is_irq = '0; - is_dbg_taken = '0; - s_was_flush = 1'b0; + s_is_pc_set = 1'b0; + s_is_irq_start = 1'b0; + s_skip_wb = 1'b0; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; + s_core_is_decoding = 1'b0; - s_is_pc_set = 1'b0; - s_is_irq_start = 1'b0; - s_skip_wb = 1'b0; + s_ex_reg_we_adjusted = 1'b0; + s_rf_we_wb_adjusted = 1'b0; - s_core_is_decoding = 1'b0; + s_dont_override_mstatus_fs_id = 
1'b0; forever begin wait(e_pipe_monitor_ok.triggered); // event triggered @@ -1325,23 +1543,11 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end if (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_ID && r_pipe_freeze_trace.ebrk_insn_dec) begin - if (trace_wb.m_valid) begin - send_rvfi(trace_wb); - trace_wb.m_valid = 1'b0; - ->e_send_rvfi_trace_wb_1; - end - if (trace_ex.m_valid) begin - send_rvfi(trace_ex); - trace_ex.m_valid = 1'b0; - ->e_send_rvfi_trace_ex_1; - end if (trace_id.m_valid) begin - - minstret_to_id(); `CSR_FROM_PIPE(id, misa) `CSR_FROM_PIPE(id, tdata1) `CSR_FROM_PIPE(id, tdata2) - tinfo_to_id(); + tinfo_to_trace(trace_id); `CSR_FROM_PIPE(id, mip) end end @@ -1375,7 +1581,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (trace_ex.m_valid & s_wb_valid_adjusted) begin // Used flopped values in case write happened before wb_valid - minstret_to_ex(); + sample_perf_counter_to_trace(trace_ex); trace_ex.m_csr.got_minstret = '1; end @@ -1387,7 +1593,9 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_new_valid_insn = r_pipe_freeze_trace.id_valid && r_pipe_freeze_trace.is_decoding;// && !r_pipe_freeze_trace.apu_rvalid; - s_wb_valid_adjusted = r_pipe_freeze_trace.wb_valid && (s_core_is_decoding || (r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_EX));// && !r_pipe_freeze_trace.apu_rvalid;; + s_wb_valid_adjusted = r_pipe_freeze_trace.wb_valid && (s_core_is_decoding || (r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_EX) || (r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_WB) || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_FLUSH) || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_ID) || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF));// && !r_pipe_freeze_trace.apu_rvalid;; + s_ex_reg_we_adjusted = r_pipe_freeze_trace.ex_reg_we && r_pipe_freeze_trace.mult_ready && r_pipe_freeze_trace.alu_ready && r_pipe_freeze_trace.lsu_ready_ex && !s_apu_to_alu_port; + s_rf_we_wb_adjusted = r_pipe_freeze_trace.rf_we_wb && 
(~r_pipe_freeze_trace.data_misaligned_ex && r_pipe_freeze_trace.wb_ready) && (!s_apu_to_lsu_port || r_pipe_freeze_trace.wb_contention_lsu); s_fflags_we_non_apu = 1'b0; if (r_pipe_freeze_trace.csr.fflags_we) begin @@ -1418,60 +1626,55 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; s_skip_wb = 1'b1; end end - if (trace_wb.m_valid && !s_skip_wb) begin - if (r_pipe_freeze_trace.rf_we_wb) begin - if((trace_wb.m_rd_addr[0] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[0]) && trace_wb.m_mem_req_id_valid[0]) begin - trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; - trace_wb.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; - trace_wb.m_mem_req_id_valid[0] = 1'b0; - end else if (trace_wb.m_2_rd_insn && (trace_wb.m_rd_addr[1] == r_pipe_freeze_trace.rf_addr_wb) && (cnt_data_resp == trace_wb.m_mem_req_id[1]) && trace_wb.m_mem_req_id_valid[1]) begin - trace_wb.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; - trace_wb.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; - trace_wb.m_mem_req_id_valid[1] = 1'b0; - end - end - if (!trace_wb.m_data_missaligned) begin - send_rvfi(trace_wb); - ->e_dev_send_wb_1; ->e_send_rvfi_trace_wb_2; - trace_wb.m_valid = 1'b0; + if (trace_wb.m_valid && !s_skip_wb && s_rf_we_wb_adjusted) begin + if (trace_wb.m_2_rd_insn) begin + trace_wb.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; + trace_wb.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; + end else if (trace_wb.m_ex_fw) begin + trace_wb.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; + trace_wb.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; + trace_wb.m_2_rd_insn = 1'b1; end else begin - if (s_wb_valid_adjusted) begin - if (r_pipe_freeze_trace.rf_we_wb) begin - if (!trace_wb.m_ex_fw) begin - trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; - trace_wb.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; - end - if (trace_wb.m_data_missaligned && !trace_wb.m_got_first_data) begin - trace_wb.m_got_first_data = 1'b1; - end else begin - 
send_rvfi(trace_wb); - ->e_dev_send_wb_2; ->e_send_rvfi_trace_wb_3; - trace_wb.m_valid = 1'b0; - end - end // rf_we_wb + trace_wb.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; + trace_wb.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; + end + + if (r_pipe_freeze_trace.csr.fregs_we) begin + `CSR_FROM_PIPE(wb, mstatus_fs) + trace_wb.m_csr.mstatus_fs_we = 1'b1; + trace_wb.m_csr.mstatus_fs_wmask = '1; + if(r_pipe_freeze_trace.csr.we && r_pipe_freeze_trace.csr.mstatus_fs_we) begin //In this specific case, two writes to mstatus_fs happen at the same time. We need to recreate the writes caused by fregs_we + trace_wb.m_csr.mstatus_fs_wdata = FS_DIRTY; end + ->e_fregs_dirty_1; end + + send_rvfi(trace_wb); + ->e_dev_send_wb_1; ->e_send_rvfi_trace_wb_2; + trace_wb.m_valid = 1'b0; + end if (trace_ex.m_valid) begin - - if (!trace_ex.m_csr.got_minstret) begin - minstret_to_ex(); + if(trace_ex.m_instret_smaple_trigger == 1) begin //time to sample instret + sample_perf_counter_to_trace(trace_ex); end + trace_ex.m_instret_smaple_trigger = trace_ex.m_instret_smaple_trigger + 1; + `CSR_FROM_PIPE(ex, misa) `CSR_FROM_PIPE(ex, tdata1) `CSR_FROM_PIPE(ex, tdata2) - tinfo_to_ex(); + tinfo_to_trace(trace_ex); - if (r_pipe_freeze_trace.regfile_we_lsu) begin + if (s_rf_we_wb_adjusted) begin ->e_dev_commit_rf_to_ex_4; - if ((cnt_data_resp == trace_ex.m_mem_req_id[0]) && !(trace_ex.m_got_ex_reg) && trace_ex.m_mem_req_id_valid[0]) begin + if (!(trace_ex.m_got_ex_reg) && trace_ex.m_mem_req_id_valid[0]) begin trace_ex.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; trace_ex.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; trace_ex.m_got_first_data = 1'b1; trace_ex.m_mem_req_id_valid[0] = 1'b0; - end else if ((cnt_data_resp == trace_ex.m_mem_req_id[1]) && trace_ex.m_mem_req_id_valid[1]) begin + end else if (trace_ex.m_mem_req_id_valid[1]) begin trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; trace_ex.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; trace_ex.m_got_first_data = 1'b1; @@ 
-1485,66 +1688,86 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_ex.m_valid = 1'b0; ->e_send_rvfi_trace_ex_2; end else begin - if (r_pipe_freeze_trace.rf_we_wb && !s_apu_to_lsu_port) begin + + if (s_rf_we_wb_adjusted) begin ->e_dev_commit_rf_to_ex_1; - if (trace_ex.m_got_ex_reg) begin - trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; - trace_ex.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_2_rd_insn = 1'b1; - trace_ex.m_got_first_data = 1'b1; - end else begin - trace_ex.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; - trace_ex.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_got_first_data = 1'b1; + commit_rf_to_trace(trace_ex); + + if (r_pipe_freeze_trace.csr.fregs_we && (r_pipe_freeze_trace.rf_we_wb && r_pipe_freeze_trace.rf_addr_wb[5])) begin //Catching mstatus_fs updates caused by flw + `CSR_FROM_PIPE(ex, mstatus_fs) + trace_ex.m_csr.mstatus_fs_we = 1'b1; + trace_ex.m_csr.mstatus_fs_wmask = '1; + if(r_pipe_freeze_trace.csr.we && r_pipe_freeze_trace.csr.mstatus_fs_we) begin //In this specific case, two writes to mstatus_fs happen at the same time. 
We need to recreate the writes caused by fregs_we + trace_ex.m_csr.mstatus_fs_wdata = FS_DIRTY; + end else begin + trace_id.m_csr.mstatus_fs_rdata = trace_ex.m_csr.mstatus_fs_wdata; + s_dont_override_mstatus_fs_id = 1'b1; + end + ->e_fregs_dirty_3; end - end - if (!s_ex_valid_adjusted & !trace_ex.m_csr.got_minstret) begin - minstret_to_ex(); - end - if (trace_ex.m_is_load) begin // only move relevant instr in wb stage - ->e_ex_to_wb_1; - trace_wb.move_down_pipe(trace_ex); + send_rvfi(trace_ex); + trace_ex.m_valid = 1'b0; + end else begin - if (!trace_ex.m_csr.got_minstret) begin - minstret_to_ex(); + if (trace_ex.m_is_load) begin // only move relevant instr in wb stage + ->e_ex_to_wb_1; + trace_wb.move_down_pipe(trace_ex); + end else begin + send_rvfi(trace_ex); + ->e_send_rvfi_trace_ex_6; end - send_rvfi(trace_ex); - ->e_send_rvfi_trace_ex_6; + trace_ex.m_valid = 1'b0; end - trace_ex.m_valid = 1'b0; end - end else if (r_pipe_freeze_trace.rf_we_wb && !s_apu_to_lsu_port && !s_was_flush) begin + end else if (s_rf_we_wb_adjusted && !s_was_flush) begin ->e_dev_commit_rf_to_ex_2; - if (trace_ex.m_got_ex_reg) begin - trace_ex.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; - trace_ex.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_2_rd_insn = 1'b1; - trace_ex.m_got_first_data = 1'b1; - end else begin - trace_ex.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; - trace_ex.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; - trace_ex.m_got_first_data = 1'b1; - end + commit_rf_to_trace(trace_ex); end end - s_ex_valid_adjusted = (r_pipe_freeze_trace.ex_valid && r_pipe_freeze_trace.ex_ready) && (s_core_is_decoding || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF)) && (!r_pipe_freeze_trace.apu_rvalid || r_pipe_freeze_trace.data_req_ex); + // If mret, we need to keep the instruction in Id during flush_ex because mstatus update happens at that time + s_ex_valid_adjusted = (r_pipe_freeze_trace.ex_valid && r_pipe_freeze_trace.ex_ready) && (s_core_is_decoding || 
(r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_IF) || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_TAKEN_ID) || (r_pipe_freeze_trace.ctrl_fsm_cs == DBG_FLUSH) || ((r_pipe_freeze_trace.ctrl_fsm_cs == FLUSH_EX) && !r_pipe_freeze_trace.mret_insn_dec)); //EX_STAGE - if (trace_id.m_valid) begin - mtvec_to_id(); - `CSR_FROM_PIPE(id, mip) - `CSR_FROM_PIPE(id, misa) + if (trace_id.m_valid) begin + if(trace_id.m_instret_smaple_trigger == 1) begin //time to sample instret + sample_perf_counter_to_trace(trace_id); + for(int idx=3; idx<32; idx++) begin + sample_perf_counter_to_id(idx); + sample_perf_counter_h_to_id(idx); + sample_perf_event_to_trace(idx, trace_id); + end + end + trace_id.m_instret_smaple_trigger = trace_id.m_instret_smaple_trigger + 1; - if (!csr_is_irq && !s_is_irq_start) begin - mstatus_to_id(); + if(trace_id.m_sample_csr_write_in_ex && !csr_is_irq && !s_is_irq_start) begin //First cycle after id_ready, csr write is asserted in this cycle + `CSR_FROM_PIPE(id, mstatus) + if(!s_dont_override_mstatus_fs_id) begin + `CSR_FROM_PIPE(id, mstatus_fs) + end `CSR_FROM_PIPE(id, mepc) - if (trace_id.m_csr.mcause_we == '0) begin //for debug purpose - `CSR_FROM_PIPE(id, mcause) + `CSR_FROM_PIPE(id, mcause) + `CSR_FROM_PIPE(id, dscratch0) + `CSR_FROM_PIPE(id, dscratch1) + if(r_pipe_freeze_trace.csr.we && (r_pipe_freeze_trace.csr.addr == CSR_DPC)) begin + `CSR_FROM_PIPE(id, dpc) end + + `CSR_FROM_PIPE(id, mcountinhibit) + + perf_counter_to_trace(trace_id); + ->e_csr_in_ex; end + if(r_pipe_freeze_trace.is_decoding) begin + trace_id.m_sample_csr_write_in_ex = 1'b0; + end + mtvec_to_id(); + + `CSR_FROM_PIPE(id, mip) + `CSR_FROM_PIPE(id, misa) + `CSR_FROM_PIPE(id, mcountinhibit) `CSR_FROM_PIPE(id, mscratch) `CSR_FROM_PIPE(id, mie) @@ -1553,10 +1776,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; `CSR_FROM_PIPE(id, frm) `CSR_FROM_PIPE(id, fcsr) - if (r_pipe_freeze_trace.csr.we) begin - `CSR_FROM_PIPE(id, dpc) - end - if (r_pipe_freeze_trace.csr.dcsr_we) begin 
dcsr_to_id(); end @@ -1577,6 +1796,15 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_ex.m_csr.frm_wmask = '0; trace_ex.m_csr.fcsr_wmask = '0; + if(r_pipe_freeze_trace.ctrl_fsm_cs == XRET_JUMP) begin //xret exit pipeline + tinfo_to_trace(trace_id); + `CSR_FROM_PIPE(id, tdata1) + `CSR_FROM_PIPE(id, tdata2) + send_rvfi(trace_id); + trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; + end + if (r_pipe_freeze_trace.apu_req && r_pipe_freeze_trace.apu_gnt) begin trace_id.m_is_apu = 1'b1; trace_id.m_apu_req_id = cnt_apu_req; @@ -1586,6 +1814,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_apu_req.set_to_apu(); apu_trace_q.push_back(trace_apu_req); trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; if(r_pipe_freeze_trace.apu_rvalid && (cnt_apu_req == cnt_apu_resp)) begin//APU return in the same cycle apu_resp(); @@ -1603,10 +1832,10 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.m_rd_addr[0] = r_pipe_freeze_trace.ex_reg_addr; trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.ex_reg_wdata; trace_id.m_got_ex_reg = 1'b1; - end else if (!trace_ex.m_valid & r_pipe_freeze_trace.rf_we_wb & !trace_id.m_ex_fw) begin + end else if (!trace_ex.m_valid & s_rf_we_wb_adjusted & !trace_id.m_ex_fw) begin trace_id.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; - end else if (r_pipe_freeze_trace.rf_we_wb) begin + end else if (s_rf_we_wb_adjusted) begin trace_id.m_rd_addr[1] = r_pipe_freeze_trace.rf_addr_wb; trace_id.m_rd_wdata[1] = r_pipe_freeze_trace.rf_wdata_wb; trace_id.m_2_rd_insn = 1'b1; @@ -1621,19 +1850,16 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.m_mem.addr = r_pipe_freeze_trace.data_addr_pmp; if (r_pipe_freeze_trace.data_misaligned) begin cnt_data_req = cnt_data_req + 1; + trace_id.m_mem_req_id[0] = cnt_data_req; end + if (!r_pipe_freeze_trace.data_we_ex) begin 
trace_id.m_is_load = 1'b1; trace_id.m_mem.wmask = be_to_mask(r_pipe_freeze_trace.lsu_data_be); //'1; - if (r_pipe_freeze_trace.data_misaligned) begin - trace_id.m_data_missaligned = 1'b1; - trace_id.m_mem_req_id[1] = trace_id.m_mem_req_id[0]; - trace_id.m_mem_req_id[0] = cnt_data_req; - trace_id.m_mem_req_id_valid[1] = 1'b1; - end end else begin trace_id.m_mem.rmask = be_to_mask(r_pipe_freeze_trace.lsu_data_be); //'1; end + if (trace_id.m_got_ex_reg) begin // Shift index 0 to 1 trace_id.m_mem_req_id[1] = trace_id.m_mem_req_id[0]; trace_id.m_mem_req_id[0] = 0; @@ -1644,6 +1870,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); // The instruction moves forward from ID to EX trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; ->e_id_to_ex_1; end else if (r_pipe_freeze_trace.ex_reg_we && r_pipe_freeze_trace.rf_alu_we_ex) begin @@ -1667,9 +1894,6 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; if (s_new_valid_insn) begin // There is a new valid instruction if (trace_id.m_valid) begin if (trace_ex.m_valid) begin - if (!trace_ex.m_csr.got_minstret) begin - minstret_to_ex(); - end if (trace_wb.m_valid) begin send_rvfi(trace_ex); ->e_send_rvfi_trace_ex_4; @@ -1692,15 +1916,10 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.m_mem.addr = r_pipe_freeze_trace.data_addr_pmp; if (r_pipe_freeze_trace.data_misaligned) begin cnt_data_req = cnt_data_req + 1; + trace_id.m_mem_req_id[0] = cnt_data_req; end if (!r_pipe_freeze_trace.data_we_ex) begin trace_id.m_is_load = 1'b1; - if (r_pipe_freeze_trace.data_misaligned) begin - trace_id.m_data_missaligned = 1'b1; - trace_id.m_mem_req_id[1] = trace_id.m_mem_req_id[0]; - trace_id.m_mem_req_id[0] = cnt_data_req; - trace_id.m_mem_req_id_valid[1] = 1'b1; - end end if (trace_id.m_got_ex_reg) begin // Shift index 0 to 1 trace_id.m_mem_req_id[1] = trace_id.m_mem_req_id[0]; @@ -1708,7 +1927,7 @@ insn_trace_t 
trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; trace_id.m_mem_req_id_valid[0] = 1'b0; trace_id.m_mem_req_id_valid[1] = 1'b1; end - end else if (r_pipe_freeze_trace.rf_we_wb && !r_pipe_freeze_trace.ex_reg_we) begin + end else if (s_rf_we_wb_adjusted && !r_pipe_freeze_trace.ex_reg_we) begin trace_id.m_rd_addr[0] = r_pipe_freeze_trace.rf_addr_wb; trace_id.m_rd_wdata[0] = r_pipe_freeze_trace.rf_wdata_wb; end @@ -1716,6 +1935,7 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; hwloop_to_id(); trace_ex.move_down_pipe(trace_id); trace_id.m_valid = 1'b0; + s_dont_override_mstatus_fs_id = 1'b0; ->e_id_to_ex_2; end if_to_id(); @@ -1733,13 +1953,28 @@ insn_trace_t trace_if, trace_id, trace_ex, trace_ex_next, trace_wb; end //IF_STAGE - if (r_pipe_freeze_trace.if_valid && r_pipe_freeze_trace.if_ready) begin - if(trace_if.m_valid && r_pipe_freeze_trace.id_valid && r_pipe_freeze_trace.id_ready && !trace_id.m_valid && r_pipe_freeze_trace.ebrk_insn_dec) begin - if_to_id(); - trace_id.m_is_ebreak = '1; //trace_if.m_is_ebreak; - ->e_if_2_id_2; + if(trace_if.m_valid) begin + if(r_pipe_freeze_trace.is_illegal && r_pipe_freeze_trace.is_decoding) begin + trace_if.m_is_illegal = 1'b1; + end + end + + if (r_pipe_freeze_trace.if_valid && r_pipe_freeze_trace.if_ready && r_pipe_freeze_trace.instr_valid_if) begin + if (trace_if.m_valid) begin + if (r_pipe_freeze_trace.id_valid && r_pipe_freeze_trace.id_ready && !trace_id.m_valid && r_pipe_freeze_trace.ebrk_insn_dec) begin + if_to_id(); + trace_id.m_is_ebreak = '1; //trace_if.m_is_ebreak; + ->e_if_2_id_2; + end else if (trace_if.m_is_illegal) begin + if_to_id(); + ->e_if_2_id_3; + end else if (r_pipe_freeze_trace.ecall_insn_dec) begin + if_to_id(); + ->e_if_2_id_4; + end end + trace_if.m_insn = r_pipe_freeze_trace.instr_if; //Instr comes from if, buffer for one cycle trace_if.m_pc_rdata = r_pipe_freeze_trace.pc_if; trace_if.m_dbg_taken = is_dbg_taken; @@ -1754,6 +1989,7 @@ insn_trace_t trace_if, trace_id, trace_ex, 
trace_ex_next, trace_wb; mstatus_to_id(); `CSR_FROM_PIPE(id, mepc) `CSR_FROM_PIPE(id, mcause) + ->e_csr_irq; end if (!s_id_done && r_pipe_freeze_trace.is_decoding) begin diff --git a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_rvfi_trace.sv b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_rvfi_trace.sv index 417f562a2..adf851759 100644 --- a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_rvfi_trace.sv +++ b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_rvfi_trace.sv @@ -1,26 +1,31 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. // You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// CV32E40P RVFI interface -// -// Contributors: Halfdan Bechmann, Silicon Labs -// Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Halfdan Bechmann, Silicon Labs // +// Yoann Pruvost, Dolphin Design // +// // +// Description: CV32E40P RVFI tracer // +// // +//////////////////////////////////////////////////////////////////////////////////// module cv32e40p_rvfi_trace import cv32e40p_pkg::*; + import cv32e40p_rvfi_pkg::*; #( parameter FPU = 0, parameter ZFINX = 0 @@ -32,9 +37,14 @@ module cv32e40p_rvfi_trace input logic [31:0] imm_s3_type, - input logic rvfi_valid, - input logic [31:0] rvfi_insn, - input logic [31:0] rvfi_pc_rdata, + input logic rvfi_valid, + input logic [31:0] rvfi_insn, + input integer rvfi_start_cycle, + input time rvfi_start_time, + input integer rvfi_stop_cycle, + input time rvfi_stop_time, + input logic [31:0] rvfi_pc_rdata, + input rvfi_trap_t rvfi_trap, input logic [ 4:0] rvfi_rd_addr [1:0], input logic [31:0] rvfi_rd_wdata[1:0], @@ -42,6 +52,7 @@ module cv32e40p_rvfi_trace input logic rvfi_frd_wvalid[1:0], input logic [ 4:0] rvfi_frd_addr [1:0], input logic [31:0] rvfi_frd_wdata [1:0], + input logic rvfi_2_rd, input logic [ 4:0] rvfi_rs1_addr, input logic [ 4:0] rvfi_rs2_addr, @@ -61,8 +72,8 @@ module cv32e40p_rvfi_trace input logic [31:0] rvfi_frs3_rdata, input logic [31:0] rvfi_mem_addr, - input logic [ 3:0] rvfi_mem_rmask, - input logic [ 3:0] rvfi_mem_wmask, + input logic [31:0] rvfi_mem_rmask, + input logic [31:0] rvfi_mem_wmask, input logic [31:0] rvfi_mem_rdata, input logic [31:0] rvfi_mem_wdata ); @@ -74,7 +85,7 @@ module cv32e40p_rvfi_trace integer f; //file pointer string fn; - integer cycles; + // integer cycles; string info_tag; logic is_compressed; @@ -125,7 +136,13 @@ module cv32e40p_rvfi_trace rs3_value = rvfi_rs3_rdata; end - if (rvfi_frd_wvalid[0]) begin + if (rvfi_2_rd) 
begin + if (rvfi_frd_wvalid[1]) begin + rd = {1'b1, rvfi_frd_addr[1]}; + end else begin + rd = {1'b0, rvfi_rd_addr[1]}; + end + end else if (rvfi_frd_wvalid[0]) begin rd = {1'b1, rvfi_frd_addr[0]}; end else begin rd = {1'b0, rvfi_rd_addr[0]}; @@ -134,57 +151,69 @@ module cv32e40p_rvfi_trace assign rs4 = rs3; - assign imm_i_type = {{20{rvfi_insn[31]}}, rvfi_insn[31:20]}; - assign imm_iz_type = {20'b0, rvfi_insn[31:20]}; - assign imm_s_type = {{20{rvfi_insn[31]}}, rvfi_insn[31:25], rvfi_insn[11:7]}; + cv32e40p_compressed_decoder #( + .FPU(FPU) + ) rvfi_trace_decompress_i ( + .instr_i(rvfi_insn), + .instr_o(decomp_insn), + .is_compressed_o(is_compressed) + ); + + assign imm_i_type = {{20{decomp_insn[31]}}, decomp_insn[31:20]}; + assign imm_iz_type = {20'b0, decomp_insn[31:20]}; + assign imm_s_type = {{20{decomp_insn[31]}}, decomp_insn[31:25], decomp_insn[11:7]}; assign imm_sb_type = { - {19{rvfi_insn[31]}}, rvfi_insn[31], rvfi_insn[7], rvfi_insn[30:25], rvfi_insn[11:8], 1'b0 + {19{decomp_insn[31]}}, + decomp_insn[31], + decomp_insn[7], + decomp_insn[30:25], + decomp_insn[11:8], + 1'b0 }; - assign imm_u_type = {rvfi_insn[31:12], 12'b0}; + assign imm_u_type = {decomp_insn[31:12], 12'b0}; assign imm_uj_type = { - {12{rvfi_insn[31]}}, rvfi_insn[19:12], rvfi_insn[20], rvfi_insn[30:21], 1'b0 + {12{decomp_insn[31]}}, decomp_insn[19:12], decomp_insn[20], decomp_insn[30:21], 1'b0 }; - assign imm_z_type = '0; //{27'b0, rvfi_insn[REG_S1_MSB:REG_S1_LSB]}; + assign imm_z_type = '0; //{27'b0, decomp_insn[REG_S1_MSB:REG_S1_LSB]}; - assign imm_s2_type = {27'b0, rvfi_insn[24:20]}; + assign imm_s2_type = {27'b0, decomp_insn[24:20]}; assign imm_vs_type = '0; assign imm_vu_type = '0; assign imm_shuffle_type = '0; assign imm_clip_type = '0; - cv32e40p_compressed_decoder #( - .FPU(FPU) - ) rvfi_trace_decompress_i ( - .instr_i(rvfi_insn), - .instr_o(decomp_insn), - .is_compressed_o(is_compressed) - ); - `include "cv32e40p_instr_trace.svh" instr_trace_t trace_retire; function instr_trace_t 
trace_new_instr(); instr_trace_t trace; trace = new(); - trace.init(.cycles(cycles), .pc(rvfi_pc_rdata), .compressed(is_compressed), + trace.external_time = 1; + trace.simtime = rvfi_start_time - 1ns; + trace.stoptime = rvfi_stop_time; + trace.stopcycles = rvfi_stop_cycle; + trace.ctx = (rvfi_trap.trap) ? "(C)" : ""; + trace.init(.cycles(rvfi_start_cycle), .pc(rvfi_pc_rdata), .compressed(is_compressed), .instr(decomp_insn)); return trace; endfunction : trace_new_instr function void apply_reg_write(); foreach (trace_retire.regs_write[i]) begin - if (rvfi_frd_wvalid[0] && (trace_retire.regs_write[i].addr == {1'b1, rvfi_frd_addr[0]})) begin - trace_retire.regs_write[i].value = rvfi_frd_wdata[0]; - end else if (trace_retire.regs_write[i].addr == rvfi_rd_addr[0]) begin - trace_retire.regs_write[i].value = rvfi_rd_wdata[0]; - end if (rvfi_frd_wvalid[1] && (trace_retire.regs_write[i].addr == {1'b1, rvfi_frd_addr[1]})) begin trace_retire.regs_write[i].value = rvfi_frd_wdata[1]; end else if (trace_retire.regs_write[i].addr == rvfi_rd_addr[1]) begin trace_retire.regs_write[i].value = rvfi_rd_wdata[1]; end end + foreach (trace_retire.regs_write[i]) begin + if (rvfi_frd_wvalid[0] && (trace_retire.regs_write[i].addr == {1'b1, rvfi_frd_addr[0]})) begin + trace_retire.regs_write[i].value = rvfi_frd_wdata[0]; + end else if (trace_retire.regs_write[i].addr == rvfi_rd_addr[0]) begin + trace_retire.regs_write[i].value = rvfi_rd_wdata[0]; + end + end endfunction : apply_reg_write function void apply_mem_access(); @@ -202,11 +231,9 @@ instr_trace_t trace_retire; end endfunction : apply_mem_access - // cycle counter - always_ff @(posedge clk_i, negedge rst_ni) begin - if (rst_ni == 1'b0) cycles <= 0; - else cycles <= cycles + 1; - end + string insn_disas; + logic [31:0] insn_pc; + logic [31:0] insn_val; always @(posedge clk_i) begin if (rvfi_valid) begin @@ -214,6 +241,9 @@ instr_trace_t trace_retire; apply_reg_write(); apply_mem_access(); trace_retire.printInstrTrace(); + insn_disas = 
trace_retire.str; + insn_pc = trace_retire.pc; + insn_val = trace_retire.instr; end end @@ -223,7 +253,8 @@ instr_trace_t trace_retire; $sformat(info_tag, "CORE_TRACER %2d", hart_id_i); $display("[%s] Output filename is: %s", info_tag, fn); f = $fopen(fn, "w"); - $fwrite(f, "Time\tCycle\tPC\tInstr\tDecoded instruction\tRegister and memory contents\n"); + $fwrite(f, + " Time Cycle PC Instr Ctx Decoded instruction Register and memory contents Stop cycle Stop time\n"); end diff --git a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_tb_wrapper.sv b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_tb_wrapper.sv index 725ed4f05..25245407e 100644 --- a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_tb_wrapper.sv +++ b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_tb_wrapper.sv @@ -1,23 +1,27 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. // You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. 
-// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// Wrapper for a cv32e40p, containing cv32e40p_top, and rvfi_tracer -// -// Contributors: Davide Schiavone, OpenHW Group -// Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Davide Schiavone, OpenHW Group // +// Yoann Pruvost, Dolphin Design // +// // +// Description: Test-bench wrapper for cv32e40p_top, tracer and and rvfi_tracer // +// // +//////////////////////////////////////////////////////////////////////////////////// `ifdef CV32E40P_ASSERT_ON `include "cv32e40p_prefetch_controller_sva.sv" @@ -234,8 +238,9 @@ module cv32e40p_tb_wrapper endgenerate cv32e40p_rvfi #( - .FPU (FPU), - .ZFINX(ZFINX) + .FPU(FPU), + .ZFINX(ZFINX), + .NUM_MHPMCOUNTERS(NUM_MHPMCOUNTERS) ) rvfi_i ( .clk_i (cv32e40p_top_i.core_i.clk_i), .rst_ni(cv32e40p_top_i.core_i.rst_ni), @@ -279,8 +284,12 @@ module cv32e40p_tb_wrapper // .instr (cv32e40p_top_i.core_i.id_stage_i.instr ), .is_compressed_id_i(cv32e40p_top_i.core_i.id_stage_i.is_compressed_i), .ebrk_insn_dec_i (cv32e40p_top_i.core_i.id_stage_i.ebrk_insn_dec), - .csr_cause_i (cv32e40p_top_i.core_i.csr_cause), - .debug_csr_save_i (cv32e40p_top_i.core_i.debug_csr_save), + .ecall_insn_dec_i (cv32e40p_top_i.core_i.id_stage_i.ecall_insn_dec), + .mret_insn_dec_i (cv32e40p_top_i.core_i.id_stage_i.mret_insn_dec), + .mret_dec_i (cv32e40p_top_i.core_i.id_stage_i.mret_dec), + + .csr_cause_i (cv32e40p_top_i.core_i.csr_cause), + .debug_csr_save_i(cv32e40p_top_i.core_i.debug_csr_save), // HWLOOP regs .hwlp_start_q_i (hwlp_start_q), @@ -305,9 +314,11 @@ module cv32e40p_tb_wrapper // .rf_addr_alu_i (cv32e40p_top_i.core_i.id_stage_i.regfile_alu_waddr_fw_i), // .rf_wdata_alu_i (cv32e40p_top_i.core_i.id_stage_i.regfile_alu_wdata_fw_i), + .mult_ready_i (cv32e40p_top_i.core_i.ex_stage_i.mult_ready), + .alu_ready_i (cv32e40p_top_i.core_i.ex_stage_i.alu_ready), //// WB probes //// - 
.wb_valid_i(cv32e40p_top_i.core_i.wb_valid), - + .wb_valid_i (cv32e40p_top_i.core_i.wb_valid), + .wb_ready_i (cv32e40p_top_i.core_i.lsu_ready_wb), //// LSU probes //// .data_we_ex_i (cv32e40p_top_i.core_i.data_we_ex), .data_atop_ex_i (cv32e40p_top_i.core_i.data_atop_ex), @@ -360,6 +371,8 @@ module cv32e40p_tb_wrapper .csr_we_i (cv32e40p_top_i.core_i.cs_registers_i.csr_we_int), .csr_wdata_int_i(cv32e40p_top_i.core_i.cs_registers_i.csr_wdata_int), + .csr_fregs_we_i(cv32e40p_top_i.core_i.cs_registers_i.fregs_we_i), + .csr_mstatus_n_i (cv32e40p_top_i.core_i.cs_registers_i.mstatus_n), .csr_mstatus_q_i (cv32e40p_top_i.core_i.cs_registers_i.mstatus_q), .csr_mstatus_fs_n_i(cv32e40p_top_i.core_i.cs_registers_i.mstatus_fs_n), @@ -391,6 +404,9 @@ module cv32e40p_tb_wrapper .csr_mcountinhibit_n_i (cv32e40p_top_i.core_i.cs_registers_i.mcountinhibit_n), .csr_mcountinhibit_we_i(cv32e40p_top_i.core_i.cs_registers_i.mcountinhibit_we), + .csr_mhpmevent_n_i(cv32e40p_top_i.core_i.cs_registers_i.mhpmevent_n), + .csr_mhpmevent_q_i(cv32e40p_top_i.core_i.cs_registers_i.mhpmevent_q), + .csr_mhpmevent_we_i(cv32e40p_top_i.core_i.cs_registers_i.mhpmevent_we), .csr_mscratch_q_i(cv32e40p_top_i.core_i.cs_registers_i.mscratch_q), .csr_mscratch_n_i(cv32e40p_top_i.core_i.cs_registers_i.mscratch_n), .csr_mepc_q_i(cv32e40p_top_i.core_i.cs_registers_i.mepc_q), @@ -446,12 +462,18 @@ module cv32e40p_tb_wrapper .rvfi_valid(rvfi_valid), .rvfi_insn(rvfi_insn), + .rvfi_start_cycle(rvfi_start_cycle), + .rvfi_start_time(rvfi_start_time), + .rvfi_stop_cycle(rvfi_stop_cycle), + .rvfi_stop_time(rvfi_stop_time), .rvfi_pc_rdata(rvfi_pc_rdata), + .rvfi_trap(rvfi_trap), .rvfi_rd_addr(rvfi_rd_addr), .rvfi_rd_wdata(rvfi_rd_wdata), .rvfi_frd_wvalid(rvfi_frd_wvalid), .rvfi_frd_addr(rvfi_frd_addr), .rvfi_frd_wdata(rvfi_frd_wdata), + .rvfi_2_rd(rvfi_2_rd), .rvfi_rs1_addr(rvfi_rs1_addr), .rvfi_rs2_addr(rvfi_rs2_addr), .rvfi_rs3_addr(rvfi_rs3_addr), diff --git a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_tracer.sv 
b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_tracer.sv index 8208f2e61..59ed6fd7c 100644 --- a/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_tracer.sv +++ b/hw/vendor/openhwgroup_cv32e40p/bhv/cv32e40p_tracer.sv @@ -1,24 +1,26 @@ -// Copyright (c) 2020 OpenHW Group -// -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// https://solderpad.org/licenses/ -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 - -// Traces the executed instructions -// -// Contributors: Andreas Traber, ETHZ -// Davide Schiavone, OpenHW Group -// Pascal Gouedo, Dolphin Design +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ +//////////////////////////////////////////////////////////////////////////////// +// Engineer: Andreas Traber - atraber@iis.ee.ethz.ch // +// // +// Additional contributions by: // +// Davide Schiavone - pschiavo@iis.ee.ethz.ch // +// // +// Design Name: RISC-V Tracer // +// Project Name: RI5CY // +// Language: SystemVerilog // +// // +// Description: Traces the executed instructions // +// // +//////////////////////////////////////////////////////////////////////////////// `ifdef CV32E40P_TRACE_EXECUTION @@ -183,7 +185,8 @@ module cv32e40p_tracer $sformat(info_tag, "CORE_TRACER %2d", hart_id_i); $display("[%s] Output filename is: %s", info_tag, fn); f = $fopen(fn, "w"); - $fwrite(f, "Time\tCycle\tPC\tInstr\tDecoded instruction\tRegister and memory contents\n"); + $fwrite(f, + " Time Cycle PC Instr Ctx Decoded instruction Register and memory contents\n"); end //initial begin diff --git a/hw/vendor/openhwgroup_cv32e40p/bhv/include/cv32e40p_rvfi_pkg.sv b/hw/vendor/openhwgroup_cv32e40p/bhv/include/cv32e40p_rvfi_pkg.sv index 688795690..f4044e6ec 100644 --- a/hw/vendor/openhwgroup_cv32e40p/bhv/include/cv32e40p_rvfi_pkg.sv +++ b/hw/vendor/openhwgroup_cv32e40p/bhv/include/cv32e40p_rvfi_pkg.sv @@ -1,24 +1,28 @@ -// Copyright (c) 2020 OpenHW Group +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. 
// You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// Includes to print info about the RVFI output -// -// Contributors: Davide Schiavone, OpenHW Group -// Halfdan Bechmann, Silicon Labs -// Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Davide Schiavone, OpenHW Group // +// Halfdan Bechmann, Silicon Labs // +// Yoann Pruvost, Dolphin Design // +// // +// Description: Package to print info on RVFI interface // +// // +//////////////////////////////////////////////////////////////////////////////////// package cv32e40p_rvfi_pkg; import cv32e40p_pkg::*; diff --git a/hw/vendor/openhwgroup_cv32e40p/bhv/include/cv32e40p_tracer_pkg.sv b/hw/vendor/openhwgroup_cv32e40p/bhv/include/cv32e40p_tracer_pkg.sv index c099ff4f4..b9ce57df4 100644 --- a/hw/vendor/openhwgroup_cv32e40p/bhv/include/cv32e40p_tracer_pkg.sv +++ b/hw/vendor/openhwgroup_cv32e40p/bhv/include/cv32e40p_tracer_pkg.sv @@ -1,23 +1,13 @@ -// Copyright (c) 2020 OpenHW Group -// -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. 
-// You may obtain a copy of the License at -// -// https://solderpad.org/licenses/ -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 - -// Tracer package -// -// Contributors: Steve Richmond, Silicon Labs -// Pascal Gouedo, Dolphin Design +// Copyright 2018 ETH Zurich and University of Bologna. +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+ package cv32e40p_tracer_pkg; import cv32e40p_pkg::*; @@ -196,8 +186,8 @@ package cv32e40p_tracer_pkg; parameter INSTR_CVEND0 = {12'b000000000000, 5'b?, 3'b100, 4'b0011, 1'b0, OPCODE_CUSTOM_1}; parameter INSTR_CVCOUNTI0 = {12'b?, 5'b00000, 3'b100, 4'b0100, 1'b0, OPCODE_CUSTOM_1}; parameter INSTR_CVCOUNT0 = {12'b000000000000, 5'b?, 3'b100, 4'b0101, 1'b0, OPCODE_CUSTOM_1}; - parameter INSTR_CVSETUPI0 = {12'b?, 5'b00000, 3'b100, 4'b0110, 1'b0, OPCODE_CUSTOM_1}; - parameter INSTR_CVSETUP0 = {12'b?, 5'b00000, 3'b100, 4'b0111, 1'b0, OPCODE_CUSTOM_1}; + parameter INSTR_CVSETUPI0 = {17'b?, 3'b100, 4'b0110, 1'b0, OPCODE_CUSTOM_1}; + parameter INSTR_CVSETUP0 = {12'b?, 5'b?, 3'b100, 4'b0111, 1'b0, OPCODE_CUSTOM_1}; parameter INSTR_CVSTARTI1 = {12'b?, 5'b00000, 3'b100, 4'b0000, 1'b1, OPCODE_CUSTOM_1}; parameter INSTR_CVSTART1 = {12'b000000000000, 5'b?, 3'b100, 4'b0001, 1'b1, OPCODE_CUSTOM_1}; @@ -205,8 +195,8 @@ package cv32e40p_tracer_pkg; parameter INSTR_CVEND1 = {12'b000000000000, 5'b?, 3'b100, 4'b0011, 1'b1, OPCODE_CUSTOM_1}; parameter INSTR_CVCOUNTI1 = {12'b?, 5'b00000, 3'b100, 4'b0100, 1'b1, OPCODE_CUSTOM_1}; parameter INSTR_CVCOUNT1 = {12'b000000000000, 5'b?, 3'b100, 4'b0101, 1'b1, OPCODE_CUSTOM_1}; - parameter INSTR_CVSETUPI1 = {12'b?, 5'b00000, 3'b100, 4'b0110, 1'b1, OPCODE_CUSTOM_1}; - parameter INSTR_CVSETUP1 = {12'b?, 5'b00000, 3'b100, 4'b0111, 1'b1, OPCODE_CUSTOM_1}; + parameter INSTR_CVSETUPI1 = {17'b?, 3'b100, 4'b0110, 1'b1, OPCODE_CUSTOM_1}; + parameter INSTR_CVSETUP1 = {12'b?, 5'b?, 3'b100, 4'b0111, 1'b1, OPCODE_CUSTOM_1}; parameter INSTR_FF1 = {7'b0100001, 5'b0, 5'b?, 3'b011, 5'b?, OPCODE_CUSTOM_1}; @@ -449,8 +439,8 @@ package cv32e40p_tracer_pkg; parameter INSTR_CVSHUFFLE2H = {5'b11100, 1'b0, 1'b0, 5'b?, 5'b?, 3'b000, 5'b?, OPCODE_CUSTOM_3}; parameter INSTR_CVSHUFFLE2B = {5'b11100, 1'b0, 1'b0, 5'b?, 5'b?, 3'b001, 5'b?, OPCODE_CUSTOM_3}; - parameter INSTR_CVPACK = {5'b11101, 1'b0, 1'b0, 5'b?, 5'b?, 3'b000, 5'b?, OPCODE_CUSTOM_3}; - parameter INSTR_CVPACKH = 
{5'b11101, 1'b0, 1'b1, 5'b?, 5'b?, 3'b000, 5'b?, OPCODE_CUSTOM_3}; + parameter INSTR_CVPACK = {5'b11110, 1'b0, 1'b0, 5'b?, 5'b?, 3'b000, 5'b?, OPCODE_CUSTOM_3}; + parameter INSTR_CVPACKH = {5'b11110, 1'b0, 1'b1, 5'b?, 5'b?, 3'b000, 5'b?, OPCODE_CUSTOM_3}; parameter INSTR_CVPACKHIB = {5'b11111, 1'b0, 1'b1, 5'b?, 5'b?, 3'b001, 5'b?, OPCODE_CUSTOM_3}; parameter INSTR_CVPACKLOB = {5'b11111, 1'b0, 1'b0, 5'b?, 5'b?, 3'b001, 5'b?, OPCODE_CUSTOM_3}; diff --git a/hw/vendor/openhwgroup_cv32e40p/bhv/insn_trace.sv b/hw/vendor/openhwgroup_cv32e40p/bhv/insn_trace.sv index 3db2a7ee0..8cdc06d9e 100644 --- a/hw/vendor/openhwgroup_cv32e40p/bhv/insn_trace.sv +++ b/hw/vendor/openhwgroup_cv32e40p/bhv/insn_trace.sv @@ -1,5 +1,26 @@ -// Copyright 2022 Dolphin Design -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +// Copyright 2024 OpenHW Group and Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Yoann Pruvost, Dolphin Design // +// // +// Description: Macros and Functions to print information on RVFI interface // +// // +//////////////////////////////////////////////////////////////////////////////////// `define DEFINE_CSR(CSR_NAME) \ logic ``CSR_NAME``_we; \ @@ -23,6 +44,10 @@ class insn_trace_t; bit m_valid; logic [63:0] m_order; + integer m_start_cycle; + integer m_stop_cycle; + time m_start_time; + time m_stop_time; bit m_skip_order; //next order was used by trap; logic [31:0] m_pc_rdata; logic [31:0] m_insn; @@ -65,12 +90,15 @@ bit m_move_down_pipe; int m_instret_cnt; + int m_instret_smaple_trigger; //We need to sample minstret from csr 2 cycle after id is doen + + bit m_sample_csr_write_in_ex; struct { logic [31:0] addr ; - logic [ 3:0] rmask; + logic [31:0] rmask; logic [31:0] rdata; - logic [ 3:0] wmask; + logic [31:0] wmask; logic [31:0] wdata; } m_mem; @@ -100,9 +128,28 @@ `DEFINE_CSR(mscratch) `DEFINE_CSR(mepc) `DEFINE_CSR(mcause) + `DEFINE_CSR(mcycle) `DEFINE_CSR(minstret) bit got_minstret; - + `DEFINE_CSR(mcycleh) + `DEFINE_CSR(minstreth) + `DEFINE_CSR(cycle) + `DEFINE_CSR(instret) + // bit got_minstret; + `DEFINE_CSR(cycleh) + `DEFINE_CSR(instreth) + + logic [31:0][ 1:0] mhpmcounter_we; + logic [31:0][63:0] mhpmcounter_rdata; + logic [31:0][63:0] mhpmcounter_rmask; + logic [31:0][63:0] mhpmcounter_wdata; + logic [31:0][63:0] mhpmcounter_wmask; + + logic [31:0] mhpmevent_we; + logic [31:0][31:0] mhpmevent_rdata; + logic [31:0][31:0] mhpmevent_rmask; + logic [31:0][31:0] mhpmevent_wdata; + logic [31:0][31:0] mhpmevent_wmask; `DEFINE_CSR(mip) //mnxti //mintstatus @@ -145,32 +192,38 @@ function new(); - this.m_order = 0; - this.m_skip_order = 1'b0; - this.m_valid = 1'b0; - this.m_move_down_pipe = 1'b0; - this.m_data_missaligned = 1'b0; - this.m_got_first_data = 1'b0; - this.m_got_ex_reg = 1'b0; - this.m_intr = '0; - this.m_dbg_taken = 
1'b0; - this.m_dbg_cause = '0; - this.m_is_ebreak = '0; - this.m_is_illegal = '0; - this.m_is_irq = '0; - this.m_is_memory = 1'b0; - this.m_is_load = 1'b0; - this.m_is_apu = 1'b0; - this.m_is_apu_ok = 1'b0; - this.m_apu_req_id = 0; - this.m_mem_req_id[0] = 0; - this.m_mem_req_id[1] = 0; - this.m_mem_req_id_valid = '0; - this.m_trap = 1'b0; - this.m_fflags_we_non_apu = 1'b0; - this.m_frm_we_non_apu = 1'b0; - this.m_fcsr_we_non_apu = 1'b0; - this.m_instret_cnt = 0; + this.m_order = 0; + this.m_start_cycle = 0; + this.m_stop_cycle = 0; + this.m_start_time = 0; + this.m_stop_time = 0; + this.m_skip_order = 1'b0; + this.m_valid = 1'b0; + this.m_move_down_pipe = 1'b0; + this.m_data_missaligned = 1'b0; + this.m_got_first_data = 1'b0; + this.m_got_ex_reg = 1'b0; + this.m_intr = '0; + this.m_dbg_taken = 1'b0; + this.m_dbg_cause = '0; + this.m_is_ebreak = '0; + this.m_is_illegal = '0; + this.m_is_irq = '0; + this.m_is_memory = 1'b0; + this.m_is_load = 1'b0; + this.m_is_apu = 1'b0; + this.m_is_apu_ok = 1'b0; + this.m_apu_req_id = 0; + this.m_mem_req_id[0] = 0; + this.m_mem_req_id[1] = 0; + this.m_mem_req_id_valid = '0; + this.m_trap = 1'b0; + this.m_fflags_we_non_apu = 1'b0; + this.m_frm_we_non_apu = 1'b0; + this.m_fcsr_we_non_apu = 1'b0; + this.m_instret_cnt = 0; + this.m_instret_smaple_trigger = 0; + this.m_sample_csr_write_in_ex = 1'b1; endfunction function void get_mnemonic(); @@ -610,12 +663,12 @@ INSTR_CVCMPLEB : this.m_mnemonic = "cv.cmple.b"; INSTR_CVCMPLESCB : this.m_mnemonic = "cv.cmple.sc.b"; INSTR_CVCMPLESCIB : this.m_mnemonic = "cv.cmple.sci.b"; - INSTR_CVCMPGTUH : this.m_mnemonic = "cv.cmptu.h"; - INSTR_CVCMPGTUSCH : this.m_mnemonic = "cv.cmptu.sc.h"; - INSTR_CVCMPGTUSCIH : this.m_mnemonic = "cv.cmptu.sci.h"; - INSTR_CVCMPGTUB : this.m_mnemonic = "cv.cmptu.b"; - INSTR_CVCMPGTUSCB : this.m_mnemonic = "cv.cmptu.sc.b"; - INSTR_CVCMPGTUSCIB : this.m_mnemonic = "cv.cmptu.sci.b"; + INSTR_CVCMPGTUH : this.m_mnemonic = "cv.cmpgtu.h"; + INSTR_CVCMPGTUSCH : 
this.m_mnemonic = "cv.cmpgtu.sc.h"; + INSTR_CVCMPGTUSCIH : this.m_mnemonic = "cv.cmpgtu.sci.h"; + INSTR_CVCMPGTUB : this.m_mnemonic = "cv.cmpgtu.b"; + INSTR_CVCMPGTUSCB : this.m_mnemonic = "cv.cmpgtu.sc.b"; + INSTR_CVCMPGTUSCIB : this.m_mnemonic = "cv.cmpgtu.sci.b"; INSTR_CVCMPGEUH : this.m_mnemonic = "cv.cmpgeu.h"; INSTR_CVCMPGEUSCH : this.m_mnemonic = "cv.cmpgeu.sc.h"; INSTR_CVCMPGEUSCIH : this.m_mnemonic = "cv.cmpgeu.sci.h"; @@ -844,7 +897,18 @@ `INIT_CSR(mscratch) `INIT_CSR(mepc) `INIT_CSR(mcause) + `INIT_CSR(mcycle) `INIT_CSR(minstret) + `INIT_CSR(mcycleh) + `INIT_CSR(minstreth) + `INIT_CSR(cycle) + `INIT_CSR(instret) + `INIT_CSR(cycleh) + `INIT_CSR(instreth) + this.m_csr.mhpmcounter_we = '0; + this.m_csr.mhpmcounter_wmask = '0; + this.m_csr.mhpmevent_we = '0; + this.m_csr.mhpmevent_wmask = '0; `INIT_CSR(mip) `INIT_CSR(tdata1) `INIT_CSR(tdata2) @@ -872,40 +936,46 @@ this.m_valid = 1'b1; this.m_stage = ID; this.m_order = this.m_order + 64'h1; + this.m_start_cycle = cycles; + this.m_stop_cycle = 0; + this.m_start_time = $time; + this.m_stop_time = 0; if(this.m_skip_order) begin this.m_order = this.m_order + 64'h1; end - this.m_skip_order = 1'b0; - this.m_pc_rdata = r_pipe_freeze_trace.pc_id; - this.m_is_illegal = 1'b0; - this.m_is_irq = 1'b0; - this.m_is_memory = 1'b0; - this.m_is_load = 1'b0; - this.m_is_apu = 1'b0; - this.m_is_apu_ok = 1'b0; - this.m_apu_req_id = 0; - this.m_mem_req_id[0] = 0; - this.m_mem_req_id[1] = 0; - this.m_mem_req_id_valid = '0; - this.m_data_missaligned = 1'b0; - this.m_got_first_data = 1'b0; - this.m_got_ex_reg = 1'b0; - this.m_got_regs_write = 1'b0; - this.m_move_down_pipe = 1'b0; - this.m_instret_cnt = 0; - this.m_rd_addr[0] = '0; - this.m_rd_addr[1] = '0; - this.m_2_rd_insn = 1'b0; - this.m_rs1_addr = '0; - this.m_rs2_addr = '0; - this.m_rs3_addr = '0; - this.m_ex_fw = '0; - this.m_csr.got_minstret = '0; - this.m_dbg_taken = '0; - this.m_trap = 1'b0; - this.m_fflags_we_non_apu = 1'b0; - this.m_frm_we_non_apu = 1'b0; - 
this.m_fcsr_we_non_apu = 1'b0; + this.m_skip_order = 1'b0; + this.m_pc_rdata = r_pipe_freeze_trace.pc_id; + this.m_is_illegal = 1'b0; + this.m_is_irq = 1'b0; + this.m_is_memory = 1'b0; + this.m_is_load = 1'b0; + this.m_is_apu = 1'b0; + this.m_is_apu_ok = 1'b0; + this.m_apu_req_id = 0; + this.m_mem_req_id[0] = 0; + this.m_mem_req_id[1] = 0; + this.m_mem_req_id_valid = '0; + this.m_data_missaligned = 1'b0; + this.m_got_first_data = 1'b0; + this.m_got_ex_reg = 1'b0; + this.m_got_regs_write = 1'b0; + this.m_move_down_pipe = 1'b0; + this.m_instret_cnt = 0; + this.m_instret_smaple_trigger = 0; + this.m_sample_csr_write_in_ex = 1'b1; + this.m_rd_addr[0] = '0; + this.m_rd_addr[1] = '0; + this.m_2_rd_insn = 1'b0; + this.m_rs1_addr = '0; + this.m_rs2_addr = '0; + this.m_rs3_addr = '0; + this.m_ex_fw = '0; + this.m_csr.got_minstret = '0; + this.m_dbg_taken = '0; + this.m_trap = 1'b0; + this.m_fflags_we_non_apu = 1'b0; + this.m_frm_we_non_apu = 1'b0; + this.m_fcsr_we_non_apu = 1'b0; this.m_csr.mcause_we = '0; if (is_compressed_id_i) begin this.m_insn[31:16] = '0; @@ -944,47 +1014,53 @@ endfunction function void copy_full(insn_trace_t m_source); - this.m_valid = m_source.m_valid; - this.m_stage = m_source.m_stage; - this.m_order = m_source.m_order; - this.m_pc_rdata = m_source.m_pc_rdata; - this.m_insn = m_source.m_insn; - this.m_mnemonic = m_source.m_mnemonic; - this.m_is_memory = m_source.m_is_memory; - this.m_is_load = m_source.m_is_load; - this.m_is_apu = m_source.m_is_apu; - this.m_is_apu_ok = m_source.m_is_apu_ok; - this.m_apu_req_id = m_source.m_apu_req_id; - this.m_mem_req_id = m_source.m_mem_req_id; - this.m_mem_req_id_valid = m_source.m_mem_req_id_valid; - this.m_data_missaligned = m_source.m_data_missaligned; - this.m_got_first_data = m_source.m_got_first_data; - this.m_got_ex_reg = m_source.m_got_ex_reg; - this.m_dbg_taken = m_source.m_dbg_taken; - this.m_dbg_cause = m_source.m_dbg_cause; - this.m_is_ebreak = m_source.m_is_ebreak; - this.m_is_illegal = 
m_source.m_is_illegal; - this.m_is_irq = m_source.m_is_irq; - this.m_instret_cnt = m_source.m_instret_cnt; - this.m_rs1_addr = m_source.m_rs1_addr; - this.m_rs2_addr = m_source.m_rs2_addr; - this.m_rs3_addr = m_source.m_rs3_addr; - this.m_rs1_rdata = m_source.m_rs1_rdata; - this.m_rs2_rdata = m_source.m_rs2_rdata; - this.m_rs3_rdata = m_source.m_rs3_rdata; - - this.m_ex_fw = m_source.m_ex_fw; - this.m_rd_addr = m_source.m_rd_addr; - this.m_2_rd_insn = m_source.m_2_rd_insn; - this.m_rd_wdata = m_source.m_rd_wdata; - - this.m_intr = m_source.m_intr; - this.m_trap = m_source.m_trap; - this.m_fflags_we_non_apu = m_source.m_fflags_we_non_apu; - this.m_frm_we_non_apu = m_source.m_frm_we_non_apu ; - this.m_fcsr_we_non_apu = m_source.m_fcsr_we_non_apu; - - this.m_mem = m_source.m_mem; + this.m_valid = m_source.m_valid; + this.m_stage = m_source.m_stage; + this.m_order = m_source.m_order; + this.m_start_cycle = m_source.m_start_cycle; + this.m_stop_cycle = m_source.m_stop_cycle; + this.m_start_time = m_source.m_start_time; + this.m_stop_time = m_source.m_stop_time; + this.m_pc_rdata = m_source.m_pc_rdata; + this.m_insn = m_source.m_insn; + this.m_mnemonic = m_source.m_mnemonic; + this.m_is_memory = m_source.m_is_memory; + this.m_is_load = m_source.m_is_load; + this.m_is_apu = m_source.m_is_apu; + this.m_is_apu_ok = m_source.m_is_apu_ok; + this.m_apu_req_id = m_source.m_apu_req_id; + this.m_mem_req_id = m_source.m_mem_req_id; + this.m_mem_req_id_valid = m_source.m_mem_req_id_valid; + this.m_data_missaligned = m_source.m_data_missaligned; + this.m_got_first_data = m_source.m_got_first_data; + this.m_got_ex_reg = m_source.m_got_ex_reg; + this.m_dbg_taken = m_source.m_dbg_taken; + this.m_dbg_cause = m_source.m_dbg_cause; + this.m_is_ebreak = m_source.m_is_ebreak; + this.m_is_illegal = m_source.m_is_illegal; + this.m_is_irq = m_source.m_is_irq; + this.m_instret_cnt = m_source.m_instret_cnt; + this.m_instret_smaple_trigger = m_source.m_instret_smaple_trigger; + 
this.m_sample_csr_write_in_ex = m_source.m_sample_csr_write_in_ex; + this.m_rs1_addr = m_source.m_rs1_addr; + this.m_rs2_addr = m_source.m_rs2_addr; + this.m_rs3_addr = m_source.m_rs3_addr; + this.m_rs1_rdata = m_source.m_rs1_rdata; + this.m_rs2_rdata = m_source.m_rs2_rdata; + this.m_rs3_rdata = m_source.m_rs3_rdata; + + this.m_ex_fw = m_source.m_ex_fw; + this.m_rd_addr = m_source.m_rd_addr; + this.m_2_rd_insn = m_source.m_2_rd_insn; + this.m_rd_wdata = m_source.m_rd_wdata; + + this.m_intr = m_source.m_intr; + this.m_trap = m_source.m_trap; + this.m_fflags_we_non_apu = m_source.m_fflags_we_non_apu; + this.m_frm_we_non_apu = m_source.m_frm_we_non_apu ; + this.m_fcsr_we_non_apu = m_source.m_fcsr_we_non_apu; + + this.m_mem = m_source.m_mem; //CRS `ASSIGN_CSR(mstatus) `ASSIGN_CSR(mstatus_fs) @@ -995,8 +1071,26 @@ `ASSIGN_CSR(mscratch) `ASSIGN_CSR(mepc) `ASSIGN_CSR(mcause) + `ASSIGN_CSR(mcycle) `ASSIGN_CSR(minstret) this.m_csr.got_minstret = m_source.m_csr.got_minstret; + `ASSIGN_CSR(mcycleh) + `ASSIGN_CSR(minstreth) + `ASSIGN_CSR(cycle) + `ASSIGN_CSR(instret) + // this.m_csr.got_minstret = m_source.m_csr.got_minstret; + `ASSIGN_CSR(cycleh) + `ASSIGN_CSR(instreth) + this.m_csr.mhpmcounter_we = m_source.m_csr.mhpmcounter_we; + this.m_csr.mhpmcounter_rdata = m_source.m_csr.mhpmcounter_rdata; + this.m_csr.mhpmcounter_rmask = m_source.m_csr.mhpmcounter_rmask; + this.m_csr.mhpmcounter_wdata = m_source.m_csr.mhpmcounter_wdata; + this.m_csr.mhpmcounter_wmask = m_source.m_csr.mhpmcounter_wmask; + this.m_csr.mhpmevent_we = m_source.m_csr.mhpmevent_we; + this.m_csr.mhpmevent_rdata = m_source.m_csr.mhpmevent_rdata; + this.m_csr.mhpmevent_rmask = m_source.m_csr.mhpmevent_rmask; + this.m_csr.mhpmevent_wdata = m_source.m_csr.mhpmevent_wdata; + this.m_csr.mhpmevent_wmask = m_source.m_csr.mhpmevent_wmask; `ASSIGN_CSR(mip) `ASSIGN_CSR(tdata1) `ASSIGN_CSR(tdata2) diff --git a/hw/vendor/openhwgroup_cv32e40p/bhv/pipe_freeze_trace.sv b/hw/vendor/openhwgroup_cv32e40p/bhv/pipe_freeze_trace.sv 
index 58051ab8e..39a16fa62 100644 --- a/hw/vendor/openhwgroup_cv32e40p/bhv/pipe_freeze_trace.sv +++ b/hw/vendor/openhwgroup_cv32e40p/bhv/pipe_freeze_trace.sv @@ -1,27 +1,29 @@ -// Copyright (c) 2023 OpenHW Group +// Copyright 2024 OpenHW Group and Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 // -// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. // You may obtain a copy of the License at // -// https://solderpad.org/licenses/ +// https://solderpad.org/licenses/SHL-2.1/ // -// Unless required by applicable law or agreed to in writing, software +// Unless required by applicable law or agreed to in writing, any work // distributed under the License is distributed on an "AS IS" BASIS, // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. // See the License for the specific language governing permissions and // limitations under the License. -// -// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 -// CV32E40P -// -// Contributors: Yoann Pruvost, Dolphin Design +//////////////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Yoann Pruvost, Dolphin Design // +// // +// Description: Structures, Functions and Task used to store all information // +// coming from the core pipeline at every posedge. // +// Those information will then be processed by RVFI. 
// +// // +//////////////////////////////////////////////////////////////////////////////////// -/* - * This struct is used to store all information comming from the core at every posedge - * The information will then be processed - */ typedef struct { logic is_decoding; logic is_illegal; @@ -64,6 +66,9 @@ typedef struct { logic is_compressed_id; logic ebrk_insn_dec; + logic ecall_insn_dec; + logic mret_insn_dec; + logic mret_dec; logic [5:0] csr_cause; @@ -112,6 +117,9 @@ typedef struct { logic [31:0] data_wdata_ex; logic lsu_split_q_ex; + logic mult_ready; + logic alu_ready; + //// WB probes //// logic [31:0] pc_wb; logic wb_ready; @@ -198,6 +206,8 @@ typedef struct { logic mcause_we; logic dcsr_we; + logic fregs_we; + logic jvt_we; Status_t mstatus_n; Status_t mstatus_q; @@ -341,6 +351,7 @@ function compute_csr_we(); r_pipe_freeze_trace.csr.fflags_we = 1'b0; r_pipe_freeze_trace.csr.frm_we = 1'b0; r_pipe_freeze_trace.csr.fcsr_we = 1'b0; + r_pipe_freeze_trace.csr.mhpmevent_we = '0; r_pipe_freeze_trace.csr.dpc_we = csr_dpc_we_i; if (r_pipe_freeze_trace.csr.we) begin case (r_pipe_freeze_trace.csr.addr) @@ -348,18 +359,33 @@ function compute_csr_we(); r_pipe_freeze_trace.csr.mstatus_we = 1'b1; r_pipe_freeze_trace.csr.mstatus_fs_we = 1'b1; end - CSR_MISA: r_pipe_freeze_trace.csr.misa_we = 1'b1; - CSR_MTVEC: r_pipe_freeze_trace.csr.mtvec_we = 1'b1; - CSR_MSCRATCH: r_pipe_freeze_trace.csr.mscratch_we = 1'b1; - CSR_MEPC: r_pipe_freeze_trace.csr.mepc_we = 1'b1; - CSR_MCAUSE: r_pipe_freeze_trace.csr.mcause_we = 1'b1; - CSR_DCSR: r_pipe_freeze_trace.csr.dcsr_we = 1'b1; - CSR_FFLAGS: r_pipe_freeze_trace.csr.fflags_we = 1'b1; - CSR_FRM: r_pipe_freeze_trace.csr.frm_we = 1'b1; - CSR_FCSR: r_pipe_freeze_trace.csr.fcsr_we = 1'b1; - CSR_DPC: r_pipe_freeze_trace.csr.dpc_we = 1'b1; + CSR_MISA: r_pipe_freeze_trace.csr.misa_we = 1'b1; + CSR_MTVEC: r_pipe_freeze_trace.csr.mtvec_we = 1'b1; + CSR_MSCRATCH: r_pipe_freeze_trace.csr.mscratch_we = 1'b1; + CSR_MEPC: 
r_pipe_freeze_trace.csr.mepc_we = 1'b1; + CSR_MCAUSE: r_pipe_freeze_trace.csr.mcause_we = 1'b1; + CSR_DCSR: r_pipe_freeze_trace.csr.dcsr_we = 1'b1; + CSR_FFLAGS: begin + r_pipe_freeze_trace.csr.fflags_we = 1'b1; + r_pipe_freeze_trace.csr.mstatus_fs_we = 1'b1; + end + CSR_FRM: begin + r_pipe_freeze_trace.csr.frm_we = 1'b1; + r_pipe_freeze_trace.csr.mstatus_fs_we = 1'b1; + end + CSR_FCSR: begin + r_pipe_freeze_trace.csr.fcsr_we = 1'b1; + r_pipe_freeze_trace.csr.mstatus_fs_we = 1'b1; + end + CSR_DPC: r_pipe_freeze_trace.csr.dpc_we = 1'b1; + CSR_DSCRATCH0: r_pipe_freeze_trace.csr.dscratch0_we = 1'b1; + CSR_DSCRATCH1: r_pipe_freeze_trace.csr.dscratch1_we = 1'b1; endcase end + + if (csr_mhpmevent_we_i) begin + r_pipe_freeze_trace.csr.mhpmevent_we[r_pipe_freeze_trace.csr.addr[4:0]] = 1'b1; + end // CSR_MCAUSE: r_pipe_freeze_trace.csr.mcause_we = r_pipe_freeze_trace.csr.mcause_n != r_pipe_freeze_trace.csr.mcause_q; //for debug purpose endfunction /* @@ -416,6 +442,9 @@ task monitor_pipeline(); r_pipe_freeze_trace.jump_target_id = jump_target_id_i; r_pipe_freeze_trace.is_compressed_id = is_compressed_id_i; r_pipe_freeze_trace.ebrk_insn_dec = ebrk_insn_dec_i; + r_pipe_freeze_trace.ecall_insn_dec = ecall_insn_dec_i; + r_pipe_freeze_trace.mret_insn_dec = mret_insn_dec_i; + r_pipe_freeze_trace.mret_dec = mret_dec_i; r_pipe_freeze_trace.csr_cause = csr_cause_i; r_pipe_freeze_trace.debug_csr_save = debug_csr_save_i; r_pipe_freeze_trace.minstret = minstret_i; @@ -462,6 +491,8 @@ task monitor_pipeline(); r_pipe_freeze_trace.data_wdata_ex = data_wdata_ex_i; r_pipe_freeze_trace.lsu_split_q_ex = lsu_split_q_ex_i; + r_pipe_freeze_trace.mult_ready = mult_ready_i; + r_pipe_freeze_trace.alu_ready = alu_ready_i; //// WB probes //// r_pipe_freeze_trace.pc_wb = pc_wb_i; r_pipe_freeze_trace.wb_ready = wb_ready_i; @@ -526,6 +557,8 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.we = csr_we_i; r_pipe_freeze_trace.csr.wdata_int = csr_wdata_int_i; + r_pipe_freeze_trace.csr.fregs_we = 
csr_fregs_we_i; + r_pipe_freeze_trace.csr.jvt_we = csr_jvt_we_i; r_pipe_freeze_trace.csr.mstatus_n = csr_mstatus_n_i; r_pipe_freeze_trace.csr.mstatus_q = csr_mstatus_q_i; @@ -550,7 +583,6 @@ task monitor_pipeline(); r_pipe_freeze_trace.csr.mcountinhibit_we = csr_mcountinhibit_we_i; r_pipe_freeze_trace.csr.mhpmevent_n = csr_mhpmevent_n_i; r_pipe_freeze_trace.csr.mhpmevent_q = csr_mhpmevent_q_i; - r_pipe_freeze_trace.csr.mhpmevent_we = csr_mhpmevent_we_i; r_pipe_freeze_trace.csr.mscratch_n = csr_mscratch_n_i; r_pipe_freeze_trace.csr.mscratch_q = csr_mscratch_q_i; r_pipe_freeze_trace.csr.mepc_n = csr_mepc_n_i; @@ -650,10 +682,6 @@ task monitor_pipeline(); if (r_pipe_freeze_trace.csr.fcsr_we) begin r_pipe_freeze_trace.csr.fflags_we = 1'b1; r_pipe_freeze_trace.csr.frm_we = 1'b1; - end else begin - if (r_pipe_freeze_trace.csr.fflags_we || r_pipe_freeze_trace.csr.frm_we) begin - r_pipe_freeze_trace.csr.fcsr_we = 1'b1; - end end if (csr_fcsr_fflags_we_i) begin diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_controller.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_controller.sv index 549802690..d1c86d6c8 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_controller.sv +++ b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_controller.sv @@ -491,6 +491,7 @@ module cv32e40p_controller import cv32e40p_pkg::*; if ( (debug_req_pending || trigger_match_i) & ~debug_mode_q ) begin //Serving the debug + is_decoding_o = COREV_PULP ? 1'b0 : 1'b1; halt_if_o = 1'b1; halt_id_o = 1'b1; ctrl_fsm_ns = DBG_FLUSH; @@ -596,7 +597,17 @@ module cv32e40p_controller import cv32e40p_pkg::*; csr_status_i: begin halt_if_o = 1'b1; - ctrl_fsm_ns = id_ready_i ? 
FLUSH_EX : DECODE; + if (~id_ready_i) begin + ctrl_fsm_ns = DECODE; + end else begin + ctrl_fsm_ns = FLUSH_EX; + if (hwlp_end0_eq_pc) begin + hwlp_dec_cnt_o[0] = 1'b1; + end + if (hwlp_end1_eq_pc) begin + hwlp_dec_cnt_o[1] = 1'b1; + end + end end data_load_event_i: begin @@ -616,7 +627,7 @@ module cv32e40p_controller import cv32e40p_pkg::*; ctrl_fsm_ns = hwlp_end0_eq_pc_plus4 || hwlp_end1_eq_pc_plus4 ? DECODE : DECODE_HWLOOP; // we can be at the end of HWloop due to a return from interrupt or ecall or ebreak or exceptions - if(hwlp_end0_eq_pc && hwlp_counter0_gt_1) begin + if (hwlp_end0_eq_pc && hwlp_counter0_gt_1) begin pc_mux_o = PC_HWLOOP; if (~jump_done_q) begin pc_set_o = 1'b1; @@ -712,6 +723,7 @@ module cv32e40p_controller import cv32e40p_pkg::*; if ( (debug_req_pending || trigger_match_i) & ~debug_mode_q ) begin //Serving the debug + is_decoding_o = COREV_PULP ? 1'b0 : 1'b1; halt_if_o = 1'b1; halt_id_o = 1'b1; ctrl_fsm_ns = DBG_FLUSH; @@ -764,7 +776,7 @@ module cv32e40p_controller import cv32e40p_pkg::*; ebrk_insn_i: begin halt_if_o = 1'b1; - halt_id_o = 1'b1; + halt_id_o = 1'b0; if (debug_mode_q) // we got back to the park loop in the debug rom @@ -776,20 +788,30 @@ module cv32e40p_controller import cv32e40p_pkg::*; else begin // otherwise just a normal ebreak exception - ctrl_fsm_ns = FLUSH_EX; + ctrl_fsm_ns = id_ready_i ? FLUSH_EX : DECODE_HWLOOP; end end ecall_insn_i: begin halt_if_o = 1'b1; - halt_id_o = 1'b1; - ctrl_fsm_ns = FLUSH_EX; + halt_id_o = 1'b0; + ctrl_fsm_ns = id_ready_i ? FLUSH_EX : DECODE_HWLOOP; end csr_status_i: begin halt_if_o = 1'b1; - ctrl_fsm_ns = id_ready_i ? 
FLUSH_EX : DECODE_HWLOOP; + if (~id_ready_i) begin + ctrl_fsm_ns = DECODE_HWLOOP; + end else begin + ctrl_fsm_ns = FLUSH_EX; + if (hwlp_end0_eq_pc) begin + hwlp_dec_cnt_o[0] = 1'b1; + end + if (hwlp_end1_eq_pc) begin + hwlp_dec_cnt_o[1] = 1'b1; + end + end end data_load_event_i: begin @@ -1065,16 +1087,10 @@ module cv32e40p_controller import cv32e40p_pkg::*; end csr_status_i: begin - - if(hwlp_end0_eq_pc && hwlp_counter0_gt_1) begin - pc_mux_o = PC_HWLOOP; - pc_set_o = 1'b1; - hwlp_dec_cnt_o[0] = 1'b1; - end - if(hwlp_end1_eq_pc && hwlp_counter1_gt_1) begin - pc_mux_o = PC_HWLOOP; - pc_set_o = 1'b1; - hwlp_dec_cnt_o[1] = 1'b1; + if ((hwlp_end0_eq_pc && !hwlp_counter0_eq_0) || + (hwlp_end1_eq_pc && !hwlp_counter1_eq_0)) begin + pc_mux_o = PC_HWLOOP; + pc_set_o = 1'b1; end end @@ -1559,7 +1575,7 @@ endgenerate // HWLoop 0 and 1 having target address constraints property p_hwlp_same_target_address; - @(posedge clk) (hwlp_counter_i[1] > 1 && hwlp_counter_i[0] > 1) |-> ( hwlp_end_addr_i[1] - 4 >= hwlp_end_addr_i[0] - 4 + 8 ); + @(posedge clk) (hwlp_counter_i[1] > 1 && hwlp_counter_i[0] > 1 && pc_id_i >= hwlp_start_addr_i[0] && pc_id_i <= hwlp_end_addr_i[0] - 4) |-> ( hwlp_end_addr_i[1] - 4 >= hwlp_end_addr_i[0] - 4 + 8 ); endproperty a_hwlp_same_target_address : assert property(p_hwlp_same_target_address) else $warning("%t, HWLoops target address do not respect constraints", $time); diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_core.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_core.sv index e6f9f7098..4275cf9c1 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_core.sv +++ b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_core.sv @@ -70,7 +70,8 @@ module cv32e40p_core output logic [31:0] data_wdata_o, input logic [31:0] data_rdata_i, - // apu-interconnect + // CVFPU interface + output logic apu_busy_o, // handshake signals output logic apu_req_o, input logic apu_gnt_i, @@ -158,11 +159,11 @@ module cv32e40p_core logic [31:0] jump_target_id, 
jump_target_ex; logic branch_in_ex; logic branch_decision; + logic [ 1:0] ctrl_transfer_insn_in_dec; logic ctrl_busy; logic if_busy; logic lsu_busy; - logic apu_busy; logic [31:0] pc_ex; // PC of last executed branch or cv.elw @@ -201,6 +202,7 @@ module cv32e40p_core logic [ C_RM-1:0] frm_csr; logic [ C_FFLAG-1:0] fflags_csr; logic fflags_we; + logic fregs_we; // APU logic apu_en_ex; @@ -228,6 +230,7 @@ module cv32e40p_core logic regfile_we_ex; logic [ 5:0] regfile_waddr_fw_wb_o; // From WB to ID logic regfile_we_wb; + logic regfile_we_wb_power; logic [ 31:0] regfile_wdata; logic [ 5:0] regfile_alu_waddr_ex; @@ -235,6 +238,7 @@ module cv32e40p_core logic [ 5:0] regfile_alu_waddr_fw; logic regfile_alu_we_fw; + logic regfile_alu_we_fw_power; logic [ 31:0] regfile_alu_wdata_fw; // CSR control @@ -395,7 +399,7 @@ module cv32e40p_core .if_busy_i (if_busy), .ctrl_busy_i(ctrl_busy), .lsu_busy_i (lsu_busy), - .apu_busy_i (apu_busy), + .apu_busy_i (apu_busy_o), // PULP cluster .pulp_clock_en_i (pulp_clock_en_i), @@ -540,9 +544,10 @@ module cv32e40p_core .instr_req_o (instr_req_int), // Jumps and branches - .branch_in_ex_o (branch_in_ex), - .branch_decision_i(branch_decision), - .jump_target_o (jump_target_id), + .branch_in_ex_o (branch_in_ex), + .branch_decision_i (branch_decision), + .jump_target_o (jump_target_id), + .ctrl_transfer_insn_in_dec_o(ctrl_transfer_insn_in_dec), // IF and ID control signals .clear_instr_valid_o(clear_instr_valid), @@ -629,7 +634,7 @@ module cv32e40p_core .apu_write_regs_valid_o (apu_write_regs_valid), .apu_write_dep_i (apu_write_dep), .apu_perf_dep_o (perf_apu_dep), - .apu_busy_i (apu_busy), + .apu_busy_i (apu_busy_o), // CSR ID/EX .csr_access_ex_o (csr_access_ex), @@ -699,13 +704,15 @@ module cv32e40p_core .wake_from_sleep_o(wake_from_sleep), // Forward Signals - .regfile_waddr_wb_i(regfile_waddr_fw_wb_o), // Write address ex-wb pipeline - .regfile_we_wb_i (regfile_we_wb), // write enable for the register file - 
.regfile_wdata_wb_i(regfile_wdata), // write data to commit in the register file + .regfile_waddr_wb_i (regfile_waddr_fw_wb_o), // Write address ex-wb pipeline + .regfile_we_wb_i (regfile_we_wb), // write enable for the register file + .regfile_we_wb_power_i(regfile_we_wb_power), + .regfile_wdata_wb_i (regfile_wdata), // write data to commit in the register file - .regfile_alu_waddr_fw_i(regfile_alu_waddr_fw), - .regfile_alu_we_fw_i (regfile_alu_we_fw), - .regfile_alu_wdata_fw_i(regfile_alu_wdata_fw), + .regfile_alu_waddr_fw_i (regfile_alu_waddr_fw), + .regfile_alu_we_fw_i (regfile_alu_we_fw), + .regfile_alu_we_fw_power_i(regfile_alu_we_fw_power), + .regfile_alu_wdata_fw_i (regfile_alu_wdata_fw), // from ALU .mult_multicycle_i(mult_multicycle), @@ -737,6 +744,7 @@ module cv32e40p_core // // ///////////////////////////////////////////////////// cv32e40p_ex_stage #( + .COREV_PULP (COREV_PULP), .FPU (FPU), .APU_NARGS_CPU (APU_NARGS_CPU), .APU_WOP_CPU (APU_WOP_CPU), @@ -785,6 +793,8 @@ module cv32e40p_core .data_misaligned_ex_i(data_misaligned_ex), // from ID/EX pipeline .data_misaligned_i (data_misaligned), + .ctrl_transfer_insn_in_dec_i(ctrl_transfer_insn_in_dec), + // FPU .fpu_fflags_we_o(fflags_we), .fpu_fflags_o (fflags_csr), @@ -808,9 +818,9 @@ module cv32e40p_core .apu_perf_cont_o(perf_apu_cont), .apu_perf_wb_o (perf_apu_wb), .apu_ready_wb_o (apu_ready_wb), - .apu_busy_o (apu_busy), + .apu_busy_o (apu_busy_o), - // apu-interconnect + // CVFPU interface // handshake signals .apu_req_o (apu_req_o), .apu_gnt_i (apu_gnt_i), @@ -838,18 +848,20 @@ module cv32e40p_core .regfile_we_i (regfile_we_ex), // Output of ex stage pipeline - .regfile_waddr_wb_o(regfile_waddr_fw_wb_o), - .regfile_we_wb_o (regfile_we_wb), - .regfile_wdata_wb_o(regfile_wdata), + .regfile_waddr_wb_o (regfile_waddr_fw_wb_o), + .regfile_we_wb_o (regfile_we_wb), + .regfile_we_wb_power_o(regfile_we_wb_power), + .regfile_wdata_wb_o (regfile_wdata), // To IF: Jump and branch target and decision 
.jump_target_o (jump_target_ex), .branch_decision_o(branch_decision), // To ID stage: Forwarding signals - .regfile_alu_waddr_fw_o(regfile_alu_waddr_fw), - .regfile_alu_we_fw_o (regfile_alu_we_fw), - .regfile_alu_wdata_fw_o(regfile_alu_wdata_fw), + .regfile_alu_waddr_fw_o (regfile_alu_waddr_fw), + .regfile_alu_we_fw_o (regfile_alu_we_fw), + .regfile_alu_we_fw_power_o(regfile_alu_we_fw_power), + .regfile_alu_wdata_fw_o (regfile_alu_wdata_fw), // stall control .is_decoding_i (is_decoding), @@ -969,6 +981,7 @@ module cv32e40p_core .frm_o (frm_csr), .fflags_i (fflags_csr), .fflags_we_i(fflags_we), + .fregs_we_i (fregs_we), // Interrupt related control signals .mie_bypass_o (mie_bypass), @@ -1037,13 +1050,16 @@ module cv32e40p_core ); // CSR access - assign csr_addr = csr_addr_int; - assign csr_wdata = alu_operand_a_ex; - assign csr_op = csr_op_ex; + assign csr_addr = csr_addr_int; + assign csr_wdata = alu_operand_a_ex; + assign csr_op = csr_op_ex; assign csr_addr_int = csr_num_e'(csr_access_ex ? alu_operand_b_ex[11:0] : '0); - + // Floating-Point registers write + assign fregs_we = (FPU == 1 & ZFINX == 0) ? 
((regfile_alu_we_fw && regfile_alu_waddr_fw[5]) || + (regfile_we_wb && regfile_waddr_fw_wb_o[5])) + : 1'b0; /////////////////////////// // ____ __ __ ____ // diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_cs_registers.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_cs_registers.sv index 25c777e21..920305c8d 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_cs_registers.sv +++ b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_cs_registers.sv @@ -68,6 +68,7 @@ module cv32e40p_cs_registers output logic [ 2:0] frm_o, input logic [C_FFLAG-1:0] fflags_i, input logic fflags_we_i, + input logic fregs_we_i, // Interrupts output logic [31:0] mie_bypass_o, @@ -212,6 +213,7 @@ module cv32e40p_cs_registers logic [31:0] exception_pc; Status_t mstatus_q, mstatus_n; + logic mstatus_we_int; FS_t mstatus_fs_q, mstatus_fs_n; logic [5:0] mcause_q, mcause_n; logic [5:0] ucause_q, ucause_n; @@ -507,7 +509,7 @@ module cv32e40p_cs_registers // mimpid, Machine Implementation ID CSR_MIMPID: begin - csr_rdata_int = (FPU || COREV_PULP || COREV_CLUSTER) ? 32'h1 : 'b0; + csr_rdata_int = (FPU == 1 || COREV_PULP == 1 || COREV_CLUSTER == 1) ? 
32'h1 : 'b0; end // unimplemented, read 0 CSRs @@ -897,6 +899,7 @@ module cv32e40p_cs_registers dscratch0_n = dscratch0_q; dscratch1_n = dscratch1_q; + mstatus_we_int = 1'b0; mstatus_n = mstatus_q; mcause_n = mcause_q; ucause_n = '0; // Not used if PULP_SECURE == 0 @@ -957,7 +960,8 @@ module cv32e40p_cs_registers mprv: csr_wdata_int[MSTATUS_MPRV_BIT] }; if (FPU == 1 && ZFINX == 0) begin - mstatus_fs_n = FS_t'(csr_wdata_int[MSTATUS_FS_BIT_HIGH:MSTATUS_FS_BIT_LOW]); + mstatus_we_int = 1'b1; + mstatus_fs_n = FS_t'(csr_wdata_int[MSTATUS_FS_BIT_HIGH:MSTATUS_FS_BIT_LOW]); end end // mie: machine interrupt enable @@ -1027,7 +1031,7 @@ module cv32e40p_cs_registers if (ZFINX == 0) begin // FPU Register File/Flags implicit update or modified by CSR instructions - if (fflags_we_i || fcsr_update) begin + if ((fregs_we_i && !(mstatus_we_int && mstatus_fs_n != FS_DIRTY)) || fflags_we_i || fcsr_update) begin mstatus_fs_n = FS_DIRTY; end end diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_decoder.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_decoder.sv index d03027bae..c1608eb4a 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_decoder.sv +++ b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_decoder.sv @@ -1057,7 +1057,6 @@ module cv32e40p_decoder 5'b00000: begin fpu_op = cv32e40p_fpu_pkg::ADD; fp_op_group = ADDMUL; - apu_op_o = 2'b0; alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; end @@ -1066,7 +1065,6 @@ module cv32e40p_decoder fpu_op = cv32e40p_fpu_pkg::ADD; fpu_op_mod = 1'b1; fp_op_group = ADDMUL; - apu_op_o = 2'b1; alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; alu_op_c_mux_sel_o = OP_C_REGB_OR_FWD; end @@ -1085,7 +1083,6 @@ module cv32e40p_decoder regb_used_o = 1'b0; fpu_op = cv32e40p_fpu_pkg::SQRT; fp_op_group = DIVSQRT; - apu_op_o = 1'b1; // rs2 must be zero if (instr_rdata_i[24:20] != 5'b00000) illegal_insn_o = 1'b1; end @@ -1213,7 +1210,6 @@ module cv32e40p_decoder fpu_op = cv32e40p_fpu_pkg::F2I; fp_op_group = CONV; fpu_op_mod = 
instr_rdata_i[20]; // signed/unsigned switch - apu_op_o = 2'b1; unique case (instr_rdata_i[26:25]) //fix for casting to different formats other than FP32 2'b00: begin @@ -1249,7 +1245,6 @@ module cv32e40p_decoder fpu_op = cv32e40p_fpu_pkg::I2F; fp_op_group = CONV; fpu_op_mod = instr_rdata_i[20]; // signed/unsigned switch - apu_op_o = 2'b0; // bits [21:20] used, other bits must be 0 if (instr_rdata_i[24:21]) illegal_insn_o = 1'b1; // in RV32, no casts to L allowed. end @@ -1323,20 +1318,20 @@ module cv32e40p_decoder // check rounding mode if (check_fprm) begin unique case (instr_rdata_i[14:12]) inside - [3'b000:3'b100]: ; //legal rounding modes + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100: ; //legal rounding modes 3'b101: begin // Alternative Half-Precsision encded as fmt=10 and rm=101 if (~C_XF16ALT || fpu_dst_fmt_o != cv32e40p_fpu_pkg::FP16ALT) illegal_insn_o = 1'b1; // actual rounding mode from frm csr unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes - default : illegal_insn_o = 1'b1; + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100 : fp_rnd_mode_o = frm_i; //legal rounding modes + default : illegal_insn_o = 1'b1; endcase end 3'b111: begin // rounding mode from frm csr unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes - default : illegal_insn_o = 1'b1; + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100 : fp_rnd_mode_o = frm_i; //legal rounding modes + default : illegal_insn_o = 1'b1; endcase end default : illegal_insn_o = 1'b1; @@ -1364,6 +1359,7 @@ module cv32e40p_decoder NONCOMP : apu_lat_o = (FPU_OTHERS_LAT<2) ? FPU_OTHERS_LAT+1 : 2'h3; // CONV uses the same latency for all formats CONV : apu_lat_o = (FPU_OTHERS_LAT<2) ? 
FPU_OTHERS_LAT+1 : 2'h3; + default: ; endcase // Set FPnew OP and OPMOD as the APU op @@ -1425,25 +1421,21 @@ module cv32e40p_decoder unique case (instr_rdata_i[6:0]) // fmadd.fmt - FP Fused multiply-add OPCODE_OP_FMADD : begin - fpu_op = cv32e40p_fpu_pkg::FMADD; - apu_op_o = 2'b00; + fpu_op = cv32e40p_fpu_pkg::FMADD; end // fmsub.fmt - FP Fused multiply-subtract OPCODE_OP_FMSUB : begin - fpu_op = cv32e40p_fpu_pkg::FMADD; - fpu_op_mod = 1'b1; - apu_op_o = 2'b01; + fpu_op = cv32e40p_fpu_pkg::FMADD; + fpu_op_mod = 1'b1; end // fnmsub.fmt - FP Negated fused multiply-subtract OPCODE_OP_FNMSUB : begin - fpu_op = cv32e40p_fpu_pkg::FNMSUB; - apu_op_o = 2'b10; + fpu_op = cv32e40p_fpu_pkg::FNMSUB; end // fnmadd.fmt - FP Negated fused multiply-add OPCODE_OP_FNMADD : begin - fpu_op = cv32e40p_fpu_pkg::FNMSUB; - fpu_op_mod = 1'b1; - apu_op_o = 2'b11; + fpu_op = cv32e40p_fpu_pkg::FNMSUB; + fpu_op_mod = 1'b1; end default : ; endcase @@ -1459,19 +1451,19 @@ module cv32e40p_decoder // check rounding mode unique case (instr_rdata_i[14:12]) inside - [3'b000:3'b100]: ; //legal rounding modes + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100: ; //legal rounding modes 3'b101: begin // Alternative Half-Precsision encded as fmt=10 and rm=101 if (~C_XF16ALT || fpu_dst_fmt_o != cv32e40p_fpu_pkg::FP16ALT) illegal_insn_o = 1'b1; // actual rounding mode from frm csr unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100 : fp_rnd_mode_o = frm_i; //legal rounding modes default : illegal_insn_o = 1'b1; endcase end 3'b111: begin // rounding mode from frm csr unique case (frm_i) inside - [3'b000:3'b100] : fp_rnd_mode_o = frm_i; //legal rounding modes + 3'b000, 3'b001, 3'b010, 3'b011, 3'b100 : fp_rnd_mode_o = frm_i; //legal rounding modes default : illegal_insn_o = 1'b1; endcase end @@ -1493,6 +1485,7 @@ module cv32e40p_decoder // Set FPnew OP and OPMOD as the APU op apu_op_o = {fpu_vec_op, fpu_op_mod, fpu_op}; + // No FPU or 
(ZFINX == 0 && MSTATUS.FS == FS_OFF) end else begin illegal_insn_o = 1'b1; @@ -1900,15 +1893,14 @@ module cv32e40p_decoder alu_op_b_mux_sel_o = OP_B_REGA_OR_FWD; unique case (instr_rdata_i[27:25]) - 3'b000: alu_operator_o = ALU_ADD; // cv.addNr - 3'b001: alu_operator_o = ALU_ADDU; // cv.adduNr - 3'b010: alu_operator_o = ALU_ADDR; // cv.addRNr - 3'b011: alu_operator_o = ALU_ADDUR; // cv.adduRNr - 3'b100: alu_operator_o = ALU_SUB; // cv.subNr - 3'b101: alu_operator_o = ALU_SUBU; // cv.subuNr - 3'b110: alu_operator_o = ALU_SUBR; // cv.subRNr - 3'b111: alu_operator_o = ALU_SUBUR; // cv.subuRNr - default: alu_operator_o = ALU_ADD; + 3'b001: alu_operator_o = ALU_ADDU; // cv.adduNr + 3'b010: alu_operator_o = ALU_ADDR; // cv.addRNr + 3'b011: alu_operator_o = ALU_ADDUR; // cv.adduRNr + 3'b100: alu_operator_o = ALU_SUB; // cv.subNr + 3'b101: alu_operator_o = ALU_SUBU; // cv.subuNr + 3'b110: alu_operator_o = ALU_SUBR; // cv.subRNr + 3'b111: alu_operator_o = ALU_SUBUR; // cv.subuRNr + default: alu_operator_o = ALU_ADD; // cv.addNr endcase end @@ -2085,7 +2077,6 @@ module cv32e40p_decoder // decide between using unsigned and rounding, and combinations unique case ({instr_rdata_i[31:30], instr_rdata_i[12]}) - {2'b00, 1'b0}: alu_operator_o = ALU_ADD; // cv.addN {2'b01, 1'b0}: alu_operator_o = ALU_ADDU; // cv.adduN {2'b10, 1'b0}: alu_operator_o = ALU_ADDR; // cv.addRN {2'b11, 1'b0}: alu_operator_o = ALU_ADDUR; // cv.adduRN @@ -2093,12 +2084,12 @@ module cv32e40p_decoder {2'b01, 1'b1}: alu_operator_o = ALU_SUBU; // cv.subuN {2'b10, 1'b1}: alu_operator_o = ALU_SUBR; // cv.subRN {2'b11, 1'b1}: alu_operator_o = ALU_SUBUR; // cv.subuRN - default : alu_operator_o = ALU_ADD; + default : alu_operator_o = ALU_ADD; // cv.addN endcase end - 2'b10, 2'b11: begin + default: begin // MUL/MAC with subword selection alu_en = 1'b0; mult_int_en = 1'b1; @@ -2126,7 +2117,6 @@ module cv32e40p_decoder mult_operator_o = MUL_I; end end - default: illegal_insn_o = 1'b1; endcase end else begin 
illegal_insn_o = 1'b1; @@ -2267,6 +2257,11 @@ module cv32e40p_decoder instr_rdata_i[25] != 1'b0) begin illegal_insn_o = 1'b1; end + // Imm6 restrictions + if ((instr_rdata_i[14:12] == 3'b110 && instr_rdata_i[24:23] != 2'b0) || + (instr_rdata_i[14:12] == 3'b111 && instr_rdata_i[24:22] != 3'b0)) begin + illegal_insn_o = 1'b1; + end end 6'b01001_0: begin // cv.sra alu_operator_o = ALU_SRA; @@ -2278,6 +2273,11 @@ module cv32e40p_decoder instr_rdata_i[25] != 1'b0) begin illegal_insn_o = 1'b1; end + // Imm6 restrictions + if ((instr_rdata_i[14:12] == 3'b110 && instr_rdata_i[24:23] != 2'b0) || + (instr_rdata_i[14:12] == 3'b111 && instr_rdata_i[24:22] != 3'b0)) begin + illegal_insn_o = 1'b1; + end end 6'b01010_0: begin // cv.sll alu_operator_o = ALU_SLL; @@ -2289,6 +2289,11 @@ module cv32e40p_decoder instr_rdata_i[25] != 1'b0) begin illegal_insn_o = 1'b1; end + // Imm6 restrictions + if ((instr_rdata_i[14:12] == 3'b110 && instr_rdata_i[24:23] != 2'b0) || + (instr_rdata_i[14:12] == 3'b111 && instr_rdata_i[24:22] != 3'b0)) begin + illegal_insn_o = 1'b1; + end end 6'b01011_0: begin // cv.or alu_operator_o = ALU_OR; @@ -2425,6 +2430,11 @@ module cv32e40p_decoder end default: illegal_insn_o = 1'b1; endcase + // Imm6 restrictions + if ((instr_rdata_i[12] == 1'b0 && instr_rdata_i[24:20] != 5'b0) || + (instr_rdata_i[12] == 1'b1 && instr_rdata_i[24:21] != 4'b0)) begin + illegal_insn_o = 1'b1; + end end 6'b11000_0: begin // cv.shuffle, cv.shuffleI0 alu_operator_o = ALU_SHUF; @@ -2439,6 +2449,10 @@ module cv32e40p_decoder instr_rdata_i[25] != 1'b0) begin illegal_insn_o = 1'b1; end + // Imm6 restriction + if (instr_rdata_i[14:12] == 3'b110 && instr_rdata_i[24:21] != 4'b0) begin + illegal_insn_o = 1'b1; + end end 6'b11001_0, 6'b11010_0, diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_ex_stage.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_ex_stage.sv index c51ef7f81..488a83dc2 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_ex_stage.sv +++ 
b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_ex_stage.sv @@ -33,6 +33,7 @@ module cv32e40p_ex_stage import cv32e40p_pkg::*; import cv32e40p_apu_core_pkg::*; #( + parameter COREV_PULP = 0, parameter FPU = 0, parameter APU_NARGS_CPU = 3, parameter APU_WOP_CPU = 6, @@ -81,6 +82,8 @@ module cv32e40p_ex_stage input logic data_misaligned_ex_i, input logic data_misaligned_i, + input logic [1:0] ctrl_transfer_insn_in_dec_i, + // FPU signals output logic fpu_fflags_we_o, output logic [APU_NUSFLAGS_CPU-1:0] fpu_fflags_o, @@ -138,11 +141,13 @@ module cv32e40p_ex_stage // Output of EX stage pipeline output logic [ 5:0] regfile_waddr_wb_o, output logic regfile_we_wb_o, + output logic regfile_we_wb_power_o, output logic [31:0] regfile_wdata_wb_o, // Forwarding ports : to ID stage output logic [ 5:0] regfile_alu_waddr_fw_o, output logic regfile_alu_we_fw_o, + output logic regfile_alu_we_fw_power_o, output logic [31:0] regfile_alu_wdata_fw_o, // forward to RF and ID/EX pipe, ALU & MUL // To IF: Jump and branch target and decision @@ -190,22 +195,27 @@ module cv32e40p_ex_stage // ALU write port mux always_comb begin - regfile_alu_wdata_fw_o = '0; - regfile_alu_waddr_fw_o = '0; - regfile_alu_we_fw_o = '0; - wb_contention = 1'b0; + regfile_alu_wdata_fw_o = '0; + regfile_alu_waddr_fw_o = '0; + regfile_alu_we_fw_o = 1'b0; + regfile_alu_we_fw_power_o = 1'b0; + wb_contention = 1'b0; - // APU single cycle operations, and multicycle operations (>2cycles) are written back on ALU port + // APU single cycle operations, and multicycle operations (> 2cycles) are written back on ALU port if (apu_valid & (apu_singlecycle | apu_multicycle)) begin - regfile_alu_we_fw_o = 1'b1; - regfile_alu_waddr_fw_o = apu_waddr; - regfile_alu_wdata_fw_o = apu_result; + regfile_alu_we_fw_o = 1'b1; + regfile_alu_we_fw_power_o = 1'b1; + regfile_alu_waddr_fw_o = apu_waddr; + regfile_alu_wdata_fw_o = apu_result; if (regfile_alu_we_i & ~apu_en_i) begin wb_contention = 1'b1; end end else begin - regfile_alu_we_fw_o = 
regfile_alu_we_i & ~apu_en_i; // private fpu incomplete? + regfile_alu_we_fw_o = regfile_alu_we_i & ~apu_en_i; + regfile_alu_we_fw_power_o = (COREV_PULP == 0) ? regfile_alu_we_i & ~apu_en_i : + regfile_alu_we_i & ~apu_en_i & + mult_ready & alu_ready & lsu_ready_ex_i; regfile_alu_waddr_fw_o = regfile_alu_waddr_i; if (alu_en_i) regfile_alu_wdata_fw_o = alu_result; if (mult_en_i) regfile_alu_wdata_fw_o = mult_result; @@ -215,21 +225,24 @@ module cv32e40p_ex_stage // LSU write port mux always_comb begin - regfile_we_wb_o = 1'b0; - regfile_waddr_wb_o = regfile_waddr_lsu; - regfile_wdata_wb_o = lsu_rdata_i; - wb_contention_lsu = 1'b0; + regfile_we_wb_o = 1'b0; + regfile_we_wb_power_o = 1'b0; + regfile_waddr_wb_o = regfile_waddr_lsu; + regfile_wdata_wb_o = lsu_rdata_i; + wb_contention_lsu = 1'b0; if (regfile_we_lsu) begin - regfile_we_wb_o = 1'b1; + regfile_we_wb_o = 1'b1; + regfile_we_wb_power_o = (COREV_PULP == 0) ? 1'b1 : ~data_misaligned_ex_i & wb_ready_i; if (apu_valid & (!apu_singlecycle & !apu_multicycle)) begin wb_contention_lsu = 1'b1; end // APU two-cycle operations are written back on LSU port end else if (apu_valid & (!apu_singlecycle & !apu_multicycle)) begin - regfile_we_wb_o = 1'b1; - regfile_waddr_wb_o = apu_waddr; - regfile_wdata_wb_o = apu_result; + regfile_we_wb_o = 1'b1; + regfile_we_wb_power_o = 1'b1; + regfile_waddr_wb_o = apu_waddr; + regfile_wdata_wb_o = apu_result; end end @@ -371,11 +384,20 @@ module cv32e40p_ex_stage apu_result_q <= 'b0; apu_flags_q <= 'b0; end else begin - if (apu_rvalid_i && apu_multicycle && (data_misaligned_i || data_misaligned_ex_i || (data_req_i && regfile_alu_we_i) || (mulh_active && (mult_operator_i == MUL_H)))) begin + if (apu_rvalid_i && apu_multicycle && + (data_misaligned_i || data_misaligned_ex_i || + ((data_req_i || data_rvalid_i) && regfile_alu_we_i) || + (mulh_active && (mult_operator_i == MUL_H)) || + ((ctrl_transfer_insn_in_dec_i == BRANCH_JALR) && + regfile_alu_we_i && ~apu_read_dep_for_jalr_o))) begin 
apu_rvalid_q <= 1'b1; apu_result_q <= apu_result_i; apu_flags_q <= apu_flags_i; - end else if (apu_rvalid_q && !(data_misaligned_i || data_misaligned_ex_i || ((data_req_i || data_rvalid_i) && regfile_alu_we_i) || (mulh_active && (mult_operator_i == MUL_H)))) begin + end else if (apu_rvalid_q && !(data_misaligned_i || data_misaligned_ex_i || + ((data_req_i || data_rvalid_i) && regfile_alu_we_i) || + (mulh_active && (mult_operator_i == MUL_H)) || + ((ctrl_transfer_insn_in_dec_i == BRANCH_JALR) && + regfile_alu_we_i && ~apu_read_dep_for_jalr_o))) begin apu_rvalid_q <= 1'b0; end end @@ -383,7 +405,12 @@ module cv32e40p_ex_stage assign apu_req_o = apu_req; assign apu_gnt = apu_gnt_i; - assign apu_valid = (apu_multicycle && (data_misaligned_i || data_misaligned_ex_i || ((data_req_i || data_rvalid_i) && regfile_alu_we_i) || (mulh_active && (mult_operator_i == MUL_H)))) ? 1'b0 : (apu_rvalid_i || apu_rvalid_q); + assign apu_valid = (apu_multicycle && (data_misaligned_i || data_misaligned_ex_i || + ((data_req_i || data_rvalid_i) && regfile_alu_we_i) || + (mulh_active && (mult_operator_i == MUL_H)) || + ((ctrl_transfer_insn_in_dec_i == BRANCH_JALR) && + regfile_alu_we_i && ~apu_read_dep_for_jalr_o))) + ? 1'b0 : (apu_rvalid_i || apu_rvalid_q); assign apu_operands_o = apu_operands_i; assign apu_op_o = apu_op_i; assign apu_result = apu_rvalid_q ? 
apu_result_q : apu_result_i; diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_fp_wrapper.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_fp_wrapper.sv index 042fa0f22..839d58aae 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_fp_wrapper.sv +++ b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_fp_wrapper.sv @@ -14,7 +14,7 @@ module cv32e40p_fp_wrapper import cv32e40p_apu_core_pkg::*; #( - parameter FPU_ADDMUL_LAT = 0, // Floating-Point ADDition/MULtiplication computing lane pipeline registers number + parameter FPU_ADDMUL_LAT = 0, // Floating-Point ADDition/MULtiplication computing lane pipeline registers number parameter FPU_OTHERS_LAT = 0 // Floating-Point COMParison/CONVersion computing lanes pipeline registers number ) ( // Clock and Reset @@ -111,7 +111,7 @@ module cv32e40p_fp_wrapper .int_fmt_i (fpnew_pkg::int_format_e'(fpu_int_fmt)), .vectorial_op_i(fpu_vec_op), .tag_i (1'b0), - .simd_mask_i ('b0), + .simd_mask_i (1'b0), .in_valid_i (apu_req_i), .in_ready_o (apu_gnt_o), .flush_i (1'b0), diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_id_stage.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_id_stage.sv index 7811eabfd..f323cbe5f 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_id_stage.sv +++ b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_id_stage.sv @@ -70,6 +70,7 @@ module cv32e40p_id_stage output logic branch_in_ex_o, input logic branch_decision_i, output logic [31:0] jump_target_o, + output logic [ 1:0] ctrl_transfer_insn_in_dec_o, // IF and ID stage signals output logic clear_instr_valid_o, @@ -225,10 +226,12 @@ module cv32e40p_id_stage // Forward Signals input logic [5:0] regfile_waddr_wb_i, input logic regfile_we_wb_i, + input logic regfile_we_wb_power_i, input logic [31:0] regfile_wdata_wb_i, // From wb_stage: selects data from data memory, ex_stage result and sp rdata input logic [ 5:0] regfile_alu_waddr_fw_i, input logic regfile_alu_we_fw_i, + input logic regfile_alu_we_fw_power_i, input logic [31:0] 
regfile_alu_wdata_fw_i, // from ALU @@ -809,6 +812,9 @@ module cv32e40p_id_stage if (ctrl_transfer_target_mux_sel == JT_JALR) begin apu_read_regs[0] = regfile_addr_ra_id; apu_read_regs_valid[0] = 1'b1; + end else begin + apu_read_regs[0] = regfile_addr_ra_id; + apu_read_regs_valid[0] = 1'b0; end end // OP_A_CURRPC: OP_A_REGA_OR_FWD: begin @@ -948,12 +954,12 @@ module cv32e40p_id_stage // Write port a .waddr_a_i(regfile_waddr_wb_i), .wdata_a_i(regfile_wdata_wb_i), - .we_a_i (regfile_we_wb_i), + .we_a_i (regfile_we_wb_power_i), // Write port b .waddr_b_i(regfile_alu_waddr_fw_i), .wdata_b_i(regfile_alu_wdata_fw_i), - .we_b_i (regfile_alu_we_fw_i) + .we_b_i (regfile_alu_we_fw_power_i) ); @@ -1087,7 +1093,7 @@ module cv32e40p_id_stage .debug_wfi_no_sleep_i(debug_wfi_no_sleep), // jump/branches - .ctrl_transfer_insn_in_dec_o (ctrl_transfer_insn_in_dec), + .ctrl_transfer_insn_in_dec_o (ctrl_transfer_insn_in_dec_o), .ctrl_transfer_insn_in_id_o (ctrl_transfer_insn_in_id), .ctrl_transfer_target_mux_sel_o(ctrl_transfer_target_mux_sel), @@ -1187,7 +1193,7 @@ module cv32e40p_id_stage // jump/branch control .branch_taken_ex_i (branch_taken_ex), .ctrl_transfer_insn_in_id_i (ctrl_transfer_insn_in_id), - .ctrl_transfer_insn_in_dec_i(ctrl_transfer_insn_in_dec), + .ctrl_transfer_insn_in_dec_i(ctrl_transfer_insn_in_dec_o), // Interrupt signals .irq_wu_ctrl_i (irq_wu_ctrl), @@ -1508,9 +1514,13 @@ module cv32e40p_id_stage if (id_valid_o) begin // unstall the whole pipeline alu_en_ex_o <= alu_en; if (alu_en) begin - alu_operator_ex_o <= alu_operator; - alu_operand_a_ex_o <= alu_operand_a; - alu_operand_b_ex_o <= alu_operand_b; + alu_operator_ex_o <= alu_operator; + alu_operand_a_ex_o <= alu_operand_a; + if (alu_op_b_mux_sel == OP_B_REGB_OR_FWD && (alu_operator == ALU_CLIP || alu_operator == ALU_CLIPU)) begin + alu_operand_b_ex_o <= {1'b0, alu_operand_b[30:0]}; + end else begin + alu_operand_b_ex_o <= alu_operand_b; + end alu_operand_c_ex_o <= alu_operand_c; bmask_a_ex_o <= bmask_a_id; 
bmask_b_ex_o <= bmask_b_id; diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_load_store_unit.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_load_store_unit.sv index 7c08ffe11..f9c4db4a8 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_load_store_unit.sv +++ b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_load_store_unit.sv @@ -121,18 +121,18 @@ module cv32e40p_load_store_unit #( 2'b00: begin // Writing a word if (misaligned_st == 1'b0) begin // non-misaligned case case (data_addr_int[1:0]) - 2'b00: data_be = 4'b1111; - 2'b01: data_be = 4'b1110; - 2'b10: data_be = 4'b1100; - 2'b11: data_be = 4'b1000; + 2'b00: data_be = 4'b1111; + 2'b01: data_be = 4'b1110; + 2'b10: data_be = 4'b1100; + default: data_be = 4'b1000; endcase ; // case (data_addr_int[1:0]) end else begin // misaligned case case (data_addr_int[1:0]) - 2'b00: data_be = 4'b0000; // this is not used, but included for completeness - 2'b01: data_be = 4'b0001; - 2'b10: data_be = 4'b0011; - 2'b11: data_be = 4'b0111; + 2'b01: data_be = 4'b0001; + 2'b10: data_be = 4'b0011; + 2'b11: data_be = 4'b0111; + default: data_be = 4'b0000; // this is not used, but included for completeness endcase ; // case (data_addr_int[1:0]) end @@ -141,10 +141,10 @@ module cv32e40p_load_store_unit #( 2'b01: begin // Writing a half word if (misaligned_st == 1'b0) begin // non-misaligned case case (data_addr_int[1:0]) - 2'b00: data_be = 4'b0011; - 2'b01: data_be = 4'b0110; - 2'b10: data_be = 4'b1100; - 2'b11: data_be = 4'b1000; + 2'b00: data_be = 4'b0011; + 2'b01: data_be = 4'b0110; + 2'b10: data_be = 4'b1100; + default: data_be = 4'b1000; endcase ; // case (data_addr_int[1:0]) end else begin // misaligned case @@ -154,10 +154,10 @@ module cv32e40p_load_store_unit #( 2'b10, 2'b11: begin // Writing a byte case (data_addr_int[1:0]) - 2'b00: data_be = 4'b0001; - 2'b01: data_be = 4'b0010; - 2'b10: data_be = 4'b0100; - 2'b11: data_be = 4'b1000; + 2'b00: data_be = 4'b0001; + 2'b01: data_be = 4'b0010; + 2'b10: data_be = 4'b0100; + 
default: data_be = 4'b1000; endcase ; // case (data_addr_int[1:0]) end diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_register_file_latch.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_register_file_latch.sv index d8e2f4aa0..57383e6a5 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_register_file_latch.sv +++ b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_register_file_latch.sv @@ -172,8 +172,8 @@ module cv32e40p_register_file #( mem[0] = '0; for (k = 1; k < NUM_WORDS; k++) begin : w_WordIter - if (~rst_n) mem[k] = '0; - else if (mem_clocks[k] == 1'b1) mem[k] = waddr_onehot_b_q[k] ? wdata_b_q : wdata_a_q; + if (~rst_n) mem[k] <= '0; + else if (mem_clocks[k] == 1'b1) mem[k] <= waddr_onehot_b_q[k] ? wdata_b_q : wdata_a_q; end end @@ -182,9 +182,9 @@ module cv32e40p_register_file #( always_latch begin : latch_wdata_fp if (FPU == 1) begin for (l = 0; l < NUM_FP_WORDS; l++) begin : w_WordIter - if (~rst_n) mem_fp[l] = '0; + if (~rst_n) mem_fp[l] <= '0; else if (mem_clocks[l+NUM_WORDS] == 1'b1) - mem_fp[l] = waddr_onehot_b_q[l+NUM_WORDS] ? wdata_b_q : wdata_a_q; + mem_fp[l] <= waddr_onehot_b_q[l+NUM_WORDS] ? wdata_b_q : wdata_a_q; end end end diff --git a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_top.sv b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_top.sv index 7ddd2d5a2..43eee77bd 100644 --- a/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_top.sv +++ b/hw/vendor/openhwgroup_cv32e40p/rtl/cv32e40p_top.sv @@ -1,15 +1,27 @@ -// Copyright 2018 ETH Zurich and University of Bologna. -// Copyright and related rights are licensed under the Solderpad Hardware -// License, Version 0.51 (the "License"); you may not use this file except in -// compliance with the License. You may obtain a copy of the License at -// http://solderpad.org/licenses/SHL-0.51. 
Unless required by applicable law -// or agreed to in writing, software, hardware and materials distributed under -// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR -// CONDITIONS OF ANY KIND, either express or implied. See the License for the -// specific language governing permissions and limitations under the License. - -// Top file instantiating a CV32E40P core and an optional FPU -// Contributor: Davide Schiavone +// Copyright 2024 Dolphin Design +// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 +// +// Licensed under the Solderpad Hardware License v 2.1 (the "License"); +// you may not use this file except in compliance with the License, or, +// at your option, the Apache License version 2.0. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/SHL-2.1/ +// +// Unless required by applicable law or agreed to in writing, any work +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +///////////////////////////////////////////////////////////////////////////// +// // +// Contributors: Pascal Gouedo, Dolphin Design // +// // +// Description: Top level module of CV32E40P instantiating the Core and // +// an optional CVFPU with its clock gating cell. // +// // +///////////////////////////////////////////////////////////////////////////// module cv32e40p_top #( parameter COREV_PULP = 0, // PULP ISA Extension (incl. custom CSRs and hardware loop, excl. 
cv.elw) @@ -70,7 +82,7 @@ module cv32e40p_top #( import cv32e40p_apu_core_pkg::*; // Core to FPU - logic clk; + logic apu_busy; logic apu_req; logic [ APU_NARGS_CPU-1:0][31:0] apu_operands; logic [ APU_WOP_CPU-1:0] apu_op; @@ -82,6 +94,8 @@ module cv32e40p_top #( logic [ 31:0] apu_rdata; logic [APU_NUSFLAGS_CPU-1:0] apu_rflags; + logic apu_clk_en, apu_clk; + // Instantiate the Core cv32e40p_core #( .COREV_PULP (COREV_PULP), @@ -119,6 +133,7 @@ module cv32e40p_top #( .data_wdata_o (data_wdata_o), .data_rdata_i (data_rdata_i), + .apu_busy_o (apu_busy), .apu_req_o (apu_req), .apu_gnt_i (apu_gnt), .apu_operands_o(apu_operands), @@ -143,12 +158,15 @@ module cv32e40p_top #( generate if (FPU) begin : fpu_gen + + assign apu_clk_en = apu_req | apu_busy; + // FPU clock gate cv32e40p_clock_gate core_clock_gate_i ( .clk_i (clk_i), - .en_i (!core_sleep_o), + .en_i (apu_clk_en), .scan_cg_en_i(scan_cg_en_i), - .clk_o (clk) + .clk_o (apu_clk) ); // Instantiate the FPU wrapper @@ -156,7 +174,7 @@ module cv32e40p_top #( .FPU_ADDMUL_LAT(FPU_ADDMUL_LAT), .FPU_OTHERS_LAT(FPU_OTHERS_LAT) ) fp_wrapper_i ( - .clk_i (clk), + .clk_i (apu_clk), .rst_ni (rst_ni), .apu_req_i (apu_req), .apu_gnt_o (apu_gnt), diff --git a/hw/vendor/openhwgroup_cv32e40x.core b/hw/vendor/openhwgroup_cv32e40x.core index 70a248de8..7a12b181f 100644 --- a/hw/vendor/openhwgroup_cv32e40x.core +++ b/hw/vendor/openhwgroup_cv32e40x.core @@ -67,3 +67,4 @@ targets: filesets: - files_rtl - target_sim? (files_clk_gate) + - target_sim_sc? 
(files_clk_gate) diff --git a/hw/vendor/openhwgroup_cve2.lock.hjson b/hw/vendor/openhwgroup_cve2.lock.hjson index a9251111f..fc70c877c 100644 --- a/hw/vendor/openhwgroup_cve2.lock.hjson +++ b/hw/vendor/openhwgroup_cve2.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/openhwgroup/cve2.git - rev: f5b21e71c9b511477b04ef81d1292858c51ac20c + rev: 7f3bb9fcb28e55b227c966734d8e81ddef58b7e3 } } diff --git a/hw/vendor/openhwgroup_cve2.vendor.hjson b/hw/vendor/openhwgroup_cve2.vendor.hjson index d398c1eb3..28cb1df64 100644 --- a/hw/vendor/openhwgroup_cve2.vendor.hjson +++ b/hw/vendor/openhwgroup_cve2.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/openhwgroup/cve2.git", - rev: "f5b21e71c9b511477b04ef81d1292858c51ac20c", + rev: "7f3bb9fcb28e55b227c966734d8e81ddef58b7e3", }, patch_dir: "patches/openhwgroup_cv32e20", diff --git a/hw/vendor/patches/openhwgroup_cv32e20/cv32e20_core.patch b/hw/vendor/patches/openhwgroup_cv32e20/cv32e20_core.patch new file mode 100644 index 000000000..9727dec16 --- /dev/null +++ b/hw/vendor/patches/openhwgroup_cv32e20/cv32e20_core.patch @@ -0,0 +1,12 @@ +diff --git a/cve2_top.core b/cve2_top.core +index cb4e23b4..0d9d2c5c 100644 +--- a/cve2_top.core ++++ b/cve2_top.core +@@ -75,6 +75,7 @@ targets: + - tool_veriblelint ? (files_lint_verible) + - files_rtl + - target_sim ? (files_clk_gate) ++ - target_sim_sc ? (files_clk_gate) + toplevel: cve2_top + parameters: + - tool_vivado ? 
(FPGA_XILINX=true) diff --git a/hw/vendor/pulp_platform_fpnew.lock.hjson b/hw/vendor/pulp_platform_fpnew.lock.hjson index bc1ec5555..de40549d5 100644 --- a/hw/vendor/pulp_platform_fpnew.lock.hjson +++ b/hw/vendor/pulp_platform_fpnew.lock.hjson @@ -9,6 +9,6 @@ upstream: { url: https://github.com/pulp-platform/fpnew.git - rev: d6e581628f3517a1fb1257507d3214e599f7859d + rev: 79e453139072df42c9ec8f697132ba485d74e23d } } diff --git a/hw/vendor/pulp_platform_fpnew.vendor.hjson b/hw/vendor/pulp_platform_fpnew.vendor.hjson index 7ae0d55cc..1fe09cca5 100644 --- a/hw/vendor/pulp_platform_fpnew.vendor.hjson +++ b/hw/vendor/pulp_platform_fpnew.vendor.hjson @@ -7,7 +7,7 @@ upstream: { url: "https://github.com/pulp-platform/fpnew.git", - rev: "d6e581628f3517a1fb1257507d3214e599f7859d", + rev: "79e453139072df42c9ec8f697132ba485d74e23d", }, exclude_from_upstream: [ diff --git a/hw/vendor/pulp_platform_gpio.core b/hw/vendor/pulp_platform_gpio.core index 630f318a3..2d79c6110 100644 --- a/hw/vendor/pulp_platform_gpio.core +++ b/hw/vendor/pulp_platform_gpio.core @@ -39,8 +39,10 @@ targets: - rtl - "gpio-test? (testbench)" - target_sim? (clock-gate) + - target_sim_sc? (clock-gate) - target_asic_synthesis? (clock-gate) - target_asic_yosys_synthesis? (clock-gate) - target_nexys-a7-100t? (no-clock-gate) - target_pynq-z2? (no-clock-gate) - target_pynq-z2-arm-emulation? (no-clock-gate) + - target_zcu104? (no-clock-gate) diff --git a/hw/vendor/pulp_platform_tech_cells_generic.core b/hw/vendor/pulp_platform_tech_cells_generic.core index 6a70b8e22..02d8b12c0 100644 --- a/hw/vendor/pulp_platform_tech_cells_generic.core +++ b/hw/vendor/pulp_platform_tech_cells_generic.core @@ -20,3 +20,4 @@ targets: default: filesets: - target_sim? (rtl_sim) + - target_sim_sc? 
(rtl_sim) diff --git a/hw/vendor/waiver/lint/cv32e40px.vlt b/hw/vendor/waiver/lint/cv32e40px.vlt index a6665860b..f2eebf7be 100644 --- a/hw/vendor/waiver/lint/cv32e40px.vlt +++ b/hw/vendor/waiver/lint/cv32e40px.vlt @@ -114,3 +114,6 @@ lint_off -rule WIDTH -file "*/rtl/cv32e40px_decoder.sv" -match "Logical operator lint_off -rule WIDTH -file "*/rtl/cv32e40px_controller.sv" -match "Logical operator LOGAND expects 1 bit on the LHS, but LHS's VARREF 'FPU' generates 32 bits.*" lint_off -rule WIDTH -file "*/rtl/cv32e40px_cs_registers.sv" -match "Logical operator LOGOR expects 1 bit on the LHS, but LHS's VARREF 'FPU' generates 32 bits.*" lint_off -rule LATCH -file "*/rtl/cv32e40px_id_stage.sv" -match "Latch inferred for signal*apu_read_regs*" +lint_off -rule WIDTH -file "*/rtl/cv32e40px_register_file_ff.sv" -match "Logical operator COND expects 1 bit on the Conditional Test*" +lint_off -rule WIDTH -file "*/rtl/cv32e40px_decoder.sv" -match "Logical operator LOGNOT expects 1 bit on the LHS*" +lint_off -rule WIDTH -file "*/rtl/cv32e40px_ex_stage.sv" -match "Logical operator LOGNOT expects 1 bit on the LHS*" diff --git a/logo/x-heep.png b/logo/x-heep.png deleted file mode 100644 index 76814a075..000000000 Binary files a/logo/x-heep.png and /dev/null differ diff --git a/mcu_cfg.hjson b/mcu_cfg.hjson index f1d435dff..2e6f33f19 100644 --- a/mcu_cfg.hjson +++ b/mcu_cfg.hjson @@ -7,26 +7,7 @@ cpu_type: cv32e20 - bus_type: onetoM - - ram: { - address: 0x00000000, #only tried with 0, cannot be changed for now - numbanks: 2, #each bank is 32kB, cannot be changed for now - numbanks_interleaved: 0, - }, - linker_script: { - #value used for the on-chip linker script, the on-flash linker script is generated using FLASH values and the whole RAM values - onchip_ls: { - code: { - address: 0x00000000, - lenght: 0x00000C800, #minimum size for freeRTOS and clang - } - data: { - address: 0x00000C800, - lenght: whatisleft, #keyword used to calculate the size as: ram.length - code.lenght - } - }, 
stack_size: 0x800, heap_size: 0x800, } @@ -56,10 +37,10 @@ offset: 0x00028000, length: 0x00008000, }, - spi_host: { + dma: { offset: 0x00030000, length: 0x00010000, - path: "./hw/vendor/lowrisc_opentitan_spi_host/data/spi_host.hjson" + path: "./hw/ip/dma/data/dma.hjson" }, power_manager: { offset: 0x00040000, @@ -70,30 +51,25 @@ offset: 0x00050000, length: 0x00010000, }, - dma: { - offset: 0x00060000, - length: 0x00010000, - path: "./hw/ip/dma/data/dma.hjson" - }, fast_intr_ctrl: { - offset: 0x00070000, + offset: 0x00060000, length: 0x00010000, path: "./hw/ip/fast_intr_ctrl/data/fast_intr_ctrl.hjson" }, ext_peripheral: { - offset: 0x00080000, + offset: 0x00070000, length: 0x00010000, }, pad_control: { - offset: 0x00090000, + offset: 0x00080000, length: 0x00010000, }, gpio_ao: { - offset: 0x000A0000, + offset: 0x00090000, length: 0x00010000, }, uart: { - offset: 0x000B0000, + offset: 0x000A0000, length: 0x00010000, path: "./hw/vendor/lowrisc_opentitan/hw/ip/uart/data/uart.hjson" }, @@ -108,6 +84,12 @@ is_included: "yes", path: "./hw/vendor/lowrisc_opentitan/hw/ip/rv_plic/data/rv_plic.hjson" }, + spi_host: { + offset: 0x00010000, + length: 0x00010000, + is_included: "yes", + path: "./hw/vendor/lowrisc_opentitan_spi_host/data/spi_host.hjson" + }, gpio: { offset: 0x00020000, length: 0x00010000, @@ -143,6 +125,7 @@ is_included: "yes", path: "./hw/ip/i2s/data/i2s.hjson" }, + }, flash_mem: { diff --git a/mcu_cfg_minimal.hjson b/mcu_cfg_minimal.hjson index ed74849a6..271bcf7ac 100644 --- a/mcu_cfg_minimal.hjson +++ b/mcu_cfg_minimal.hjson @@ -7,26 +7,7 @@ cpu_type: cv32e20 - bus_type: onetoM - - ram: { - address: 0x00000000, #only tried with 0, cannot be changed for now - numbanks: 2, #each bank is 32kB, cannot be changed for now - numbanks_interleaved: 0, - }, - linker_script: { - #value used for the on-chip linker script, the on-flash linker script is generated using FLASH values and the whole RAM values - onchip_ls: { - code: { - address: 0x00000000, - lenght: 
0x00000C800, #minimum size for freeRTOS and clang - } - data: { - address: 0x00000C800, - lenght: whatisleft, #keyword used to calculate the size as: ram.length - code.lenght - } - }, stack_size: 0x800, heap_size: 0x800, } @@ -56,10 +37,10 @@ offset: 0x00028000, length: 0x00008000, }, - spi_host: { + dma: { offset: 0x00030000, length: 0x00010000, - path: "./hw/vendor/lowrisc_opentitan_spi_host/data/spi_host.hjson" + path: "./hw/ip/dma/data/dma.hjson" }, power_manager: { offset: 0x00040000, @@ -70,30 +51,25 @@ offset: 0x00050000, length: 0x00010000, }, - dma: { - offset: 0x00060000, - length: 0x00010000, - path: "./hw/ip/dma/data/dma.hjson" - }, fast_intr_ctrl: { - offset: 0x00070000, + offset: 0x00060000, length: 0x00010000, path: "./hw/ip/fast_intr_ctrl/data/fast_intr_ctrl.hjson" }, ext_peripheral: { - offset: 0x00080000, + offset: 0x00070000, length: 0x00010000, }, pad_control: { - offset: 0x00090000, + offset: 0x00080000, length: 0x00010000, }, gpio_ao: { - offset: 0x000A0000, + offset: 0x00090000, length: 0x00010000, }, uart: { - offset: 0x000B0000, + offset: 0x000A0000, length: 0x00010000, path: "./hw/vendor/lowrisc_opentitan/hw/ip/uart/data/uart.hjson" }, @@ -108,6 +84,12 @@ is_included: "no", path: "./hw/vendor/lowrisc_opentitan/hw/ip/rv_plic/data/rv_plic.hjson" }, + spi_host: { + offset: 0x00010000, + length: 0x00010000, + is_included: "no", + path: "./hw/vendor/lowrisc_opentitan_spi_host/data/spi_host.hjson" + }, gpio: { offset: 0x00020000, length: 0x00010000, diff --git a/pad_cfg.hjson b/pad_cfg.hjson index cc7303a1a..a469f1c81 100644 --- a/pad_cfg.hjson +++ b/pad_cfg.hjson @@ -2,8 +2,21 @@ // Solderpad Hardware License, Version 0.51, see LICENSE for details. // SPDX-License-Identifier: SHL-0.51 // Derived from Occamy: https://github.com/pulp-platform/snitch/blob/master/hw/system/occamy/src/occamy_cfg.hjson -// Peripherals configuration for core-v-mini-mcu. - +// +// Pads configuration for core-v-mini-mcu. Read by mcu_gen.py. 
+// +// The pads contains the list of all the pads available in the design. +// Each pad is defined by its name and can have the following attributes: +// num: (mandatory) - the number of pads of this type +// type: (mandatory) - the type of the pad +// num_offset: (optional) - the offset to the first pad of this type (default 0) +// mapping: (optional) - the mapping of the pad in the design. Useful for ASICs (default top) +// active: (optional) - the active level of the pad (default high) +// driven_manually: (optional) - the pad is driven manually (default False) +// mux: (optional) - the muxing options for the pad +// skip_declaration: (optional) - skip the declaration of the pad in the top level (default False) +// keep_internal: (optional) - keep the pad internal to the design (default False) +// // Add this field at the same level of pads (not inside) if you want to define PADs attributes // attributes: { // bits: 7:0 diff --git a/sw/CMakeLists.txt b/sw/CMakeLists.txt index a90f9fb8b..7b6b70c42 100644 --- a/sw/CMakeLists.txt +++ b/sw/CMakeLists.txt @@ -63,19 +63,19 @@ FILE(GLOB_RECURSE new_list FOLLOW_SYMLINKS ${SOURCE_PATH}*.h) SET(dir_list_str "") FOREACH(file_path ${new_list}) SET(add 0) # This variable is set to 1 if the file_pth needs to be added to the list - if(${file_path} MATCHES "/device/") - if(${file_path} MATCHES "/target/") # Add it if its not in target, or if its in target/${TARGET} - if(${file_path} MATCHES ${TARGET}) + if(${file_path} MATCHES "${SOURCE_PATH}device/") + if(${file_path} MATCHES "${SOURCE_PATH}device/target/") # Add it if its not in target, or if its in target/${TARGET} + if(${file_path} MATCHES "${SOURCE_PATH}device/target/${TARGET}") SET(add 1) endif() else() SET(add 1) endif() - elseif(${file_path} MATCHES ${PROJECT}) + elseif(${file_path} MATCHES ${SOURCE_PATH}applications/${PROJECT}/) SET(add 1) - elseif( ( ${file_path} MATCHES "/freertos/" ) AND ( ${PROJECT} MATCHES "freertos" ) ) + elseif( ( ${file_path} MATCHES 
"${SOURCE_PATH}freertos/" ) AND ( ${PROJECT} MATCHES "freertos" ) ) SET(add 1) - elseif( ${file_path} MATCHES "/external/" ) + elseif( ${file_path} MATCHES "${SOURCE_PATH}external/" ) SET(add 1) endif() @@ -118,23 +118,31 @@ SET(INCLUDE_FOLDERS "-I ${RISCV}/${COMPILER_PREFIX}elf/include \ # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # # Preliminary list of source files inside the source path -# Make a list of the source files that need to be linked -FILE(GLOB_RECURSE new_list FOLLOW_SYMLINKS ${SOURCE_PATH}*.c) +# Make a list of the .c source files that need to be linked +FILE(GLOB_RECURSE c_files FOLLOW_SYMLINKS ${SOURCE_PATH}*.c) +# Make a list of the .s source files that need to be linked +FILE(GLOB_RECURSE s_files FOLLOW_SYMLINKS ${SOURCE_PATH}*.s) +# Make a list of the .S source files that need to be linked +FILE(GLOB_RECURSE S_files FOLLOW_SYMLINKS ${SOURCE_PATH}*.S) +SET(new_list ${c_files} ${s_files} ${S_files}) + SET( c_dir_list "" ) SET( app_found 0 ) FOREACH(file_path IN LISTS new_list) SET(add 0) # This variable is set to 1 if the file_pth needs to be added to the list - if(${file_path} MATCHES "/device/") - SET(add 1) - elseif( ${file_path} MATCHES "/external/" ) - SET(add 1) - elseif( ( ${file_path} MATCHES "/${PROJECT}/" ) AND ( NOT ${file_path} MATCHES ${MAINFILE} ) ) + if(${file_path} MATCHES "${SOURCE_PATH}device/") + if(NOT ${file_path} MATCHES "\\.S$") + SET(add 1) + endif() + elseif( (${file_path} MATCHES "${SOURCE_PATH}external/") AND ( NOT ${file_path} MATCHES "exclude" ) ) SET(add 1) - elseif( ( ${file_path} MATCHES "/${PROJECT}/" ) AND ( ${file_path} MATCHES ${MAINFILE} ) ) + elseif( ${file_path} MATCHES "${SOURCE_PATH}applications/${PROJECT}/.*${MAINFILE}\." 
) # look for main.* SET(app_found 1) + elseif( ${file_path} MATCHES "${SOURCE_PATH}applications/${PROJECT}/" ) # other sources + SET(add 1) endif() - if( add EQUAL 1 ) # If the file path mathced one of the criterion, add it to the list + if( add EQUAL 1 ) # If the file path matched one of the criteria, add it to the list SET(c_dir_list ${c_dir_list} "${file_path}\ ") string(REPLACE ";" "" c_dir_list ${c_dir_list}) @@ -148,18 +156,26 @@ ENDFOREACH() if( app_found EQUAL 0 ) SET(SOURCE_PATH ${ROOT_PROJECT}) - # Make a list of the source files that need to be linked - FILE(GLOB_RECURSE new_list FOLLOW_SYMLINKS ${SOURCE_PATH}*.c) + # Make a list of the .c source files that need to be linked + FILE(GLOB_RECURSE c_files FOLLOW_SYMLINKS ${SOURCE_PATH}*.c) + # Make a list of the .s source files that need to be linked + FILE(GLOB_RECURSE s_files FOLLOW_SYMLINKS ${SOURCE_PATH}*.s) + # Make a list of the .S source files that need to be linked + FILE(GLOB_RECURSE S_files FOLLOW_SYMLINKS ${SOURCE_PATH}*.S) + SET(new_list ${c_files} ${s_files} ${S_files}) + SET(c_dir_list "") FOREACH(file_path IN LISTS new_list) SET(add 0) # This variable is set to 1 if the file_pth needs to be added to the list - if(${file_path} MATCHES "/device/") - SET(add 1) - elseif( ( ${file_path} MATCHES "/${PROJECT}/" ) AND ( NOT ${file_path} MATCHES ${MAINFILE} ) ) + if(${file_path} MATCHES "${ROOT_PROJECT}device/") + if(NOT ${file_path} MATCHES "\\.S$") + SET(add 1) + endif() + elseif( ( ${file_path} MATCHES "${ROOT_PROJECT}/applications/${PROJECT}/" ) AND ( NOT ${file_path} MATCHES "${ROOT_PROJECT}applications/${PROJECT}/.*${MAINFILE}\." 
) AND ( NOT ${file_path} MATCHES "exclude" ) ) SET(add 1) endif() - if( add EQUAL 1 ) # If the file path mathced one of the criterion, add it to the list + if( add EQUAL 1 ) # If the file path matched one of the criteria, add it to the list SET(c_dir_list ${c_dir_list} "${file_path}\ ") string(REPLACE ";" "" c_dir_list ${c_dir_list}) @@ -303,20 +319,31 @@ endif() # Set CMAKE flags # specify the C standard -set(COMPILER_LINKER_FLAGS "\ - -march=${CMAKE_SYSTEM_PROCESSOR} \ - -w -Os -g -nostdlib \ - -ffunction-sections \ - -DHOST_BUILD \ - -D${CRT_TYPE} \ - -D${CRTO} \ - -DportasmHANDLE_INTERRUPT=vSystemIrqHandler\ -") +if(NOT ${PROJECT} MATCHES "coremark") + set(COMPILER_LINKER_FLAGS "\ + -march=${CMAKE_SYSTEM_PROCESSOR} \ + -w -O2 -g -nostdlib \ + -ffunction-sections \ + -DHOST_BUILD \ + -D${CRT_TYPE} \ + -D${CRTO} \ + -DportasmHANDLE_INTERRUPT=vSystemIrqHandler\ + ") +else() + set(COMPILER_LINKER_FLAGS "\ + -march=${CMAKE_SYSTEM_PROCESSOR} \ + -w -O3 -g -nostdlib -falign-functions=16 -funroll-all-loops -falign-jumps=4 -finline-functions -Wall -static -pedantic -DPERFORMANCE_RUN=1 -DITERATIONS=1 -DHAS_STDIO=1 -DHAS_PRINTF=1 \ + -ffunction-sections \ + -DHOST_BUILD \ + -D${CRT_TYPE} \ + -D${CRTO} \ + -DportasmHANDLE_INTERRUPT=vSystemIrqHandler\ + ") +endif() set(CMAKE_C_FLAGS ${COMPILER_LINKER_FLAGS}) if (${COMPILER} MATCHES "clang") set( CMAKE_C_FLAGS "${CMAKE_C_FLAGS} --target=riscv32 \ - -mabi=ilp32 \ --gcc-toolchain=${RISCV} \ --sysroot=${RISCV}/${COMPILER_PREFIX}elf \ -static \ @@ -435,4 +462,4 @@ endforeach() SET(DCMAKE_EXPORT_COMPILE_COMMANDS ON) -#message( FATAL_ERROR "You can not do this at all, CMake will exit." ) \ No newline at end of file +#message( FATAL_ERROR "You can not do this at all, CMake will exit." 
) diff --git a/sw/applications/coremark/core_list_join.c b/sw/applications/coremark/core_list_join.c new file mode 100644 index 000000000..64527aa5e --- /dev/null +++ b/sw/applications/coremark/core_list_join.c @@ -0,0 +1,612 @@ +/* +Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0 + +#include "coremark.h" +/* +Topic: Description + Benchmark using a linked list. + + Linked list is a common data structure used in many applications. + + For our purposes, this will excercise the memory units of the processor. + In particular, usage of the list pointers to find and alter data. + + We are not using Malloc since some platforms do not support this +library. 
+
+        Instead, the memory block being passed in is used to create a list,
+        and the benchmark takes care not to add more items then can be
+        accomodated by the memory block. The porting layer will make sure
+        that we have a valid memory block.
+
+        All operations are done in place, without using any extra memory.
+
+        The list itself contains list pointers and pointers to data items.
+        Data items contain the following:
+
+        idx - An index that captures the initial order of the list.
+        data - Variable data initialized based on the input parameters. The 16b
+are divided as follows: o Upper 8b are backup of original data. o Bit 7
+indicates if the lower 7 bits are to be used as is or calculated. o Bits 0-2
+indicate type of operation to perform to get a 7b value. o Bits 3-6 provide
+input for the operation.
+
+*/
+
+/* local functions (defined later in this file) */
+
+list_head *core_list_find(list_head *list, list_data *info);
+list_head *core_list_reverse(list_head *list);
+list_head *core_list_remove(list_head *item);
+list_head *core_list_undo_remove(list_head *item_removed,
+                                 list_head *item_modified);
+list_head *core_list_insert_new(list_head * insert_point,
+                                list_data * info,
+                                list_head **memblock,
+                                list_data **datablock,
+                                list_head * memblock_end,
+                                list_data * datablock_end);
+typedef ee_s32 (*list_cmp)(list_data *a, list_data *b, core_results *res); /* comparison callback taken by core_list_mergesort */
+list_head *core_list_mergesort(list_head * list,
+                               list_cmp cmp,
+                               core_results *res);
+/* calc_func: return the 7-bit key of *pdata, computing and caching it in *pdata on first use. */
+ee_s16
+calc_func(ee_s16 *pdata, core_results *res)
+{
+    ee_s16 data = *pdata;
+    ee_s16 retval;
+    ee_u8 optype
+        = (data >> 7)
+          & 1; /* bit 7 indicates if the function result has been cached */
+    if (optype) /* if cached, use cache */
+        return (data & 0x007f);
+    else
+    { /* otherwise calculate and cache the result */
+        ee_s16 flag = data & 0x7; /* bits 0-2 is type of function to perform */
+        ee_s16 dtype
+            = ((data >> 3)
+               & 0xf); /* bits 3-6 is specific data for the operation */
+        dtype |= dtype << 4; /* replicate the lower 4 bits to get an 8b value */
+        switch (flag)
+        {
+            case 0: /* type 0: run the state-machine kernel on res->memblock[3] */
+                if (dtype < 0x22) /* set min period for bit corruption */
+                    dtype = 0x22;
+                retval = core_bench_state(res->size,
+                                          res->memblock[3],
+                                          res->seed1,
+                                          res->seed2,
+                                          dtype,
+                                          res->crc);
+                if (res->crcstate == 0) /* store the state result while crcstate is still 0 */
+                    res->crcstate = retval;
+                break;
+            case 1: /* type 1: run the matrix kernel on res->mat */
+                retval = core_bench_matrix(&(res->mat), dtype, res->crc);
+                if (res->crcmatrix == 0) /* store the matrix result while crcmatrix is still 0 */
+                    res->crcmatrix = retval;
+                break;
+            default: /* any other type: use the raw data word */
+                retval = data;
+                break;
+        }
+        res->crc = crcu16(retval, res->crc); /* fold the result into the running CRC */
+        retval &= 0x007f; /* only the low 7 bits become the key */
+        *pdata = (data & 0xff00) | 0x0080 | retval; /* cache the result */
+        return retval;
+    }
+}
+/* Function: cmp_complex
+        Compare two list cells by the 7-bit key calc_func derives from data16.
+        Returns key(a) - key(b); calc_func may rewrite a->data16, b->data16
+        and res->crc as it runs. Can be used by mergesort.
+*/
+ee_s32
+cmp_complex(list_data *a, list_data *b, core_results *res)
+{
+    ee_s16 val1 = calc_func(&(a->data16), res);
+    ee_s16 val2 = calc_func(&(b->data16), res);
+    return val1 - val2;
+}
+
+/* Function: cmp_idx
+        Compare the idx item in a list cell, and regen the data.
+        Returns a->idx - b->idx. When res is NULL, the upper byte of each
+        data16 is first copied over its lower byte. Can be used by mergesort.
+*/
+ee_s32
+cmp_idx(list_data *a, list_data *b, core_results *res)
+{
+    if (res == NULL)
+    {
+        a->data16 = (a->data16 & 0xff00) | (0x00ff & (a->data16 >> 8));
+        b->data16 = (b->data16 & 0xff00) | (0x00ff & (b->data16 >> 8));
+    }
+    return a->idx - b->idx;
+}
+/* copy_info: copy the data16 and idx fields of *from into *to. */
+void
+copy_info(list_data *to, list_data *from)
+{
+    to->data16 = from->data16;
+    to->idx = from->idx;
+}
+
+/* Benchmark for linked list:
+        - Try to find multiple data items.
+        - List sort
+        - Operate on data from list (crc)
+        - Single remove/reinsert
+        * At the end of this function, the list is back to original state
+*/
+ee_u16
+core_bench_list(core_results *res, ee_s16 finder_idx)
+{
+    ee_u16 retval = 0; /* running result / CRC returned to the caller */
+    ee_u16 found = 0, missed = 0;
+    list_head *list = res->list;
+    ee_s16 find_num = res->seed3; /* number of find iterations */
+    list_head *this_find;
+    list_head *finder, *remover;
+    list_data info;
+    ee_s16 i;
+
+    info.idx = finder_idx;
+    /* find values in the list, and change the list each time
+     * (reverse and cache if value found) */
+    for (i = 0; i < find_num; i++)
+    {
+        info.data16 = (i & 0xff);
+        this_find = core_list_find(list, &info);
+        list = core_list_reverse(list);
+        if (this_find == NULL)
+        {
+            missed++;
+            retval += (list->next->info->data16 >> 8) & 1;
+        }
+        else
+        {
+            found++;
+            if (this_find->info->data16 & 0x1) /* use found value */
+                retval += (this_find->info->data16 >> 9) & 1;
+            /* and cache next item at the head of the list (if any) */
+            if (this_find->next != NULL)
+            {
+                finder = this_find->next;
+                this_find->next = finder->next; /* unlink the successor ... */
+                finder->next = list->next;
+                list->next = finder; /* ... and relink it right after the head */
+            }
+        }
+        if (info.idx >= 0) /* when searching by idx, look for the next idx next time */
+            info.idx++;
+#if CORE_DEBUG
+        ee_printf("List find %d: [%d,%d,%d]\n", i, retval, missed, found);
+#endif
+    }
+    retval += found * 4 - missed;
+    /* sort the list by data content and remove one item*/
+    if (finder_idx > 0) /* the data sort only runs for a positive finder_idx */
+        list = core_list_mergesort(list, cmp_complex, res);
+    remover = core_list_remove(list->next);
+    /* CRC data content of list from location of index N forward, and then undo
+     * remove */
+    finder = core_list_find(list, &info);
+    if (!finder)
+        finder = list->next;
+    while (finder)
+    {
+        retval = crc16(list->info->data16, retval); /* CRC input is the head cell (list), not finder */
+        finder = finder->next;
+    }
+#if CORE_DEBUG
+    ee_printf("List sort 1: %04x\n", retval);
+#endif
+    remover = core_list_undo_remove(remover, list->next);
+    /* sort the list by index, in effect returning the list to original state */
+    list = core_list_mergesort(list, cmp_idx, NULL);
+    /* CRC data content of list */
+    finder = list->next;
+    while (finder)
+    {
+        retval = crc16(list->info->data16, retval); /* CRC input is the head cell (list), not finder */
+        finder = finder->next;
+    }
+#if CORE_DEBUG
+    ee_printf("List sort 2: %04x\n", retval);
+#endif
+    return retval;
+}
+/* Function: core_list_init
+        Initialize list with data.
+        The block is split into `size` list_head cells followed by list_data records.
+        Parameters:
+        blksize - Size of memory to be initialized.
+        memblock - Pointer to memory block.
+        seed - 	Actual values chosen depend on the seed parameter.
+                The seed parameter MUST be supplied from a source that cannot be
+   determined at compile time
+
+        Returns:
+        Pointer to the head of the list.
+        The list is returned already sorted by idx (mergesort with cmp_idx).
+*/
+list_head *
+core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed)
+{
+    /* calculated pointers for the list */
+    ee_u32 per_item = 16 + sizeof(struct list_data_s); /* presumably 16 = room for one list_head with 64b pointers */
+    ee_u32 size = (blksize / per_item)
+                  - 2; /* to accommodate systems with 64b pointers, and make sure
+                          same code is executed, set max list elements */
+    list_head *memblock_end = memblock + size;
+    list_data *datablock = (list_data *)(memblock_end); /* data records start where the list cells end */
+    list_data *datablock_end = datablock + size;
+    /* some useful variables */
+    ee_u32 i;
+    list_head *finder, *list = memblock;
+    list_data info;
+
+    /* create fake items for the list head and tail */
+    list->next = NULL;
+    list->info = datablock;
+    list->info->idx = 0x0000;
+    list->info->data16 = (ee_s16)0x8080;
+    memblock++;
+    datablock++;
+    info.idx = 0x7fff;
+    info.data16 = (ee_s16)0xffff;
+    core_list_insert_new(
+        list, &info, &memblock, &datablock, memblock_end, datablock_end);
+
+    /* then insert size items (each one goes right after the head; NULL returns are ignored) */
+    for (i = 0; i < size; i++)
+    {
+        ee_u16 datpat = ((ee_u16)(seed ^ i) & 0xf);
+        ee_u16 dat
+            = (datpat << 3) | (i & 0x7); /* alternate between algorithms */
+        info.data16 = (dat << 8) | dat; /* fill the data with actual data and
+                                           upper bits with rebuild value */
+        core_list_insert_new(
+            list, &info, &memblock, &datablock, memblock_end, datablock_end);
+    }
+    /* and now index the list so we know initial seed order of the list */
+    finder = list->next;
+    i = 1;
+    while (finder->next != NULL) /* stops before the last cell, which keeps its idx */
+    {
+        if (i < size / 5) /* first 20% of the list in order */
+            finder->info->idx = i++;
+        else
+        {
+            ee_u16 pat = (ee_u16)(i++ ^ seed); /* get a pseudo random number */
+            finder->info->idx = 0x3fff
+                                & (((i & 0x07) << 8)
+                                   | pat); /* make sure the mixed items end up
+                                              after the ones in sequence */
+        }
+        finder = finder->next;
+    }
+    list = core_list_mergesort(list, cmp_idx, NULL);
+#if CORE_DEBUG
+    ee_printf("Initialized list:\n");
+    finder = list;
+    while (finder)
+    {
+        ee_printf(
+            "[%04x,%04x]", finder->info->idx, (ee_u16)finder->info->data16);
+        finder = finder->next;
+    }
+    ee_printf("\n");
+#endif
+    return list;
+}
+
+/* Function: core_list_insert_new
+        Insert a new item into the list, directly after insert_point.
+
+        Parameters:
+        insert_point - where to insert the item.
+        info - data for the cell (copied with copy_info).
+        memblock - in/out: next free list header; advanced on success
+        datablock - in/out: next free list data record; advanced on success
+        memblock_end - end of region for list headers
+        datablock_end - end of region for list data
+
+        Returns:
+        Pointer to new item, or NULL when either region has no room left.
+*/
+list_head *
+core_list_insert_new(list_head * insert_point,
+                     list_data * info,
+                     list_head **memblock,
+                     list_data **datablock,
+                     list_head * memblock_end,
+                     list_data * datablock_end)
+{
+    list_head *newitem;
+
+    if ((*memblock + 1) >= memblock_end) /* no free list header left */
+        return NULL;
+    if ((*datablock + 1) >= datablock_end) /* no free data record left */
+        return NULL;
+
+    newitem = *memblock;
+    (*memblock)++;
+    newitem->next = insert_point->next;
+    insert_point->next = newitem;
+
+    newitem->info = *datablock;
+    (*datablock)++;
+    copy_info(newitem->info, info);
+
+    return newitem;
+}
+
+/* Function: core_list_remove
+        Remove an item from the list.
+
+        Operation:
+        For a singly linked list, remove by copying the data from the next item
+        over to the current cell, and unlinking the next item.
+
+        Note:
+        since there is always a fake item at the end of the list, no need to
+        check for NULL.
+
+        Returns:
+        Removed item.
+*/
+list_head *
+core_list_remove(list_head *item)
+{
+    list_data *tmp;
+    list_head *ret = item->next; /* the cell that actually gets unlinked */
+    /* swap data pointers */
+    tmp = item->info;
+    item->info = ret->info;
+    ret->info = tmp; /* ret now carries the data that item held */
+    /* and eliminate item */
+    item->next = item->next->next;
+    ret->next = NULL;
+    return ret;
+}
+
+/* Function: core_list_undo_remove
+        Undo a remove operation.
+
+        Operation:
+        Since we want each iteration of the benchmark to be exactly the same,
+        we need to be able to undo a remove.
+        Link the removed item back into the list, and switch the info items.
+
+        Parameters:
+        item_removed - Return value from core_list_remove.
+        item_modified - List item that was modified during core_list_remove.
+
+        Returns:
+        The item that was linked back to the list.
+
+*/
+list_head *
+core_list_undo_remove(list_head *item_removed, list_head *item_modified)
+{
+    list_data *tmp;
+    /* swap data pointers */
+    tmp = item_removed->info;
+    item_removed->info = item_modified->info;
+    item_modified->info = tmp;
+    /* and insert item (directly after item_modified) */
+    item_removed->next = item_modified->next;
+    item_modified->next = item_removed;
+    return item_removed;
+}
+
+/* Function: core_list_find
+        Find an item in the list
+
+        Operation:
+        Find by idx when info->idx >= 0, else by the low byte of data16.
+
+        Parameters:
+        list - list head
+        info - idx or data to find
+
+        Returns:
+        Found item, or NULL if not found.
+*/
+list_head *
+core_list_find(list_head *list, list_data *info)
+{
+    if (info->idx >= 0)
+    {
+        while (list && (list->info->idx != info->idx)) /* walk until idx matches or the list ends */
+            list = list->next;
+        return list;
+    }
+    else
+    {
+        while (list && ((list->info->data16 & 0xff) != info->data16)) /* compare the low data byte only */
+            list = list->next;
+        return list;
+    }
+}
+/* Function: core_list_reverse
+        Reverse a list
+
+        Operation:
+        Rearrange the pointers so the list is reversed.
+
+        Parameters:
+        list - list head
+        info - idx or data to find
+
+        Returns:
+        Found item, or NULL if not found.
+*/
+
+list_head *
+core_list_reverse(list_head *list)
+{
+    list_head *next = NULL, *tmp; /* next: head of the already-reversed part */
+    while (list)
+    {
+        tmp = list->next;
+        list->next = next; /* point the current cell back at the reversed part */
+        next = list;
+        list = tmp;
+    }
+    return next; /* new first cell (NULL for an empty list) */
+}
+/* Function: core_list_mergesort
+        Sort the list in place without recursion.
+
+        Description:
+        Use mergesort, as for linked list this is a realistic solution.
+        Also, since this is aimed at embedded, care was taken to use iterative
+   rather than recursive algorithm. The sort can either return the list to
+   original order (by idx) , or use the data item to invoke other
+   algorithms and change the order of the list.
+
+        Parameters:
+        list - list to be sorted.
+        cmp - cmp function to use
+        res - passed unchanged as the third argument of every cmp call
+        Returns:
+        New head of the list.
+
+        Note:
+        We have a special header for the list that will always be first,
+        but the algorithm could theoretically modify where the list starts.
+
+ */
+list_head *
+core_list_mergesort(list_head *list, list_cmp cmp, core_results *res)
+{
+    list_head *p, *q, *e, *tail;
+    ee_s32 insize, nmerges, psize, qsize, i;
+
+    insize = 1; /* length of the runs merged in the current pass */
+
+    while (1)
+    {
+        p = list;
+        list = NULL;
+        tail = NULL;
+
+        nmerges = 0; /* count number of merges we do in this pass */
+
+        while (p)
+        {
+            nmerges++; /* there exists a merge to be done */
+            /* step `insize' places along from p */
+            q = p;
+            psize = 0;
+            for (i = 0; i < insize; i++)
+            {
+                psize++;
+                q = q->next;
+                if (!q)
+                    break;
+            }
+
+            /* if q hasn't fallen off end, we have two lists to merge */
+            qsize = insize;
+
+            /* now we have two lists; merge them */
+            while (psize > 0 || (qsize > 0 && q))
+            {
+
+                /* decide whether next element of merge comes from p or q */
+                if (psize == 0)
+                {
+                    /* p is empty; e must come from q. */
+                    e = q;
+                    q = q->next;
+                    qsize--;
+                }
+                else if (qsize == 0 || !q)
+                {
+                    /* q is empty; e must come from p. */
+                    e = p;
+                    p = p->next;
+                    psize--;
+                }
+                else if (cmp(p->info, q->info, res) <= 0)
+                {
+                    /* First element of p is lower (or same); e must come from
+                     * p. */
+                    e = p;
+                    p = p->next;
+                    psize--;
+                }
+                else
+                {
+                    /* First element of q is lower; e must come from q. */
+                    e = q;
+                    q = q->next;
+                    qsize--;
+                }
+
+                /* add the next element to the merged list */
+                if (tail)
+                {
+                    tail->next = e;
+                }
+                else
+                {
+                    list = e;
+                }
+                tail = e;
+            }
+
+            /* now p has stepped `insize' places along, and q has too */
+            p = q;
+        }
+
+        tail->next = NULL; /* NOTE(review): tail is still NULL here if the input list was empty */
+
+        /* If we have done only one merge, we're finished. */
+        if (nmerges <= 1) /* allow for nmerges==0, the empty list case */
+            return list;
+
+        /* Otherwise repeat, merging lists twice the size */
+        insize *= 2;
+    }
+#if COMPILER_REQUIRES_SORT_RETURN
+    return list; /* not reached: the loop above only exits through its own return */
+#endif
+}
diff --git a/sw/applications/coremark/core_matrix.c b/sw/applications/coremark/core_matrix.c
new file mode 100644
index 000000000..9c7901c3c
--- /dev/null
+++ b/sw/applications/coremark/core_matrix.c
@@ -0,0 +1,376 @@
+/*
+Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Original Author: Shay Gal-on
+*/
+
+// Copyright 2020 OpenHW Group
+// Copyright 2020 Silicon Labs, Inc.
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://solderpad.org/licenses/
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0
+
+#include "coremark.h"
+/*
+Topic: Description
+        Matrix manipulation benchmark
+
+        This very simple algorithm forms the basis of many more complex
+algorithms.
+
+        The tight inner loop is the focus of many optimizations (compiler as
+well as hardware based) and is thus relevant for embedded processing.
+
+        The total available data space will be divided to 3 parts:
+        NxN Matrix A - initialized with small values (upper 3/4 of the bits all
+zero). NxN Matrix B - initialized with medium values (upper half of the bits all
+zero). NxN Matrix C - used for the result.
+
+        The actual values for A and B must be derived based on input that is not
+available at compile time.
+*/
+ee_s16 matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val);
+ee_s16 matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval);
+void matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val);
+void matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
+void matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
+void matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B);
+void matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val);
+
+#define matrix_test_next(x) (x + 1)
+#define matrix_clip(x, y) ((y) ? (x)&0x0ff : (x)&0x0ffff) /* low 8 bits of x if y is non-zero, else low 16 bits */
+#define matrix_big(x) (0xf000 | (x)) /* x with the 0xf000 bits forced on */
+#define bit_extract(x, from, to) (((x) >> (from)) & (~(0xffffffff << (to)))) /* shift right by from, keep the low `to` bits */
+
+#if CORE_DEBUG /* debug-only helpers that print an NxN matrix row by row */
+void
+printmat(MATDAT *A, ee_u32 N, char *name)
+{
+    ee_u32 i, j;
+    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
+    for (i = 0; i < N; i++)
+    {
+        for (j = 0; j < N; j++)
+        {
+            if (j != 0) /* comma between elements, none before the first */
+                ee_printf(",");
+            ee_printf("%d", A[i * N + j]);
+        }
+        ee_printf("\n");
+    }
+}
+void
+printmatC(MATRES *C, ee_u32 N, char *name) /* same as printmat, for MATRES elements */
+{
+    ee_u32 i, j;
+    ee_printf("Matrix %s [%dx%d]:\n", name, N, N);
+    for (i = 0; i < N; i++)
+    {
+        for (j = 0; j < N; j++)
+        {
+            if (j != 0)
+                ee_printf(",");
+            ee_printf("%d", C[i * N + j]);
+        }
+        ee_printf("\n");
+    }
+}
+#endif
+/* Function: core_bench_matrix
+        Benchmark function
+
+        Run matrix_test once on the matrices in p with val = (MATDAT)seed and
+        fold its return value into crc with crc16; the updated crc is returned.
+*/
+ee_u16
+core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc)
+{
+    ee_u32 N = p->N;
+    MATRES *C = p->C;
+    MATDAT *A = p->A;
+    MATDAT *B = p->B;
+    MATDAT val = (MATDAT)seed;
+
+    crc = crc16(matrix_test(N, C, A, B, val), crc);
+
+    return crc;
+}
+
+/* Function: matrix_test
+        Perform matrix manipulation.
+
+        Parameters:
+        N - Dimensions of the matrix.
+        C - memory for result matrix.
+        A - input matrix
+        B - operator matrix (not changed during operations)
+
+        Returns:
+        A CRC value that captures all results calculated in the function.
+        In particular, crc of the value calculated on the result matrix
+        after each step by .
+
+        Operation:
+
+        1 - Add a constant value to all elements of a matrix.
+        2 - Multiply a matrix by a constant.
+        3 - Multiply a matrix by a vector.
+        4 - Multiply a matrix by a matrix.
+        5 - Add a constant value to all elements of a matrix.
+
+        After the last step, matrix A is back to original contents.
+*/
+ee_s16
+matrix_test(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B, MATDAT val)
+{
+    ee_u16 crc = 0; /* local CRC over the four matrix_sum results */
+    MATDAT clipval = matrix_big(val); /* threshold handed to matrix_sum */
+
+    matrix_add_const(N, A, val); /* make sure data changes  */
+#if CORE_DEBUG
+    printmat(A, N, "matrix_add_const");
+#endif
+    matrix_mul_const(N, C, A, val); /* step: C = A * val */
+    crc = crc16(matrix_sum(N, C, clipval), crc);
+#if CORE_DEBUG
+    printmatC(C, N, "matrix_mul_const");
+#endif
+    matrix_mul_vect(N, C, A, B); /* step: matrix times vector */
+    crc = crc16(matrix_sum(N, C, clipval), crc);
+#if CORE_DEBUG
+    printmatC(C, N, "matrix_mul_vect");
+#endif
+    matrix_mul_matrix(N, C, A, B); /* step: matrix times matrix */
+    crc = crc16(matrix_sum(N, C, clipval), crc);
+#if CORE_DEBUG
+    printmatC(C, N, "matrix_mul_matrix");
+#endif
+    matrix_mul_matrix_bitextract(N, C, A, B); /* step: matrix product built from extracted bit fields */
+    crc = crc16(matrix_sum(N, C, clipval), crc);
+#if CORE_DEBUG
+    printmatC(C, N, "matrix_mul_matrix_bitextract");
+#endif
+
+    matrix_add_const(N, A, -val); /* return matrix to initial value */
+    return crc; /* the ee_u16 CRC goes out through the ee_s16 return type */
+}
+
+/* Function : matrix_init
+        Initialize the memory block for matrix benchmarking.
+
+        Parameters:
+        blksize - Size of memory to be initialized.
+        memblk - Pointer to memory block.
+        seed - Actual values chosen depend on the seed parameter.
+        p - pointers to containing initialized matrixes.
+
+        Returns:
+        Matrix dimensions.
+
+        Note:
+        The seed parameter MUST be supplied from a source that cannot be
+   determined at compile time
+*/
+ee_u32
+core_init_matrix(ee_u32 blksize, void *memblk, ee_s32 seed, mat_params *p)
+{
+    ee_u32 N = 0;
+    MATDAT *A;
+    MATDAT *B;
+    ee_s32 order = 1;
+    MATDAT val;
+    ee_u32 i = 0, j = 0;
+    if (seed == 0) /* a zero seed is replaced by 1 */
+        seed = 1;
+    while (j < blksize) /* grow i until i*i*8 reaches blksize */
+    {
+        i++;
+        j = i * i * 2 * 4;
+    }
+    N = i - 1; /* largest N with N*N*8 < blksize; NOTE(review): wraps when blksize is 0 */
+    A = (MATDAT *)align_mem(memblk);
+    B = A + N * N; /* B follows A directly */
+
+    for (i = 0; i < N; i++)
+    {
+        for (j = 0; j < N; j++)
+        {
+            seed = ((order * seed) % 65536);
+            val = (seed + order);
+            val = matrix_clip(val, 0); /* B keeps the low 16 bits */
+            B[i * N + j] = val;
+            val = (val + order);
+            val = matrix_clip(val, 1); /* A keeps only the low 8 bits */
+            A[i * N + j] = val;
+            order++;
+        }
+    }
+
+    p->A = A;
+    p->B = B;
+    p->C = (MATRES *)align_mem(B + N * N); /* C starts at the aligned address after B */
+    p->N = N;
+#if CORE_DEBUG
+    printmat(A, N, "A");
+    printmat(B, N, "B");
+#endif
+    return N;
+}
+
+/* Function: matrix_sum
+        Calculate a function that depends on the values of elements in the
+   matrix.
+
+        For each element, accumulate into a temporary variable.
+
+        As long as this value is not above the parameter clipval,
+        add 1 to the result if the element is bigger than the previous.
+
+        Otherwise, reset the accumulator and add 10 to the result.
+*/
+ee_s16
+matrix_sum(ee_u32 N, MATRES *C, MATDAT clipval)
+{
+    MATRES tmp = 0, prev = 0, cur = 0; /* tmp: running sum; prev: previous element */
+    ee_s16 ret = 0;
+    ee_u32 i, j;
+    for (i = 0; i < N; i++)
+    {
+        for (j = 0; j < N; j++)
+        {
+            cur = C[i * N + j];
+            tmp += cur;
+            if (tmp > clipval)
+            {
+                ret += 10;
+                tmp = 0;
+            }
+            else
+            {
+                ret += (cur > prev) ? 1 : 0;
+            }
+            prev = cur;
+        }
+    }
+    return ret;
+}
+
+/* Function: matrix_mul_const
+        Multiply a matrix by a constant.
+        This could be used as a scaler for instance.
+*/
+void
+matrix_mul_const(ee_u32 N, MATRES *C, MATDAT *A, MATDAT val)
+{
+    ee_u32 i, j;
+    for (i = 0; i < N; i++)
+    {
+        for (j = 0; j < N; j++)
+        {
+            C[i * N + j] = (MATRES)A[i * N + j] * (MATRES)val; /* both operands are cast to MATRES before multiplying */
+        }
+    }
+}
+
+/* Function: matrix_add_const
+        Add val to every element of the NxN matrix A, in place.
+*/
+void
+matrix_add_const(ee_u32 N, MATDAT *A, MATDAT val)
+{
+    ee_u32 i, j;
+    for (i = 0; i < N; i++)
+    {
+        for (j = 0; j < N; j++)
+        {
+            A[i * N + j] += val;
+        }
+    }
+}
+
+/* Function: matrix_mul_vect
+        Multiply a matrix by a vector: C[i] = sum over j of A[i*N+j] * B[j].
+        Only C[0..N-1] is written. This is common in many simple filters
+        (e.g. fir where a vector of coefficients is applied to the matrix.)
+*/
+void
+matrix_mul_vect(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
+{
+    ee_u32 i, j;
+    for (i = 0; i < N; i++)
+    {
+        C[i] = 0;
+        for (j = 0; j < N; j++)
+        {
+            C[i] += (MATRES)A[i * N + j] * (MATRES)B[j]; /* only the first N entries of B are read */
+        }
+    }
+}
+
+/* Function: matrix_mul_matrix
+        Multiply a matrix by a matrix: C = A x B, every matrix indexed as [i*N+j].
+        Basic code is used in many algorithms, mostly with minor changes such as
+        scaling.
+*/
+void
+matrix_mul_matrix(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
+{
+    ee_u32 i, j, k;
+    for (i = 0; i < N; i++)
+    {
+        for (j = 0; j < N; j++)
+        {
+            C[i * N + j] = 0;
+            for (k = 0; k < N; k++)
+            {
+                C[i * N + j] += (MATRES)A[i * N + k] * (MATRES)B[k * N + j]; /* row i of A times column j of B */
+            }
+        }
+    }
+}
+
+/* Function: matrix_mul_matrix_bitextract
+        Multiply a matrix by a matrix, and extract some bits from the result.
+        Basic code is used in many algorithms, mostly with minor changes such as
+        scaling.
+*/
+void
+matrix_mul_matrix_bitextract(ee_u32 N, MATRES *C, MATDAT *A, MATDAT *B)
+{
+    ee_u32 i, j, k;
+    for (i = 0; i < N; i++)
+    {
+        for (j = 0; j < N; j++)
+        {
+            C[i * N + j] = 0;
+            for (k = 0; k < N; k++)
+            {
+                MATRES tmp = (MATRES)A[i * N + k] * (MATRES)B[k * N + j]; /* the plain product term */
+                C[i * N + j] += bit_extract(tmp, 2, 4) * bit_extract(tmp, 5, 7); /* (tmp >> 2, low 4 bits) times (tmp >> 5, low 7 bits) */
+            }
+        }
+    }
+}
diff --git a/sw/applications/coremark/core_portme.c b/sw/applications/coremark/core_portme.c
new file mode 100644
index 000000000..3b40763c7
--- /dev/null
+++ b/sw/applications/coremark/core_portme.c
@@ -0,0 +1,104 @@
+/*
+Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Original Author: Shay Gal-on
+*/
+
+// Copyright 2020 OpenHW Group
+// Copyright 2020 Silicon Labs, Inc.
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://solderpad.org/licenses/
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0
+
+#include "csr.h"
+#include "x-heep.h"
+
+#include "coremark.h"
+
+ee_u32 default_num_contexts = 1;
+
+static CORETIMETYPE start_time_val, stop_time_val; /* mcycle snapshots written by start_time() / stop_time() */
+
+#if VALIDATION_RUN /* exactly one of the three run-type blocks is expected to supply seed1..seed3 */
+volatile ee_s32 seed1_volatile = 0x3415;
+volatile ee_s32 seed2_volatile = 0x3415;
+volatile ee_s32 seed3_volatile = 0x66;
+#endif
+#if PERFORMANCE_RUN
+volatile ee_s32 seed1_volatile = 0x0;
+volatile ee_s32 seed2_volatile = 0x0;
+volatile ee_s32 seed3_volatile = 0x66;
+#endif
+#if PROFILE_RUN
+volatile ee_s32 seed1_volatile = 0x8;
+volatile ee_s32 seed2_volatile = 0x8;
+volatile ee_s32 seed3_volatile = 0x8;
+#endif
+volatile ee_s32 seed4_volatile = ITERATIONS; /* ITERATIONS is not defined in this file */
+volatile ee_s32 seed5_volatile = 0;
+/* portable_init: port start-up hook; all three arguments are deliberately unused. */
+void
+portable_init(core_portable *p, int *argc, char *argv[])
+{
+    // Don't need to do anything here atm.
+    (void)p;
+    (void)argc;
+    (void)argv;
+}
+/* portable_fini: port shutdown hook; p is deliberately unused. */
+void
+portable_fini(core_portable *p)
+{
+    // Don't need to do anything here atm.
+    (void)p;
+}
+/* start_time: clear bit 0 of mcountinhibit, then record mcycle as the start value. */
+void
+start_time(void)
+{
+    // Enable mcycle counter and read value
+    CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1);
+
+    CSR_READ(CSR_REG_MCYCLE, &start_time_val);
+}
+/* stop_time: record mcycle as the stop value. */
+void
+stop_time(void)
+{
+    CSR_READ(CSR_REG_MCYCLE, &stop_time_val);
+}
+/* get_time: ticks elapsed between the start_time() and stop_time() snapshots. */
+CORE_TICKS
+get_time(void)
+{
+    return (stop_time_val - start_time_val);
+}
+/* time_in_secs: convert a tick count to seconds, counting each tick as 1e-6 s. */
+secs_ret
+time_in_secs(CORE_TICKS ticks)
+{
+    return ticks*1E-6; // Normalized to 1 MHz clock period
+}
diff --git a/sw/applications/coremark/core_portme.h b/sw/applications/coremark/core_portme.h
new file mode 100644
index 000000000..5a15066a5
--- /dev/null
+++ b/sw/applications/coremark/core_portme.h
@@ -0,0 +1,93 @@
+/*
+Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+//
+// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0
+
+#include /* NOTE(review): header name is missing in this copy of the patch - restore it */
+#include /* NOTE(review): header name is missing in this copy of the patch - restore it */
+
+typedef signed short ee_s16;
+typedef unsigned short ee_u16;
+typedef signed int ee_s32;
+typedef double ee_f32; /* despite the name, this port maps ee_f32 to double */
+typedef unsigned char ee_u8;
+typedef unsigned int ee_u32;
+typedef ee_u32 ee_ptr_int; /* integer type used by align_mem for pointer arithmetic */
+typedef size_t ee_size_t;
+
+typedef ee_u32 CORE_TICKS; /* tick-count type; same underlying type as CORETIMETYPE */
+
+typedef struct CORE_PORTABLE_S
+{
+    ee_u8 portable_id;
+} core_portable;
+
+#ifndef MULTITHREAD
+#define MULTITHREAD 1 // 1 means single-core
+#define USE_PTHREAD 0
+#define USE_FORK 0
+#define USE_SOCKET 0
+#endif
+
+#ifndef COMPILER_VERSION
+#ifdef __GNUC__
+#define COMPILER_VERSION "GCC"__VERSION__
+#else
+#define COMPILER_VERSION "Undefined non-gcc compiler used"
+#endif
+#endif
+
+#ifndef COMPILER_FLAGS
+#define COMPILER_FLAGS FLAGS_STR /* FLAGS_STR is not defined in this header */
+#endif
+
+#ifndef MEM_LOCATION
+#define MEM_LOCATION ""
+#endif
+
+#ifndef SEED_METHOD
+#define SEED_METHOD SEED_VOLATILE /* presumably selects the seedN_volatile variables - confirm */
+#endif
+
+#ifndef HAS_PRINTF
+#define HAS_PRINTF 1
+#endif
+
+#define align_mem(x) (void *)(4 + (((ee_ptr_int)(x)-1) & ~3)) /* round x up to the next multiple of 4 */
+
+#define CORETIMETYPE ee_u32
+
+extern ee_u32 default_num_contexts;
+
+void portable_init(core_portable *p, int *argc, char *argv[]);
+void portable_fini(core_portable *p);
diff --git a/sw/applications/coremark/core_state.c b/sw/applications/coremark/core_state.c
new file mode 100644
index 000000000..b5c3d461c
--- /dev/null
+++ b/sw/applications/coremark/core_state.c
@@ -0,0 +1,347 @@
+/*
+Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC)
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License.
+
+Original Author: Shay Gal-on
+*/
+
+// Copyright 2020 OpenHW Group
+// Copyright 2020 Silicon Labs, Inc.
+//
+// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     https://solderpad.org/licenses/
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+//
+// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0
+
+#include "coremark.h"
+/* local functions */
+enum CORE_STATE core_state_transition(ee_u8 **instr, ee_u32 *transition_count);
+
+/*
+Topic: Description
+        Simple state machines like this one are used in many embedded products.
+
+        For more complex state machines, sometimes a state transition table
+implementation is used instead, trading speed of direct coding for ease of
+maintenance.
+
+        Since the main goal of using a state machine in CoreMark is to exercise
+the switch/if behaviour, we are using a small Moore machine.
+
+        In particular, this machine tests type of string input,
+        trying to determine whether the input is a number or something else.
+        (see core_state.png).
+*/
+
+/* Function: core_bench_state
+        Benchmark function
+
+        Go over the input twice, once direct, and once after introducing some
+        corruption.
+*/
+ee_u16
+core_bench_state(ee_u32 blksize,
+                 ee_u8 *memblock,
+                 ee_s16 seed1,
+                 ee_s16 seed2,
+                 ee_s16 step,
+                 ee_u16 crc)
+{
+    ee_u32 final_counts[NUM_CORE_STATES]; /* how many tokens ended in each state */
+    ee_u32 track_counts[NUM_CORE_STATES]; /* per-state counters updated by core_state_transition */
+    ee_u8 *p = memblock;
+    ee_u32 i;
+
+#if CORE_DEBUG
+    ee_printf("State Bench: %d,%d,%d,%04x\n", seed1, seed2, step, crc);
+#endif
+    for (i = 0; i < NUM_CORE_STATES; i++)
+    {
+        final_counts[i] = track_counts[i] = 0;
+    }
+    /* run the state machine over the input */
+    while (*p != 0) /* input ends at the first zero byte */
+    {
+        enum CORE_STATE fstate = core_state_transition(&p, track_counts);
+        final_counts[fstate]++;
+#if CORE_DEBUG
+        ee_printf("%d,", fstate);
+    }
+    ee_printf("\n");
+#else /* without debug output only the loop's closing brace remains */
+    }
+#endif
+    p = memblock;
+    while (p < (memblock + blksize))
+    { /* insert some corruption */
+        if (*p != ',') /* separators are never altered */
+            *p ^= (ee_u8)seed1;
+        p += step; /* only every step-th byte is visited */
+    }
+    p = memblock;
+    /* run the state machine over the input again */
+    while (*p != 0)
+    {
+        enum CORE_STATE fstate = core_state_transition(&p, track_counts);
+        final_counts[fstate]++;
+#if CORE_DEBUG
+        ee_printf("%d,", fstate);
+    }
+    ee_printf("\n");
+#else /* without debug output only the loop's closing brace remains */
+    }
+#endif
+    p = memblock;
+    while (p < (memblock + blksize))
+    { /* undo corruption if seed1 and seed2 are equal */
+        if (*p != ',')
+            *p ^= (ee_u8)seed2;
+        p += step;
+    }
+    /* end timing */
+    for (i = 0; i < NUM_CORE_STATES; i++)
+    {
+        crc = crcu32(final_counts[i], crc); /* fold both counters of every state into crc */
+        crc = crcu32(track_counts[i], crc);
+    }
+    return crc;
+}
+
+/* Default initialization patterns: four 4-char ints, and four 8-char strings each of float, scientific and invalid */
+static ee_u8 *intpat[4]
+    = { (ee_u8 *)"5012", (ee_u8 *)"1234", (ee_u8 *)"-874", (ee_u8 *)"+122" };
+static ee_u8 *floatpat[4] = { (ee_u8 *)"35.54400",
+                              (ee_u8 *)".1234500",
+                              (ee_u8 *)"-110.700",
+                              (ee_u8 *)"+0.64400" };
+static ee_u8 *scipat[4] = { (ee_u8 *)"5.500e+3",
+                            (ee_u8 *)"-.123e-2",
+                            (ee_u8 *)"-87e+832",
+                            (ee_u8 *)"+0.6e-12" };
+static ee_u8 *errpat[4] = { (ee_u8 *)"T0.3e-1F",
+                            (ee_u8 *)"-T.T++Tq",
+                            (ee_u8 *)"1T3.4e4z",
+                            (ee_u8 *)"34.0e-T^" };
+
+/* Function: core_init_state
+        Initialize the input data for the state machine.
+
+        Populate the input with several predetermined strings, interspersed.
+        Actual patterns chosen depend on the seed parameter.
+
+        Note:
+        The seed parameter MUST be supplied from a source that cannot be
+   determined at compile time
+*/
+void
+core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p)
+{
+    ee_u32 total = 0, next = 0, i; /* total: bytes written so far; next: length of the pending pattern */
+    ee_u8 *buf = 0; /* pattern chosen on the previous pass, written on the next one */
+#if CORE_DEBUG
+    ee_u8 *start = p;
+    ee_printf("State: %d,%d\n", size, seed);
+#endif
+    size--; /* work with one byte less while filling; restored below */
+    next = 0;
+    while ((total + next + 1) < size) /* continue only while pattern plus separator still fit */
+    {
+        if (next > 0)
+        {
+            for (i = 0; i < next; i++)
+                *(p + total + i) = buf[i];
+            *(p + total + i) = ','; /* every pattern is followed by a comma */
+            total += next + 1;
+        }
+        seed++;
+        switch (seed & 0x7) /* low 3 bits pick the family; bits 3-4 pick one of its 4 variants */
+        {
+            case 0: /* int */
+            case 1: /* int */
+            case 2: /* int */
+                buf = intpat[(seed >> 3) & 0x3];
+                next = 4;
+                break;
+            case 3: /* float */
+            case 4: /* float */
+                buf = floatpat[(seed >> 3) & 0x3];
+                next = 8;
+                break;
+            case 5: /* scientific */
+            case 6: /* scientific */
+                buf = scipat[(seed >> 3) & 0x3];
+                next = 8;
+                break;
+            case 7: /* invalid */
+                buf = errpat[(seed >> 3) & 0x3];
+                next = 8;
+                break;
+            default: /* Never happen, just to make some compilers happy */
+                break;
+        }
+    }
+    size++;
+    while (total < size)
+    { /* fill the rest with 0 */
+        *(p + total) = 0;
+        total++;
+    }
+#if CORE_DEBUG
+    ee_printf("State Input: %s\n", start);
+#endif
+}
+/* ee_isdigit: return 1 when c is in '0'..'9', otherwise 0. */
+static ee_u8
+ee_isdigit(ee_u8 c)
+{
+    ee_u8 retval;
+    retval = ((c >= '0') & (c <= '9')) ? 1 : 0; /* bitwise & on the two comparison results */
+    return retval;
+}
+
+/* Function: core_state_transition
+        Actual state machine.
+
+        The state machine will continue scanning until either:
+        1 - an invalid input is detcted.
+        2 - a valid number has been detected.
+
+        The input pointer is updated to point to the end of the token, and the
+        end state is returned (either specific format determined or invalid).
/* Function: core_state_transition
        Actual state machine.

        Consumes one token of the input and classifies it. The scan stops
        when either:
        1 - an invalid input is detected,
        2 - the ',' that ends the token has been consumed, or
        3 - the terminating 0 byte is reached.

        The input pointer is updated to point just past the consumed
        characters, and the end state is returned (either the specific number
        format recognised so far or CORE_INVALID).

        Parameters:
        instr            - in/out, position in the input text
        transition_count - counters indexed by state, updated in place

        Note:
        The counter updates are intentionally not uniform: CORE_START counts
        every symbol it sees (and books a bad one under CORE_INVALID as
        well), and CORE_SCIENTIFIC books a bad symbol under CORE_INVALID
        rather than under its own index. <core_bench_state> folds these
        counters into the benchmark CRC, so the accounting must stay exactly
        as it is.
*/

enum CORE_STATE
core_state_transition(ee_u8 **instr, ee_u32 *transition_count)
{
    ee_u8 *         str = *instr;
    ee_u8           NEXT_SYMBOL;
    enum CORE_STATE state = CORE_START;
    /* the loop increment also runs after the symbol that made the state
       invalid, so the returned position is one past the offending symbol */
    for (; *str && state != CORE_INVALID; str++)
    {
        NEXT_SYMBOL = *str;
        if (NEXT_SYMBOL == ',') /* end of this input */
        {
            str++; /* step over the separator; break skips the loop increment */
            break;
        }
        switch (state)
        {
            case CORE_START: /* nothing consumed yet */
                if (ee_isdigit(NEXT_SYMBOL))
                {
                    state = CORE_INT;
                }
                else if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
                {
                    state = CORE_S1;
                }
                else if (NEXT_SYMBOL == '.')
                {
                    state = CORE_FLOAT;
                }
                else
                {
                    state = CORE_INVALID;
                    transition_count[CORE_INVALID]++;
                }
                transition_count[CORE_START]++;
                break;
            case CORE_S1: /* leading sign seen */
                if (ee_isdigit(NEXT_SYMBOL))
                {
                    state = CORE_INT;
                    transition_count[CORE_S1]++;
                }
                else if (NEXT_SYMBOL == '.')
                {
                    state = CORE_FLOAT;
                    transition_count[CORE_S1]++;
                }
                else
                {
                    state = CORE_INVALID;
                    transition_count[CORE_S1]++;
                }
                break;
            case CORE_INT: /* integer digits; further digits keep the state */
                if (NEXT_SYMBOL == '.')
                {
                    state = CORE_FLOAT;
                    transition_count[CORE_INT]++;
                }
                else if (!ee_isdigit(NEXT_SYMBOL))
                {
                    state = CORE_INVALID;
                    transition_count[CORE_INT]++;
                }
                break;
            case CORE_FLOAT: /* after the decimal point */
                if (NEXT_SYMBOL == 'E' || NEXT_SYMBOL == 'e')
                {
                    state = CORE_S2;
                    transition_count[CORE_FLOAT]++;
                }
                else if (!ee_isdigit(NEXT_SYMBOL))
                {
                    state = CORE_INVALID;
                    transition_count[CORE_FLOAT]++;
                }
                break;
            case CORE_S2: /* exponent marker seen, a sign is mandatory */
                if (NEXT_SYMBOL == '+' || NEXT_SYMBOL == '-')
                {
                    state = CORE_EXPONENT;
                    transition_count[CORE_S2]++;
                }
                else
                {
                    state = CORE_INVALID;
                    transition_count[CORE_S2]++;
                }
                break;
            case CORE_EXPONENT: /* exponent sign seen, a digit must follow */
                if (ee_isdigit(NEXT_SYMBOL))
                {
                    state = CORE_SCIENTIFIC;
                    transition_count[CORE_EXPONENT]++;
                }
                else
                {
                    state = CORE_INVALID;
                    transition_count[CORE_EXPONENT]++;
                }
                break;
            case CORE_SCIENTIFIC: /* exponent digits */
                if (!ee_isdigit(NEXT_SYMBOL))
                {
                    state = CORE_INVALID;
                    transition_count[CORE_INVALID]++;
                }
                break;
            default:
                break;
        }
    }
    *instr = str;
    return state;
}
Please note, if using this method, + you would need to modify core_portme.c to generate training profile. 2 - + Command line arguments. This is the preferred method if command line + arguments are supported. 3 - System function. If none of the first 2 methods + is available on the platform, a system function which is not a stub can be + used. + + e.g. read the value on GPIO pins connected to switches, or invoke + special simulator functions. +*/ +#if (SEED_METHOD == SEED_VOLATILE) +extern volatile ee_s32 seed1_volatile; +extern volatile ee_s32 seed2_volatile; +extern volatile ee_s32 seed3_volatile; +extern volatile ee_s32 seed4_volatile; +extern volatile ee_s32 seed5_volatile; +ee_s32 +get_seed_32(int i) +{ + ee_s32 retval; + switch (i) + { + case 1: + retval = seed1_volatile; + break; + case 2: + retval = seed2_volatile; + break; + case 3: + retval = seed3_volatile; + break; + case 4: + retval = seed4_volatile; + break; + case 5: + retval = seed5_volatile; + break; + default: + retval = 0; + break; + } + return retval; +} +#elif (SEED_METHOD == SEED_ARG) +ee_s32 +parseval(char *valstring) +{ + ee_s32 retval = 0; + ee_s32 neg = 1; + int hexmode = 0; + if (*valstring == '-') + { + neg = -1; + valstring++; + } + if ((valstring[0] == '0') && (valstring[1] == 'x')) + { + hexmode = 1; + valstring += 2; + } + /* first look for digits */ + if (hexmode) + { + while (((*valstring >= '0') && (*valstring <= '9')) + || ((*valstring >= 'a') && (*valstring <= 'f'))) + { + ee_s32 digit = *valstring - '0'; + if (digit > 9) + digit = 10 + *valstring - 'a'; + retval *= 16; + retval += digit; + valstring++; + } + } + else + { + while ((*valstring >= '0') && (*valstring <= '9')) + { + ee_s32 digit = *valstring - '0'; + retval *= 10; + retval += digit; + valstring++; + } + } + /* now add qualifiers */ + if (*valstring == 'K') + retval *= 1024; + if (*valstring == 'M') + retval *= 1024 * 1024; + + retval *= neg; + return retval; +} + +ee_s32 +get_seed_args(int i, int argc, char *argv[]) 
+{ + if (argc > i) + return parseval(argv[i]); + return 0; +} + +#elif (SEED_METHOD == SEED_FUNC) +/* If using OS based function, you must define and implement the functions below + * in core_portme.h and core_portme.c ! */ +ee_s32 +get_seed_32(int i) +{ + ee_s32 retval; + switch (i) + { + case 1: + retval = portme_sys1(); + break; + case 2: + retval = portme_sys2(); + break; + case 3: + retval = portme_sys3(); + break; + case 4: + retval = portme_sys4(); + break; + case 5: + retval = portme_sys5(); + break; + default: + retval = 0; + break; + } + return retval; +} +#endif + +/* Function: crc* + Service functions to calculate 16b CRC code. + +*/ +ee_u16 +crcu8(ee_u8 data, ee_u16 crc) +{ + ee_u8 i = 0, x16 = 0, carry = 0; + + for (i = 0; i < 8; i++) + { + x16 = (ee_u8)((data & 1) ^ ((ee_u8)crc & 1)); + data >>= 1; + + if (x16 == 1) + { + crc ^= 0x4002; + carry = 1; + } + else + carry = 0; + crc >>= 1; + if (carry) + crc |= 0x8000; + else + crc &= 0x7fff; + } + return crc; +} +ee_u16 +crcu16(ee_u16 newval, ee_u16 crc) +{ + crc = crcu8((ee_u8)(newval), crc); + crc = crcu8((ee_u8)((newval) >> 8), crc); + return crc; +} +ee_u16 +crcu32(ee_u32 newval, ee_u16 crc) +{ + crc = crc16((ee_s16)newval, crc); + crc = crc16((ee_s16)(newval >> 16), crc); + return crc; +} +ee_u16 +crc16(ee_s16 newval, ee_u16 crc) +{ + return crcu16((ee_u16)newval, crc); +} + +ee_u8 +check_data_types() +{ + ee_u8 retval = 0; + if (sizeof(ee_u8) != 1) + { + ee_printf("ERROR: ee_u8 is not an 8b datatype!\n"); + retval++; + } + if (sizeof(ee_u16) != 2) + { + ee_printf("ERROR: ee_u16 is not a 16b datatype!\n"); + retval++; + } + if (sizeof(ee_s16) != 2) + { + ee_printf("ERROR: ee_s16 is not a 16b datatype!\n"); + retval++; + } + if (sizeof(ee_s32) != 4) + { + ee_printf("ERROR: ee_s32 is not a 32b datatype!\n"); + retval++; + } + if (sizeof(ee_u32) != 4) + { + ee_printf("ERROR: ee_u32 is not a 32b datatype!\n"); + retval++; + } + if (sizeof(ee_ptr_int) != sizeof(int *)) + { + ee_printf( + "ERROR: 
ee_ptr_int is not a datatype that holds an int pointer!\n"); + retval++; + } + if (retval > 0) + { + ee_printf("ERROR: Please modify the datatypes in core_portme.h!\n"); + } + return retval; +} diff --git a/sw/applications/coremark/coremark.h b/sw/applications/coremark/coremark.h new file mode 100644 index 000000000..4f2b074df --- /dev/null +++ b/sw/applications/coremark/coremark.h @@ -0,0 +1,211 @@ +/* +Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0 + +/* Topic: Description + This file contains declarations of the various benchmark functions. 
+*/ + +/* Configuration: TOTAL_DATA_SIZE + Define total size for data algorithms will operate on +*/ +#ifndef TOTAL_DATA_SIZE +#define TOTAL_DATA_SIZE 2 * 1000 +#endif + +#define SEED_ARG 0 +#define SEED_FUNC 1 +#define SEED_VOLATILE 2 + +#define MEM_STATIC 0 +#define MEM_MALLOC 1 +#define MEM_STACK 2 + +#include "core_portme.h" + +#if HAS_STDIO +#include +#endif +#if HAS_PRINTF +/* By default, printfs are activated for FPGA and disabled for simulation. */ +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 0 + +#if TARGET_SIM && PRINTF_IN_SIM + #define ee_printf(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA && !TARGET_SIM + #define ee_printf(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else + #define ee_printf printf +#endif + +#endif + +/* Actual benchmark execution in iterate */ +void *iterate(void *pres); + +/* Typedef: secs_ret + For machines that have floating point support, get number of seconds as + a double. Otherwise an unsigned int. +*/ +#if HAS_FLOAT +typedef double secs_ret; +#else +typedef ee_u32 secs_ret; +#endif + +#if MAIN_HAS_NORETURN +#define MAIN_RETURN_VAL +#define MAIN_RETURN_TYPE void +#else +#define MAIN_RETURN_VAL 0 +#define MAIN_RETURN_TYPE int +#endif + +void start_time(void); +void stop_time(void); +CORE_TICKS get_time(void); +secs_ret time_in_secs(CORE_TICKS ticks); + +/* Misc useful functions */ +ee_u16 crcu8(ee_u8 data, ee_u16 crc); +ee_u16 crc16(ee_s16 newval, ee_u16 crc); +ee_u16 crcu16(ee_u16 newval, ee_u16 crc); +ee_u16 crcu32(ee_u32 newval, ee_u16 crc); +ee_u8 check_data_types(void); +void * portable_malloc(ee_size_t size); +void portable_free(void *p); +ee_s32 parseval(char *valstring); + +/* Algorithm IDS */ +#define ID_LIST (1 << 0) +#define ID_MATRIX (1 << 1) +#define ID_STATE (1 << 2) +#define ALL_ALGORITHMS_MASK (ID_LIST | ID_MATRIX | ID_STATE) +#define NUM_ALGORITHMS 3 + +/* list data structures */ +typedef struct list_data_s +{ + ee_s16 data16; + ee_s16 idx; +} list_data; + +typedef struct list_head_s +{ + struct 
list_head_s *next; + struct list_data_s *info; +} list_head; + +/*matrix benchmark related stuff */ +#define MATDAT_INT 1 +#if MATDAT_INT +typedef ee_s16 MATDAT; +typedef ee_s32 MATRES; +#else +typedef ee_f16 MATDAT; +typedef ee_f32 MATRES; +#endif + +typedef struct MAT_PARAMS_S +{ + int N; + MATDAT *A; + MATDAT *B; + MATRES *C; +} mat_params; + +/* state machine related stuff */ +/* List of all the possible states for the FSM */ +typedef enum CORE_STATE +{ + CORE_START = 0, + CORE_INVALID, + CORE_S1, + CORE_S2, + CORE_INT, + CORE_FLOAT, + CORE_EXPONENT, + CORE_SCIENTIFIC, + NUM_CORE_STATES +} core_state_e; + +/* Helper structure to hold results */ +typedef struct RESULTS_S +{ + /* inputs */ + ee_s16 seed1; /* Initializing seed */ + ee_s16 seed2; /* Initializing seed */ + ee_s16 seed3; /* Initializing seed */ + void * memblock[4]; /* Pointer to safe memory location */ + ee_u32 size; /* Size of the data */ + ee_u32 iterations; /* Number of iterations to execute */ + ee_u32 execs; /* Bitmask of operations to execute */ + struct list_head_s *list; + mat_params mat; + /* outputs */ + ee_u16 crc; + ee_u16 crclist; + ee_u16 crcmatrix; + ee_u16 crcstate; + ee_s16 err; + /* ultithread specific */ + core_portable port; +} core_results; + +/* Multicore execution handling */ +#if (MULTITHREAD > 1) +ee_u8 core_start_parallel(core_results *res); +ee_u8 core_stop_parallel(core_results *res); +#endif + +/* list benchmark functions */ +list_head *core_list_init(ee_u32 blksize, list_head *memblock, ee_s16 seed); +ee_u16 core_bench_list(core_results *res, ee_s16 finder_idx); + +/* state benchmark functions */ +void core_init_state(ee_u32 size, ee_s16 seed, ee_u8 *p); +ee_u16 core_bench_state(ee_u32 blksize, + ee_u8 *memblock, + ee_s16 seed1, + ee_s16 seed2, + ee_s16 step, + ee_u16 crc); + +/* matrix benchmark functions */ +ee_u32 core_init_matrix(ee_u32 blksize, + void * memblk, + ee_s32 seed, + mat_params *p); +ee_u16 core_bench_matrix(mat_params *p, ee_s16 seed, ee_u16 crc); diff 
--git a/sw/applications/coremark/main.c b/sw/applications/coremark/main.c new file mode 100644 index 000000000..12136ada7 --- /dev/null +++ b/sw/applications/coremark/main.c @@ -0,0 +1,459 @@ +/* +Copyright 2018 Embedded Microprocessor Benchmark Consortium (EEMBC) + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +Original Author: Shay Gal-on +*/ + +// Copyright 2020 OpenHW Group +// Copyright 2020 Silicon Labs, Inc. +// +// Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// https://solderpad.org/licenses/ +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +// +// SPDX-License-Identifier:Apache-2.0 WITH SHL-2.0 + +/* File: core_main.c + This file contains the framework to acquire a block of memory, seed + initial parameters, tun t he benchmark and report the results. +*/ +#include "coremark.h" + +/* Function: iterate + Run the benchmark for a specified number of iterations. + + Operation: + For each type of benchmarked algorithm: + a - Initialize the data block for the algorithm. + b - Execute the algorithm N times. + + Returns: + NULL. 
+*/ +static ee_u16 list_known_crc[] = { (ee_u16)0xd4b0, + (ee_u16)0x3340, + (ee_u16)0x6a79, + (ee_u16)0xe714, + (ee_u16)0xe3c1 }; +static ee_u16 matrix_known_crc[] = { (ee_u16)0xbe52, + (ee_u16)0x1199, + (ee_u16)0x5608, + (ee_u16)0x1fd7, + (ee_u16)0x0747 }; +static ee_u16 state_known_crc[] = { (ee_u16)0x5e47, + (ee_u16)0x39bf, + (ee_u16)0xe5a4, + (ee_u16)0x8e3a, + (ee_u16)0x8d84 }; +void * +iterate(void *pres) +{ + ee_u32 i; + ee_u16 crc; + core_results *res = (core_results *)pres; + ee_u32 iterations = res->iterations; + res->crc = 0; + res->crclist = 0; + res->crcmatrix = 0; + res->crcstate = 0; + + for (i = 0; i < iterations; i++) + { + crc = core_bench_list(res, 1); + res->crc = crcu16(crc, res->crc); + crc = core_bench_list(res, -1); + res->crc = crcu16(crc, res->crc); + if (i == 0) + res->crclist = res->crc; + } + return NULL; +} + +#if (SEED_METHOD == SEED_ARG) +ee_s32 get_seed_args(int i, int argc, char *argv[]); +#define get_seed(x) (ee_s16) get_seed_args(x, argc, argv) +#define get_seed_32(x) get_seed_args(x, argc, argv) +#else /* via function or volatile */ +ee_s32 get_seed_32(int i); +#define get_seed(x) (ee_s16) get_seed_32(x) +#endif + +#if (MEM_METHOD == MEM_STATIC) +ee_u8 static_memblk[TOTAL_DATA_SIZE]; +#endif +char *mem_name[3] = { "Static", "Heap", "Stack" }; +/* Function: main + Main entry routine for the benchmark. + This function is responsible for the following steps: + + 1 - Initialize input seeds from a source that cannot be determined at + compile time. 2 - Initialize memory block for use. 3 - Run and time the + benchmark. 4 - Report results, testing the validity of the output if the + seeds are known. + + Arguments: + 1 - first seed : Any value + 2 - second seed : Must be identical to first for iterations to be + identical 3 - third seed : Any value, should be at least an order of + magnitude less then the input size, but bigger then 32. 
/* Function: main
        Main entry routine for the benchmark.
        This function is responsible for the following steps:

        1 - Initialize input seeds from a source that cannot be determined at
        compile time.
        2 - Initialize memory block for use.
        3 - Run and time the benchmark.
        4 - Report results, testing the validity of the output if the seeds
        are known.

        Arguments (read through get_seed / get_seed_32):
        1 - first seed  : Any value
        2 - second seed : Must be identical to first for iterations to be
        identical
        3 - third seed  : Any value, should be at least an order of magnitude
        less than the input size, but bigger than 32.
        4 - Iterations  : Special, if set to 0, iterations will be
        automatically determined such that the benchmark will run between 10
        to 100 secs
        5 - Algorithms  : Bitmask of ID_* values, 0 selects all of them
        7 - Block size  : MEM_MALLOC only, overrides TOTAL_DATA_SIZE when not 0

*/

#if MAIN_HAS_NOARGC
MAIN_RETURN_TYPE
main(void)
{
    /* no command line on this target: provide an empty one */
    int   argc = 0;
    char *argv[1];
#else
MAIN_RETURN_TYPE
main(int argc, char *argv[])
{
#endif
    ee_u16       i, j = 0, num_algorithms = 0;
    ee_s16       known_id = -1, total_errors = 0;
    ee_u16       seedcrc = 0;
    CORE_TICKS   total_time;
    core_results results[MULTITHREAD];
#if (MEM_METHOD == MEM_STACK)
    ee_u8 stack_memblock[TOTAL_DATA_SIZE * MULTITHREAD];
#endif
    /* first call any initializations needed */
    portable_init(&(results[0].port), &argc, argv);
    /* First some checks to make sure benchmark will run ok */
    if (sizeof(struct list_head_s) > 128)
    {
        ee_printf("list_head structure too big for comparable data!\n");
        return MAIN_RETURN_VAL;
    }
    results[0].seed1      = get_seed(1);
    results[0].seed2      = get_seed(2);
    results[0].seed3      = get_seed(3);
    results[0].iterations = get_seed_32(4);
#if CORE_DEBUG
    /* a single iteration keeps the debug output readable */
    results[0].iterations = 1;
#endif
    results[0].execs = get_seed_32(5);
    if (results[0].execs == 0)
    { /* if not supplied, execute all algorithms */
        results[0].execs = ALL_ALGORITHMS_MASK;
    }
    /* put in some default values based on one seed only for easy testing */
    if ((results[0].seed1 == 0) && (results[0].seed2 == 0)
        && (results[0].seed3 == 0))
    { /* performance run */
        results[0].seed1 = 0;
        results[0].seed2 = 0;
        results[0].seed3 = 0x66;
    }
    if ((results[0].seed1 == 1) && (results[0].seed2 == 0)
        && (results[0].seed3 == 0))
    { /* validation run */
        results[0].seed1 = 0x3415;
        results[0].seed2 = 0x3415;
        results[0].seed3 = 0x66;
    }
    /* Hand every context its data block, according to MEM_METHOD */
#if (MEM_METHOD == MEM_STATIC)
    results[0].memblock[0] = (void *)static_memblk;
    results[0].size        = TOTAL_DATA_SIZE;
    results[0].err         = 0;
#if (MULTITHREAD > 1)
#error "Cannot use a static data area with multiple contexts!"
#endif
#elif (MEM_METHOD == MEM_MALLOC)
    for (i = 0; i < MULTITHREAD; i++)
    {
        ee_s32 malloc_override = get_seed(7);
        if (malloc_override != 0)
            results[i].size = malloc_override;
        else
            results[i].size = TOTAL_DATA_SIZE;
        results[i].memblock[0] = portable_malloc(results[i].size);
        results[i].seed1       = results[0].seed1;
        results[i].seed2       = results[0].seed2;
        results[i].seed3       = results[0].seed3;
        results[i].err         = 0;
        results[i].execs       = results[0].execs;
    }
#elif (MEM_METHOD == MEM_STACK)
    for (i = 0; i < MULTITHREAD; i++)
    {
        results[i].memblock[0] = stack_memblock + i * TOTAL_DATA_SIZE;
        results[i].size        = TOTAL_DATA_SIZE;
        results[i].seed1       = results[0].seed1;
        results[i].seed2       = results[0].seed2;
        results[i].seed3       = results[0].seed3;
        results[i].err         = 0;
        results[i].execs       = results[0].execs;
    }
#else
#error "Please define a way to initialize a memory block."
#endif
    /* Data init */
    /* Find out how much space we have based on number of algorithms */
    for (i = 0; i < NUM_ALGORITHMS; i++)
    {
        if ((1 << (ee_u32)i) & results[0].execs)
            num_algorithms++;
    }
    /* NOTE(review): num_algorithms stays 0 when execs selects none of the
       three known algorithm bits, which makes the division below divide by
       zero - confirm that callers never pass such a mask. */
    for (i = 0; i < MULTITHREAD; i++)
        results[i].size = results[i].size / num_algorithms;
    /* Assign pointers */
    /* memblock[1..3] are the slices of memblock[0] for list, matrix and
       state; j counts the enabled algorithms so that the slices are packed */
    for (i = 0; i < NUM_ALGORITHMS; i++)
    {
        ee_u32 ctx;
        if ((1 << (ee_u32)i) & results[0].execs)
        {
            for (ctx = 0; ctx < MULTITHREAD; ctx++)
                results[ctx].memblock[i + 1]
                    = (char *)(results[ctx].memblock[0]) + results[0].size * j;
            j++;
        }
    }
    /* call inits */
    for (i = 0; i < MULTITHREAD; i++)
    {
        if (results[i].execs & ID_LIST)
        {
            results[i].list = core_list_init(
                results[0].size, results[i].memblock[1], results[i].seed1);
        }
        if (results[i].execs & ID_MATRIX)
        {
            /* the matrix seed is seed2 in the upper and seed1 in the lower
               16 bits */
            core_init_matrix(results[0].size,
                             results[i].memblock[2],
                             (ee_s32)results[i].seed1
                                 | (((ee_s32)results[i].seed2) << 16),
                             &(results[i].mat));
        }
        if (results[i].execs & ID_STATE)
        {
            core_init_state(
                results[0].size, results[i].seed1, results[i].memblock[3]);
        }
    }

    /* automatically determine number of iterations if not set */
    if (results[0].iterations == 0)
    {
        secs_ret secs_passed = 0;
        ee_u32   divisor;
        results[0].iterations = 1;
        /* grow by a factor of 10 until one run takes at least a second */
        while (secs_passed < (secs_ret)1)
        {
            results[0].iterations *= 10;
            start_time();
            iterate(&results[0]);
            stop_time();
            secs_passed = time_in_secs(get_time());
        }
        /* now we know it executes for at least 1 sec, set actual run time at
         * about 10 secs */
        divisor = (ee_u32)secs_passed;
        if (divisor == 0) /* some machines cast float to int as 0 since this
                             conversion is not defined by ANSI, but we know at
                             least one second passed */
            divisor = 1;
        results[0].iterations *= 1 + 10 / divisor;
    }
    /* perform actual benchmark */
    start_time();
#if (MULTITHREAD > 1)
    /* default_num_contexts is not declared in this file; presumably it is
       provided by the port layer - TODO confirm */
    if (default_num_contexts > MULTITHREAD)
    {
        default_num_contexts = MULTITHREAD;
    }
    for (i = 0; i < default_num_contexts; i++)
    {
        results[i].iterations = results[0].iterations;
        results[i].execs      = results[0].execs;
        core_start_parallel(&results[i]);
    }
    for (i = 0; i < default_num_contexts; i++)
    {
        core_stop_parallel(&results[i]);
    }
#else
    iterate(&results[0]);
#endif
    stop_time();
    total_time = get_time();
    /* get a function of the input to report */
    seedcrc = crc16(results[0].seed1, seedcrc);
    seedcrc = crc16(results[0].seed2, seedcrc);
    seedcrc = crc16(results[0].seed3, seedcrc);
    seedcrc = crc16(results[0].size, seedcrc);

    /* known_id indexes the *_known_crc tables; an unknown seed CRC leaves it
       at -1 and marks the run as not verifiable (total_errors = -1) */
    switch (seedcrc)
    {                /* test known output for common seeds */
        case 0x8a02: /* seed1=0, seed2=0, seed3=0x66, size 2000 per algorithm */
            known_id = 0;
            ee_printf("6k performance run parameters for coremark.\n");
            break;
        case 0x7b05: /*  seed1=0x3415, seed2=0x3415, seed3=0x66, size 2000 per
                        algorithm */
            known_id = 1;
            ee_printf("6k validation run parameters for coremark.\n");
            break;
        case 0x4eaf: /* seed1=0x8, seed2=0x8, seed3=0x8, size 400 per algorithm
                      */
            known_id = 2;
            ee_printf("Profile generation run parameters for coremark.\n");
            break;
        case 0xe9f5: /* seed1=0, seed2=0, seed3=0x66, size 666 per algorithm */
            known_id = 3;
            ee_printf("2K performance run parameters for coremark.\n");
            break;
        case 0x18f2: /*  seed1=0x3415, seed2=0x3415, seed3=0x66, size 666 per
                        algorithm */
            known_id = 4;
            ee_printf("2K validation run parameters for coremark.\n");
            break;
        default:
            total_errors = -1;
            break;
    }
    if (known_id >= 0)
    {
        /* compare the CRCs recorded by every context with the reference
           values for this parameter set */
        for (i = 0; i < default_num_contexts; i++)
        {
            results[i].err = 0;
            if ((results[i].execs & ID_LIST)
                && (results[i].crclist != list_known_crc[known_id]))
            {
                ee_printf("[%u]ERROR! list crc 0x%04x - should be 0x%04x\n",
                          i,
                          results[i].crclist,
                          list_known_crc[known_id]);
                results[i].err++;
            }
            if ((results[i].execs & ID_MATRIX)
                && (results[i].crcmatrix != matrix_known_crc[known_id]))
            {
                ee_printf("[%u]ERROR! matrix crc 0x%04x - should be 0x%04x\n",
                          i,
                          results[i].crcmatrix,
                          matrix_known_crc[known_id]);
                results[i].err++;
            }
            if ((results[i].execs & ID_STATE)
                && (results[i].crcstate != state_known_crc[known_id]))
            {
                ee_printf("[%u]ERROR! state crc 0x%04x - should be 0x%04x\n",
                          i,
                          results[i].crcstate,
                          state_known_crc[known_id]);
                results[i].err++;
            }
            total_errors += results[i].err;
        }
    }
    total_errors += check_data_types();
    /* and report results */
    ee_printf("CoreMark Size : %lu\n", (long unsigned)results[0].size);
    ee_printf("Total ticks : %lu\n", (long unsigned)total_time);
#if HAS_FLOAT
    ee_printf("Total time (secs): %f\n", time_in_secs(total_time));
    if (time_in_secs(total_time) > 0)
        ee_printf("Iterations/Sec : %f\n",
                  default_num_contexts * results[0].iterations
                      / time_in_secs(total_time));
#else
    ee_printf("Total time (secs): %d\n", time_in_secs(total_time));
    if (time_in_secs(total_time) > 0)
        ee_printf("Iterations/Sec : %d\n",
                  default_num_contexts * results[0].iterations
                      / time_in_secs(total_time));
#endif
/* The minimum run time check is disabled in this version:
    if (time_in_secs(total_time) < 10)
    {
        ee_printf(
            "ERROR! Must execute for at least 10 secs for a valid result!\n");
        total_errors++;
    }
*/

    ee_printf("Iterations : %lu\n",
              (long unsigned)default_num_contexts * results[0].iterations);
#if (MULTITHREAD > 1)
    ee_printf("Parallel %s : %d\n", PARALLEL_METHOD, default_num_contexts);
#endif
    ee_printf("Memory location : %s\n", MEM_LOCATION);
    /* output for verification */
    ee_printf("seedcrc : 0x%04x\n", seedcrc);
    if (results[0].execs & ID_LIST)
        for (i = 0; i < default_num_contexts; i++)
            ee_printf("[%d]crclist : 0x%04x\n", i, results[i].crclist);
    if (results[0].execs & ID_MATRIX)
        for (i = 0; i < default_num_contexts; i++)
            ee_printf("[%d]crcmatrix : 0x%04x\n", i, results[i].crcmatrix);
    if (results[0].execs & ID_STATE)
        for (i = 0; i < default_num_contexts; i++)
            ee_printf("[%d]crcstate : 0x%04x\n", i, results[i].crcstate);
    for (i = 0; i < default_num_contexts; i++)
        ee_printf("[%d]crcfinal : 0x%04x\n", i, results[i].crc);
    if (total_errors == 0)
    {
        ee_printf(
            "Correct operation validated. See README.md for run and reporting "
            "rules.\n");
#if HAS_FLOAT
        /* the score line is only printed for the 2K performance run */
        if (known_id == 3)
        {
            ee_printf("CoreMark 1.0 : %f / %s",
                      default_num_contexts * results[0].iterations
                          / time_in_secs(total_time),
                      COMPILER_VERSION);
//                      COMPILER_VERSION,
//                      COMPILER_FLAGS);
#if defined(MEM_LOCATION) && !defined(MEM_LOCATION_UNSPEC)
            ee_printf(" / %s", MEM_LOCATION);
#else
            ee_printf(" / %s", mem_name[MEM_METHOD]);
#endif

#if (MULTITHREAD > 1)
            ee_printf(" / %d:%s", default_num_contexts, PARALLEL_METHOD);
#endif
            ee_printf("\n");
        }
#endif
    }
    if (total_errors > 0)
        ee_printf("Errors detected\n");
    if (total_errors < 0)
        ee_printf(
            "Cannot validate operation for these seed values, please compare "
            "with results on a known platform.\n");

#if (MEM_METHOD == MEM_MALLOC)
    for (i = 0; i < MULTITHREAD; i++)
        portable_free(results[i].memblock[0]);
#endif
    /* And last call any target specific code for finalizing */
    portable_fini(&(results[0].port));

    return MAIN_RETURN_VAL;
}
FPGA and disabled for simulation. */ +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 0 + +#if TARGET_SIM && PRINTF_IN_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else + #define PRINTF(...) +#endif + +extern int add_asm_function(int a, int b); +extern int mul_by_const_asm_function( int a); + +int main() { + int num1 = 10; + int num2 = 20; + int sum = add_asm_function(num1, num2); + int mul = mul_by_const_asm_function(num2); + + PRINTF("%d+%d=%d\n", num1, num2, sum); + PRINTF("%d*%d=%d\n", num2, MULTIPLY_CONSTANT, mul ); + + return (sum == num1+num2) && (mul == num2*MULTIPLY_CONSTANT) ? EXIT_SUCCESS : EXIT_FAILURE; +} \ No newline at end of file diff --git a/sw/applications/example_asm/multiply.S b/sw/applications/example_asm/multiply.S new file mode 100644 index 000000000..7130f4886 --- /dev/null +++ b/sw/applications/example_asm/multiply.S @@ -0,0 +1,9 @@ +#include "constants.h" + + .section .text + .globl mul_by_const_asm_function + +mul_by_const_asm_function: + li t0, MULTIPLY_CONSTANT # Load the constant into temporary register t0 + mul a0, a0, t0 # Multiply the value in a0 by the constant in t0 + ret # Return from the function \ No newline at end of file diff --git a/sw/applications/example_clock_gating/main.c b/sw/applications/example_clock_gating/main.c index 958e04921..91d1d8fe9 100644 --- a/sw/applications/example_clock_gating/main.c +++ b/sw/applications/example_clock_gating/main.c @@ -17,7 +17,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) 
// TEST DEFINES AND CONFIGURATION

// Each of these switches enables one group of tests.
#define TEST_SINGLE_MODE
#define TEST_ADDRESS_MODE
#define TEST_PENDING_TRANSACTION
#define TEST_WINDOW
#define TEST_ADDRESS_MODE_EXTERNAL_DEVICE

#define TEST_DATA_SIZE 16
#define TEST_DATA_LARGE 1024
#define TRANSACTIONS_N 3         // Only possible to perform one transaction at a time, others should be blocked
#define TEST_WINDOW_SIZE_DU 1024 // if put at <=71 the isr is too slow to react to the interrupt

// Stop the build with a readable message if the large buffer is too small.
// ("#errors(...)" is not a preprocessor directive; "#error" is.)
#if TEST_DATA_LARGE < 2 * TEST_DATA_SIZE
#error "TEST_DATA_LARGE must be at least 2*TEST_DATA_SIZE"
#endif

/* By default, printfs are activated for FPGA and disabled for simulation. */
#define PRINTF_IN_FPGA 1
#define PRINTF_IN_SIM 0

#if TARGET_SIM && PRINTF_IN_SIM
#define PRINTF(fmt, ...) printf(fmt, ##__VA_ARGS__)
#elif PRINTF_IN_FPGA && !TARGET_SIM
#define PRINTF(fmt, ...) printf(fmt, ##__VA_ARGS__)
#else
#define PRINTF(...)
#endif
"16-bit" \ + : dma_type == DMA_DATA_TYPE_WORD ? "32-bit" \ + : "TYPE NOT VALID" + +dma_data_type_t C_type_2_dma_type(int C_type) +{ + switch (C_type) + { + case 1: + return DMA_DATA_TYPE_BYTE; + case 2: + return DMA_DATA_TYPE_HALF_WORD; + case 4: + return DMA_DATA_TYPE_WORD; + default: + return DMA_DATA_TYPE_WORD; + } +} + +#define WAIT_DMA \ + while (!dma_is_ready()) \ + { \ + CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); \ + if (dma_is_ready() == 0) \ + { \ + wait_for_interrupt(); \ + } \ + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); \ + } + +#define RUN_DMA \ + res = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY); \ + PRINTF("tran: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); \ + res = dma_load_transaction(&trans); \ + PRINTF("load: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); \ + res = dma_launch(&trans); \ + PRINTF("laun: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); + +// TEST MACROS + +#define PRINT_TEST(signed, data_size, dma_src_type, dma_dst_type) \ + PRINTF("TEST:\n\r"); \ + PRINTF("Data size: %d\n\r", data_size); \ + PRINTF("Signed: %d\n\r", signed); \ + PRINTF("Source type size: %s\n\r", type2name(dma_src_type)); \ + PRINTF("Destination type size: %s\n\r", type2name(dma_dst_type)); + +#define DEFINE_DATA(data_size, C_src_type, C_dst_type, signed) \ + C_src_type src[data_size] __attribute__((aligned(4))); \ + C_dst_type dst[data_size] __attribute__((aligned(4))); \ + if (data_size <= TEST_DATA_SIZE) \ + for (int i = 0; i < data_size; i++) \ + if (signed && (i % 2) == 0) \ + src[i] = (C_src_type)(-test_data_4B[i]); \ + else \ + src[i] = (C_src_type)test_data_4B[i]; + +#define CHECK_RESULTS(data_size) \ + for (int i = 0; i < data_size; i++) \ + { \ + if (src[i] != dst[i]) \ + { \ + PRINTF("[%d] Expected: %x Got : %x\n", i, src[i], dst[i]); \ + errors++; \ + } \ + } \ + if (errors != 0) \ + { \ + PRINTF("DMA failure: %d errors out of %d bytes checked\n\r", errors, trans.size_b); \ + return 
EXIT_FAILURE; \ + } + +#define INIT_TEST(signed, data_size, dma_src_type, dma_dst_type) \ + tgt_src.ptr = src; \ + tgt_src.inc_du = 1; \ + tgt_src.size_du = data_size; \ + tgt_src.trig = DMA_TRIG_MEMORY; \ + tgt_src.type = dma_src_type; \ + tgt_dst.ptr = dst; \ + tgt_dst.inc_du = 1; \ + tgt_dst.size_du = data_size; \ + tgt_dst.trig = DMA_TRIG_MEMORY; \ + tgt_dst.type = dma_dst_type; \ + trans.src = &tgt_src; \ + trans.dst = &tgt_dst; \ + trans.src_addr = &tgt_addr; \ + trans.src_type = dma_dst_type; \ + trans.dst_type = dma_dst_type; \ + trans.mode = DMA_TRANS_MODE_SINGLE; \ + trans.win_du = 0; \ + trans.sign_ext = signed; \ + trans.end = DMA_TRANS_END_INTR; + +#define TEST(C_src_type, C_dst_type, test_size, sign_extend) \ + PRINT_TEST(sign_extend, test_size, C_type_2_dma_type(sizeof(C_src_type)), C_type_2_dma_type(sizeof(C_dst_type))) \ + DEFINE_DATA(test_size, C_src_type, C_dst_type, sign_extend) \ + INIT_TEST(sign_extend, test_size, C_type_2_dma_type(sizeof(C_src_type)), C_type_2_dma_type(sizeof(C_dst_type))) \ + RUN_DMA \ + WAIT_DMA \ + CHECK_RESULTS(test_size) \ + PRINTF("\n\r") + +#define TEST_SINGLE \ + { \ + TEST(uint8_t, uint8_t, TEST_DATA_SIZE, 0); \ + errors += errors; \ + } \ + { \ + TEST(uint8_t, uint16_t, TEST_DATA_SIZE, 0); \ + errors += errors; \ + } \ + { \ + TEST(uint8_t, uint32_t, TEST_DATA_SIZE, 0); \ + errors += errors; \ + } \ + { \ + TEST(int8_t, int8_t, TEST_DATA_SIZE, 1); \ + errors += errors; \ + } \ + { \ + TEST(int8_t, int16_t, TEST_DATA_SIZE, 1); \ + errors += errors; \ + } \ + { \ + TEST(int8_t, int32_t, TEST_DATA_SIZE, 1); \ + errors += errors; \ + } \ + { \ + TEST(uint16_t, uint16_t, TEST_DATA_SIZE, 0); \ + errors += errors; \ + } \ + { \ + TEST(uint16_t, uint32_t, TEST_DATA_SIZE, 0); \ + errors += errors; \ + } \ + { \ + TEST(int16_t, int16_t, TEST_DATA_SIZE, 1); \ + errors += errors; \ + } \ + { \ + TEST(int16_t, int32_t, TEST_DATA_SIZE, 1); \ + errors += errors; \ + } \ + { \ + TEST(uint32_t, uint32_t, TEST_DATA_SIZE, 0); \ + 
errors += errors; \ + } \ + { \ + TEST(int32_t, int32_t, TEST_DATA_SIZE, 1); \ + errors += errors; \ + } + +// GLOBAL VARIABLES int32_t errors = 0; int8_t cycles = 0; -void dma_intr_handler_trans_done() +// INTERRUPT HANDLERS +void dma_intr_handler_trans_done(void) { cycles++; } - #ifdef TEST_WINDOW int32_t window_intr_flag; -void dma_intr_handler_window_done(void) { - window_intr_flag ++; +void dma_intr_handler_window_done(void) +{ + window_intr_flag++; } uint8_t dma_window_ratio_warning_threshold() @@ -66,94 +222,62 @@ uint8_t dma_window_ratio_warning_threshold() #endif // TEST_WINDOW - int main(int argc, char *argv[]) { - static uint32_t test_data_4B[TEST_DATA_SIZE] __attribute__ ((aligned (4))) = { - 0x76543210, 0xfedcba98, 0x579a6f90, 0x657d5bee, 0x758ee41f, 0x01234567, 0xfedbca98, 0x89abcdef, 0x679852fe, 0xff8252bb, 0x763b4521, 0x6875adaa, 0x09ac65bb, 0x666ba334, 0x55446677, 0x65ffba98}; - static uint32_t copied_data_4B[TEST_DATA_LARGE] __attribute__ ((aligned (4))) = { 0 }; - static uint32_t test_data_large[TEST_DATA_LARGE] __attribute__ ((aligned (4))) = { 0 }; + static uint32_t test_data_4B[TEST_DATA_SIZE] __attribute__((aligned(4))) = { + 0x76543210, 0xfedcba98, 0x579a6f90, 0x657d5bee, 0x758ee41f, 0x01234567, 0xfedbca98, 0x89abcdef, 0x679852fe, 0xff8252bb, 0x763b4521, 0x6875adaa, 0x09ac65bb, 0x666ba334, 0x55446677, 0x65ffba98}; + static uint32_t copied_data_4B[TEST_DATA_LARGE] __attribute__((aligned(4))) = {0}; + static uint32_t test_data_large[TEST_DATA_LARGE] __attribute__((aligned(4))) = {0}; - // this array will contain the even address of copied_data_4B - uint32_t* test_addr_4B_PTR = &test_data_large[0]; + // this array will contain the even address of copied_data_4B + uint32_t *test_addr_4B_PTR = &test_data_large[0]; // The DMA is initialized (i.e. Any current transaction is cleaned.) 
dma_init(NULL); - dma_config_flags_t res; - - dma_target_t tgt_src = { - .ptr = test_data_4B, - .inc_du = 1, - .size_du = TEST_DATA_SIZE, - .trig = DMA_TRIG_MEMORY, - .type = DMA_DATA_TYPE_WORD, - }; - dma_target_t tgt_dst = { - .ptr = copied_data_4B, - .inc_du = 1, - .size_du = TEST_DATA_SIZE, - .trig = DMA_TRIG_MEMORY, - }; - + dma_target_t tgt_src; + dma_target_t tgt_dst; dma_target_t tgt_addr = { - .ptr = test_addr_4B_PTR, - .inc_du = 1, - .size_du = TEST_DATA_SIZE, - .trig = DMA_TRIG_MEMORY, - }; - - dma_trans_t trans = { - .src = &tgt_src, - .dst = &tgt_dst, - .src_addr = &tgt_addr, - .mode = DMA_TRANS_MODE_SINGLE, - .win_du = 0, - .end = DMA_TRANS_END_INTR, - }; - // Create a target pointing at the buffer to be copied. Whole WORDs, no skippings, in memory, no environment. - -#ifdef TEST_SINGULAR_MODE + .ptr = test_addr_4B_PTR, + .inc_du = 1, + .size_du = TEST_DATA_SIZE, + .trig = DMA_TRIG_MEMORY, + }; + dma_trans_t trans; + +#ifdef TEST_SINGLE_MODE PRINTF("\n\n\r===================================\n\n\r"); PRINTF(" TESTING SINGLE MODE "); PRINTF("\n\n\r===================================\n\n\r"); - res = dma_validate_transaction( &trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY ); - PRINTF("tran: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); - res = dma_load_transaction(&trans); - PRINTF("load: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); - res = dma_launch(&trans); - PRINTF("laun: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); - - while( ! dma_is_ready()) { - // disable_interrupts - // this does not prevent waking up the core as this is controlled by the MIP register - CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); - if ( dma_is_ready() == 0 ) { - wait_for_interrupt(); - //from here we wake up even if we did not jump to the ISR - } - CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); - } - PRINTF(">> Finished transaction. 
\n\r"); + TEST_SINGLE - for(uint32_t i = 0; i < trans.size_b; i++ ) { - if ( ((uint8_t*)copied_data_4B)[i] != ((uint8_t*)test_data_4B)[i] ) { - PRINTF("ERROR [%d]: %04x != %04x\n\r", i, ((uint8_t*)copied_data_4B)[i], ((uint8_t*)test_data_4B)[i]); - errors++; - } - } +#endif // TEST_SINGLE_MODE - if (errors == 0) { - PRINTF("DMA single mode success.\n\r"); - } else { - PRINTF("DMA single mode failure: %d errors out of %d bytes checked\n\r", errors, trans.size_b ); - return EXIT_FAILURE; - } + // Initialize the DMA for the next tests + tgt_src.ptr = test_data_4B; + tgt_src.inc_du = 1; + tgt_src.size_du = TEST_DATA_SIZE; + tgt_src.trig = DMA_TRIG_MEMORY; + tgt_src.type = DMA_DATA_TYPE_WORD; + + tgt_dst.ptr = copied_data_4B; + tgt_dst.inc_du = 1; + tgt_dst.size_du = TEST_DATA_LARGE; + tgt_dst.trig = DMA_TRIG_MEMORY; + tgt_dst.type = DMA_DATA_TYPE_WORD; -#endif // TEST_SINGULAR_MODE + trans.src = &tgt_src; + trans.dst = &tgt_dst; + trans.src_addr = &tgt_addr; + trans.src_type = DMA_DATA_TYPE_WORD; + trans.dst_type = DMA_DATA_TYPE_WORD; + trans.mode = DMA_TRANS_MODE_SINGLE; + trans.win_du = 0; + trans.sign_ext = 0; + trans.end = DMA_TRANS_END_INTR; #ifdef TEST_ADDRESS_MODE @@ -162,64 +286,53 @@ int main(int argc, char *argv[]) PRINTF("\n\n\r===================================\n\n\r"); // Prepare the data - for (int i = 0; i < TEST_DATA_SIZE; i++) { - test_addr_4B_PTR[i] = &copied_data_4B[i*2]; + for (int i = 0; i < TEST_DATA_SIZE; i++) + { + test_addr_4B_PTR[i] = &copied_data_4B[i * 2]; } trans.mode = DMA_TRANS_MODE_ADDRESS; - res = dma_validate_transaction( &trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY ); - PRINTF("tran: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); - res = dma_load_transaction(&trans); - PRINTF("load: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); - res = dma_launch(&trans); - PRINTF("laun: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); - - while( ! 
dma_is_ready()) { - // disable_interrupts - // this does not prevent waking up the core as this is controlled by the MIP register - CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); - if ( dma_is_ready() == 0 ) { - wait_for_interrupt(); - //from here we wake up even if we did not jump to the ISR - } - CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); - } + RUN_DMA + WAIT_DMA PRINTF(">> Finished transaction. \n\r"); - for(uint32_t i = 0; i < trans.size_b >> 2; i++ ) { - if ( copied_data_4B[i*2] != test_data_4B[i] ) { - PRINTF("ERROR [%d]: %04x != %04x\n\r", i, copied_data_4B[i*2], test_data_4B[i]); + for (uint32_t i = 0; i < trans.size_b >> 2; i++) + { + if (copied_data_4B[i * 2] != test_data_4B[i]) + { + PRINTF("ERROR [%d]: %04x != %04x\n\r", i, copied_data_4B[i * 2], test_data_4B[i]); errors++; } } - if (errors == 0) { + if (errors == 0) + { PRINTF("DMA address mode success.\n\r"); - } else { - PRINTF("DMA address mode failure: %d errors out of %d bytes checked\n\r", errors, trans.size_b ); + } + else + { + PRINTF("DMA address mode failure: %d errors out of %d bytes checked\n\r", errors, trans.size_b); return EXIT_FAILURE; } trans.mode = DMA_TRANS_MODE_SINGLE; - #endif // TEST_ADDRESS_MODE -#ifndef TARGET_PYNQ_Z2 +#if defined(TARGET_SIM) || defined(TARGET_SYSTEMC) #ifdef TEST_ADDRESS_MODE_EXTERNAL_DEVICE -#pragma message ( "this application should not be ran in a system integrating x-heep as in the external \ - slave can be plugged something else than a slow memory as in our testbench" ) +#pragma message("this application should not be ran in a system integrating x-heep as in the external \ + slave can be plugged something else than a slow memory as in our testbench") - uint32_t* ext_test_addr_4B_PTR = EXT_SLAVE_START_ADDRESS; - uint32_t* ext_copied_data_4B; - - ext_copied_data_4B = &ext_test_addr_4B_PTR[TEST_DATA_SIZE+1]; + uint32_t *ext_test_addr_4B_PTR = EXT_SLAVE_START_ADDRESS; + uint32_t *ext_copied_data_4B; + ext_copied_data_4B = &ext_test_addr_4B_PTR[TEST_DATA_SIZE + 1]; 
tgt_addr.ptr = ext_test_addr_4B_PTR; trans.src_addr = &tgt_addr; @@ -229,53 +342,44 @@ int main(int argc, char *argv[]) PRINTF("\n\n\r=====================================\n\n\r"); // Prepare the data - for (int i = 0; i < TEST_DATA_SIZE; i++) { - ext_test_addr_4B_PTR[i] = &ext_copied_data_4B[i*2]; + for (int i = 0; i < TEST_DATA_SIZE; i++) + { + ext_test_addr_4B_PTR[i] = &ext_copied_data_4B[i * 2]; } trans.mode = DMA_TRANS_MODE_ADDRESS; - res = dma_validate_transaction( &trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY ); - PRINTF("tran: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); - res = dma_load_transaction(&trans); - PRINTF("load: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); - res = dma_launch(&trans); - PRINTF("laun: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); - - while( ! dma_is_ready()) { - // disable_interrupts - // this does not prevent waking up the core as this is controlled by the MIP register - CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); - if ( dma_is_ready() == 0 ) { - wait_for_interrupt(); - //from here we wake up even if we did not jump to the ISR - } - CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); - } + RUN_DMA + WAIT_DMA PRINTF(">> Finished transaction. 
\n\r"); - for(uint32_t i = 0; i < trans.size_b >> 2; i++ ) { - if ( ext_copied_data_4B[i*2] != test_data_4B[i] ) { + for (uint32_t i = 0; i < trans.size_b >> 2; i++) + { + if (ext_copied_data_4B[i * 2] != test_data_4B[i]) + { PRINTF("ERROR [%d]: %04x != %04x\n\r", i, ext_copied_data_4B[i], test_data_4B[i]); errors++; } } - if (errors == 0) { + if (errors == 0) + { PRINTF("DMA address mode in external memory success.\n\r"); - } else { - PRINTF("DMA address mode in external memory failure: %d errors out of %d bytes checked\n\r", errors, trans.size_b ); + } + else + { + PRINTF("DMA address mode in external memory failure: %d errors out of %d bytes checked\n\r", errors, trans.size_b); return EXIT_FAILURE; } trans.mode = DMA_TRANS_MODE_SINGLE; -#endif //TEST_ADDRESS_MODE_EXTERNAL_DEVICE +#endif // TEST_ADDRESS_MODE_EXTERNAL_DEVICE #else - #pragma message( "TEST_ADDRESS_MODE_EXTERNAL_DEVICE is not executed on PYNQ Z2" ) +#pragma message("TEST_ADDRESS_MODE_EXTERNAL_DEVICE is not executed on target different than TARGET_SIM") #endif #ifdef TEST_PENDING_TRANSACTION @@ -283,61 +387,71 @@ int main(int argc, char *argv[]) PRINTF(" TESTING MULTIPLE TRANSACTIONS "); PRINTF("\n\n\r===================================\n\n\r"); - for (uint32_t i = 0; i < TEST_DATA_LARGE; i++) { + for (uint32_t i = 0; i < TEST_DATA_LARGE; i++) + { test_data_large[i] = i; } - - tgt_src.ptr = test_data_large; + tgt_src.ptr = test_data_large; tgt_src.size_du = TEST_DATA_LARGE; + tgt_dst.size_du = TEST_DATA_LARGE; // trans.end = DMA_TRANS_END_INTR_WAIT; // This option makes no sense, because the launch is blocking the program until the trans finishes. trans.end = DMA_TRANS_END_INTR; // trans.end = DMA_TRANS_END_POLLING; - - res = dma_validate_transaction( &trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY ); - PRINTF("tran: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" 
: "Error!"); + res = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY); + PRINTF("tran: %u \t%s\n\r", res, res == DMA_CONFIG_OK ? "Ok!" : "Error!"); cycles = 0; uint8_t consecutive_trans = 0; - for( uint8_t i = 0; i < TRANSACTIONS_N; i++ ){ - res = dma_load_transaction(&trans); + for (uint8_t i = 0; i < TRANSACTIONS_N; i++) + { + res = dma_load_transaction(&trans); res |= dma_launch(&trans); - if( res == DMA_CONFIG_OK ) consecutive_trans++; + if (res == DMA_CONFIG_OK) + consecutive_trans++; } - if( trans.end == DMA_TRANS_END_POLLING ){ - while( cycles < consecutive_trans ){ - while( ! dma_is_ready() ); + if (trans.end == DMA_TRANS_END_POLLING) + { + while (cycles < consecutive_trans) + { + while (!dma_is_ready()) + ; cycles++; } - } else { - while( cycles < consecutive_trans ){ + } + else + { + while (cycles < consecutive_trans) + { wait_for_interrupt(); } } PRINTF(">> Finished %d transactions. That is %s.\n\r", consecutive_trans, consecutive_trans > 1 ? "bad" : "good"); - - - for(int i=0; i + * + * Info: Example application of matrix manipulation by exploiting the 2D DMA. + * In this code, there are some optional features: + * - Verification of several matrix operations carried out by the 2D DMA + * - Performance comparison between the DMA and the CPU, obtained by performing similar matrix operations + * and monitoring the performance counter. + */ + +#include +#include +#include "dma.h" +#include "core_v_mini_mcu.h" +#include "x-heep.h" +#include "csr.h" +#include "rv_plic.h" +#include "test_data.h" + +/* + * This code contains four different tests that can be run by defining the corresponding TEST_ID_* macro. 
+ * - Extract a NxM matrix, perform optional padding and copy it to a AxB matrix, using HALs + * - Extract a NxM matrix and copy its transposed version to AxB matrix, using HALs + * - Extract a 1xN matrix (array), perform optional padding and copy it to an array, using HALs + * - Extract a NxM matrix, perform optional padding and copy it to a AxB matrix, using direct register operations + */ + +#define TEST_ID_0 +#define TEST_ID_1 +#define TEST_ID_2 +#define TEST_ID_3 + +/* Enable performance analysis */ +#define EN_PERF 1 + +/* Enable verification */ +#define EN_VERIF 1 + +/* Parameters */ + +/* Size of the extracted matrix (including strides on the input, excluding strides on the outputs) */ +#define SIZE_EXTR_D1 10 +#define SIZE_EXTR_D2 10 + +/* Set strides of the input ad output matrix */ +#define STRIDE_IN_D1 1 +#define STRIDE_IN_D2 1 +#define STRIDE_OUT_D1 1 +#define STRIDE_OUT_D2 1 + +/* Set the padding parameters */ +#define TOP_PAD 1 +#define BOTTOM_PAD 1 +#define LEFT_PAD 1 +#define RIGHT_PAD 1 + +/* Macros for dimensions computation */ +#define OUT_D1_PAD ( SIZE_EXTR_D1 + LEFT_PAD + RIGHT_PAD ) +#define OUT_D2_PAD ( SIZE_EXTR_D2 + TOP_PAD + BOTTOM_PAD ) +#define OUT_D1_PAD_STRIDE ( (OUT_D1_PAD * STRIDE_OUT_D1) - (STRIDE_OUT_D1 - 1) ) +#define OUT_D2_PAD_STRIDE ( (OUT_D2_PAD * STRIDE_OUT_D2) - (STRIDE_OUT_D2 - 1) ) +#define OUT_DIM_1D ( OUT_D1_PAD_STRIDE ) +#define OUT_DIM_2D ( OUT_D1_PAD_STRIDE * OUT_D2_PAD_STRIDE ) + +/* Mask for the direct register operations example */ +#define DMA_CSR_REG_MIE_MASK (( 1 << 19 ) | (1 << 11 )) + +/* Transposition example def */ +#define TRANSPOSITION_EN 1 + +/* Pointer increments computation */ +#define SRC_INC_D1 STRIDE_IN_D1 +#define DST_INC_D1 STRIDE_OUT_D1 +#define SRC_INC_D2 (STRIDE_IN_D2 * SIZE_IN_D1 - (SIZE_EXTR_D1 - 1 + (STRIDE_IN_D1 - 1) * (SIZE_EXTR_D1 - 1))) +#define DST_INC_D2 ((STRIDE_OUT_D2 - 1) * OUT_DIM_1D + 1) +#define SRC_INC_TRSP_D1 SRC_INC_D1 +#define SRC_INC_TRSP_D2 (STRIDE_IN_D2 * SIZE_IN_D1) + +/* 
By default, printfs are activated for FPGA and disabled for simulation. */ +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 0 + +#if TARGET_SIM && PRINTF_IN_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA && !TARGET_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else + #define PRINTF(...) +#endif + +dma_input_data_type copied_data_2D_DMA[OUT_DIM_2D]; +dma_input_data_type copied_data_1D_DMA[OUT_DIM_1D]; +dma_input_data_type copied_data_2D_CPU[OUT_DIM_2D]; +dma_input_data_type copied_data_1D_CPU[OUT_DIM_2D]; + +dma_config_flags_t res_valid, res_load, res_launch; + +dma *peri = dma_peri; + +dma_target_t tgt_src; +dma_target_t tgt_dst; +dma_trans_t trans; + +uint32_t dst_ptr = 0, src_ptr = 0; +uint32_t cycles_dma, cycles_cpu; +uint32_t size_dst_trans_d1; +uint32_t dst_stride_d1; +uint32_t dst_stride_d2; +uint32_t size_src_trans_d1; +uint32_t src_stride_d1; +uint32_t src_stride_d2; +uint32_t i_in; +uint32_t j_in; +uint32_t i_in_last; +uint16_t left_pad_cnt = 0; +uint16_t top_pad_cnt = 0; +uint8_t stride_1d_cnt = 0; +uint8_t stride_2d_cnt = 0; +char passed = 1; + +#ifdef TEST_ID_3 + +/* Function used to simplify register operations */ +static inline volatile void write_register( uint32_t p_val, + uint32_t p_offset, + uint32_t p_mask, + uint8_t p_sel, + dma* peri ) +{ + /* + * The index is computed to avoid needing to access the structure + * as a structure. + */ + uint8_t index = p_offset / sizeof(int); + + /* + * An intermediate variable "value" is used to prevent writing twice into + * the register. 
+ */ + uint32_t value = (( uint32_t * ) peri ) [ index ]; + value &= ~( p_mask << p_sel ); + value |= (p_val & p_mask) << p_sel; + (( uint32_t * ) peri ) [ index ] = value; +}; +#endif + +int main() +{ + #ifdef TEST_ID_0 + + /* Testing copy and padding of a NxM matrix using HALs */ + + #if EN_PERF + + /* Reset the counter to evaluate the performance of the DMA */ + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + #endif + + tgt_src.ptr = test_data; + tgt_src.inc_du = SRC_INC_D1; + tgt_src.inc_d2_du = SRC_INC_D2; + tgt_src.size_du = SIZE_EXTR_D1; + tgt_src.size_d2_du = SIZE_EXTR_D2; + tgt_src.trig = DMA_TRIG_MEMORY; + tgt_src.type = DMA_DATA_TYPE; + + tgt_dst.ptr = copied_data_2D_DMA; + tgt_dst.inc_du = DST_INC_D1; + tgt_dst.inc_d2_du = DST_INC_D2; + tgt_dst.size_du = OUT_D1_PAD_STRIDE; + tgt_dst.size_d2_du = OUT_D2_PAD_STRIDE; + tgt_dst.trig = DMA_TRIG_MEMORY; + tgt_dst.type = DMA_DATA_TYPE; + + trans.src = &tgt_src; + trans.dst = &tgt_dst; + trans.mode = DMA_TRANS_MODE_SINGLE; + trans.dim = DMA_DIM_CONF_2D; + trans.pad_top_du = TOP_PAD, + trans.pad_bottom_du = BOTTOM_PAD, + trans.pad_left_du = LEFT_PAD, + trans.pad_right_du = RIGHT_PAD, + trans.win_du = 0, + trans.end = DMA_TRANS_END_INTR; + + dma_init(NULL); + + #if EN_PERF + + res_valid = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY); + res_load = dma_load_transaction(&trans); + res_launch = dma_launch(&trans); + + #else + + res_valid = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY); + PRINTF("tran: %u \t%s\n\r", res_valid, res_valid == DMA_CONFIG_OK ? "Ok!" : "Error!"); + res_load = dma_load_transaction(&trans); + PRINTF("load: %u \t%s\n\r", res_load, res_load == DMA_CONFIG_OK ? "Ok!" : "Error!"); + res_launch = dma_launch(&trans); + PRINTF("laun: %u \t%s\n\r", res_launch, res_launch == DMA_CONFIG_OK ? "Ok!" : "Error!"); + #endif + + while( ! 
dma_is_ready()) { + #if !EN_PERF + /* Disable_interrupts */ + /* This does not prevent waking up the core as this is controlled by the MIP register */ + + CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); + if ( dma_is_ready() == 0 ) { + wait_for_interrupt(); + /* From here the core wakes up even if we did not jump to the ISR */ + } + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); + #endif + } + + #if EN_PERF + + /* Read the cycles count after the DMA run */ + CSR_READ(CSR_REG_MCYCLE, &cycles_dma); + + /* Reset the performance counter to evaluate the CPU performance */ + CSR_SET_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + #endif + + #if EN_VERIF + + /* Run the same computation on the CPU */ + for (int i=0; i < OUT_D2_PAD_STRIDE; i++) + { + stride_1d_cnt = 0; + j_in = 0; + + for (int j=0; j < OUT_D1_PAD_STRIDE; j++) + { + dst_ptr = i * OUT_D1_PAD_STRIDE + j; + src_ptr = (i_in - top_pad_cnt ) * STRIDE_IN_D2 * SIZE_IN_D1 + (j_in - left_pad_cnt) * STRIDE_IN_D1; + if (i_in < TOP_PAD || i_in >= SIZE_EXTR_D2 + TOP_PAD || j_in < LEFT_PAD || j_in >= SIZE_EXTR_D1 + LEFT_PAD || + stride_1d_cnt != 0 || stride_2d_cnt != 0) + { + copied_data_2D_CPU[dst_ptr] = 0; + } + else + { + copied_data_2D_CPU[dst_ptr] = test_data[src_ptr]; + } + + if (j_in < LEFT_PAD && i_in >= TOP_PAD && stride_1d_cnt == 0 && stride_2d_cnt == 0) + { + left_pad_cnt++; + } + + if (stride_1d_cnt == STRIDE_OUT_D1 - 1) + { + stride_1d_cnt = 0; + j_in++; + } + else + { + stride_1d_cnt++; + } + + } + + if (i_in < TOP_PAD && stride_2d_cnt == 0) + { + top_pad_cnt++; + } + + if (stride_2d_cnt == STRIDE_OUT_D2 - 1) + { + stride_2d_cnt = 0; + i_in++; + } + else + { + stride_2d_cnt++; + } + + left_pad_cnt = 0; + } + #endif + + #if EN_PERF + + /* Read the cycles count after the CPU run */ + CSR_READ(CSR_REG_MCYCLE, &cycles_cpu); + PRINTF("DMA cycles: %d\n\r", cycles_dma); + PRINTF("CPU cycles: %d \n\r", cycles_cpu); + PRINTF("\n\r"); + + #endif + + #if EN_VERIF + + /* 
Verify that the DMA and the CPU outputs are the same */ + for (int i = 0; i < OUT_D2_PAD_STRIDE; i++) { + for (int j = 0; j < OUT_D1_PAD_STRIDE; j++) { + if (copied_data_2D_DMA[i * OUT_D1_PAD_STRIDE + j] != copied_data_2D_CPU[i * OUT_D1_PAD_STRIDE + j]) { + passed = 0; + } + } + } + + if (passed) { + PRINTF("Success test 0\n\n\r"); + } + else + { + PRINTF("Fail test 0\n\r"); + return EXIT_FAILURE; + } + #endif + + #endif + + /* Reset for second test */ + passed = 1; + i_in = 0; + j_in = 0; + left_pad_cnt = 0; + top_pad_cnt = 0; + stride_1d_cnt = 0; + stride_2d_cnt = 0; + + for (int i = 0; i < OUT_DIM_2D; i++) { + copied_data_2D_DMA[i] = 0; + copied_data_2D_CPU[i] = 0; + } + + #ifdef TEST_ID_1 + + /* Testing transposition and copy of a NxM matrix using HALs */ + + #if EN_PERF + + /* Reset the counter to evaluate the performance of the DMA */ + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + #endif + + tgt_src.ptr = &test_data[0]; + tgt_src.inc_du = SRC_INC_TRSP_D1; + tgt_src.inc_d2_du = SRC_INC_TRSP_D2; + tgt_src.size_du = SIZE_EXTR_D1; + tgt_src.size_d2_du = SIZE_EXTR_D2; + tgt_src.trig = DMA_TRIG_MEMORY; + tgt_src.type = DMA_DATA_TYPE; + + tgt_dst.ptr = &copied_data_2D_DMA[0]; + tgt_dst.inc_du = DST_INC_D1; + tgt_dst.inc_d2_du = DST_INC_D2; + tgt_dst.trig = DMA_TRIG_MEMORY; + + trans.src = &tgt_src; + trans.dst = &tgt_dst; + trans.mode = DMA_TRANS_MODE_SINGLE; + trans.dim = DMA_DIM_CONF_2D; + trans.pad_top_du = TOP_PAD; + trans.pad_bottom_du = BOTTOM_PAD; + trans.pad_left_du = LEFT_PAD; + trans.pad_right_du = RIGHT_PAD; + trans.dim_inv = TRANSPOSITION_EN; + trans.win_du = 0, + trans.end = DMA_TRANS_END_INTR; + + dma_init(NULL); + + #if EN_PERF + + res_valid = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY); + res_load = dma_load_transaction(&trans); + res_launch = dma_launch(&trans); + + #else + + res_valid = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY); + 
PRINTF("tran: %u \t%s\n\r", res_valid, res_valid == DMA_CONFIG_OK ? "Ok!" : "Error!"); + res_load = dma_load_transaction(&trans); + PRINTF("load: %u \t%s\n\r", res_load, res_load == DMA_CONFIG_OK ? "Ok!" : "Error!"); + res_launch = dma_launch(&trans); + PRINTF("laun: %u \t%s\n\r", res_launch, res_launch == DMA_CONFIG_OK ? "Ok!" : "Error!"); + #endif + + while( ! dma_is_ready()) { + #if !EN_PERF + /* Disable_interrupts */ + /* This does not prevent waking up the core as this is controlled by the MIP register */ + + CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); + if ( dma_is_ready() == 0 ) { + wait_for_interrupt(); + /* From here the core wakes up even if we did not jump to the ISR */ + } + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); + #endif + } + + #if EN_PERF + + /* Read the cycles count after the DMA run */ + CSR_READ(CSR_REG_MCYCLE, &cycles_dma); + + /* Reset the performance counter to evaluate the CPU performance */ + CSR_SET_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + #endif + + #if EN_VERIF + + /* Run the same computation on the CPU */ + for (int i=0; i < OUT_D2_PAD_STRIDE; i++) + { + stride_1d_cnt = 0; + j_in = 0; + + for (int j=0; j < OUT_D1_PAD_STRIDE; j++) + { + dst_ptr = i * OUT_D1_PAD_STRIDE + j; + src_ptr = (j_in - left_pad_cnt) * STRIDE_IN_D2 * SIZE_IN_D1 + (i_in - top_pad_cnt ) * STRIDE_IN_D1; + if (i_in < TOP_PAD || i_in >= SIZE_EXTR_D2 + TOP_PAD || j_in < LEFT_PAD || j_in >= SIZE_EXTR_D1 + LEFT_PAD || + stride_1d_cnt != 0 || stride_2d_cnt != 0) + { + copied_data_2D_CPU[dst_ptr] = 0; + } + else + { + copied_data_2D_CPU[dst_ptr] = test_data[src_ptr]; + } + + if (j_in < LEFT_PAD && i_in >= TOP_PAD && stride_1d_cnt == 0 && stride_2d_cnt == 0) + { + left_pad_cnt++; + } + + if (stride_1d_cnt == STRIDE_OUT_D1 - 1) + { + stride_1d_cnt = 0; + j_in++; + } + else + { + stride_1d_cnt++; + } + + } + + if (i_in < TOP_PAD && stride_2d_cnt == 0) + { + top_pad_cnt++; + } + + if (stride_2d_cnt == STRIDE_OUT_D2 - 
1) + { + stride_2d_cnt = 0; + i_in++; + } + else + { + stride_2d_cnt++; + } + + left_pad_cnt = 0; + } + #endif + + #if EN_PERF + + /* Read the cycles count after the CPU run */ + CSR_READ(CSR_REG_MCYCLE, &cycles_cpu); + PRINTF("DMA cycles: %d\n\r", cycles_dma); + PRINTF("CPU cycles: %d \n\r", cycles_cpu); + PRINTF("\n\r"); + + #endif + + #if EN_VERIF + + /* Verify that the DMA and the CPU outputs are the same */ + for (int i = 0; i < OUT_D2_PAD_STRIDE; i++) { + for (int j = 0; j < OUT_D1_PAD_STRIDE; j++) { + if (copied_data_2D_DMA[i * OUT_D1_PAD_STRIDE + j] != copied_data_2D_CPU[i * OUT_D1_PAD_STRIDE + j]) { + passed = 0; + } + } + } + + if (passed) { + PRINTF("Success test 1\n\n\r"); + } + else + { + PRINTF("Fail test 1\n\r"); + return EXIT_FAILURE; + } + #endif + + #endif + + /* Reset for third test */ + passed = 1; + i_in = 0; + j_in = 0; + left_pad_cnt = 0; + top_pad_cnt = 0; + stride_1d_cnt = 0; + stride_2d_cnt = 0; + + #ifdef TEST_ID_2 + + /* Testing copy and padding of a 1xN matrix (an array) */ + + #if EN_PERF + + /* Reset the counter to evaluate the performance of the DMA */ + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + #endif + + tgt_src.ptr = &test_data[0]; + tgt_src.inc_du = SRC_INC_D1; + tgt_src.size_du = SIZE_EXTR_D1; + tgt_src.inc_d2_du = 0; + tgt_src.size_d2_du = 0; + tgt_src.trig = DMA_TRIG_MEMORY; + tgt_src.type = DMA_DATA_TYPE; + + tgt_dst.ptr = copied_data_1D_DMA; + tgt_dst.inc_du = DST_INC_D1; + tgt_dst.inc_d2_du = 0; + tgt_dst.trig = DMA_TRIG_MEMORY; + + trans.src = &tgt_src; + trans.dst = &tgt_dst; + trans.mode = DMA_TRANS_MODE_SINGLE; + trans.dim = DMA_DIM_CONF_1D; + trans.pad_top_du = 0; + trans.pad_bottom_du = 0; + trans.pad_left_du = LEFT_PAD; + trans.pad_right_du = RIGHT_PAD; + trans.dim_inv = 0; + trans.win_du = 0; + trans.end = DMA_TRANS_END_INTR; + + dma_init(NULL); + + #if EN_PERF + + res_valid = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY); + res_load = 
dma_load_transaction(&trans); + res_launch = dma_launch(&trans); + + #else + + res_valid = dma_validate_transaction(&trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY); + PRINTF("tran: %u \t%s\n\r", res_valid, res_valid == DMA_CONFIG_OK ? "Ok!" : "Error!"); + res_load = dma_load_transaction(&trans); + PRINTF("load: %u \t%s\n\r", res_load, res_load == DMA_CONFIG_OK ? "Ok!" : "Error!"); + res_launch = dma_launch(&trans); + PRINTF("laun: %u \t%s\n\r", res_launch, res_launch == DMA_CONFIG_OK ? "Ok!" : "Error!"); + #endif + + while( ! dma_is_ready()) { + #if !EN_PERF + /* Disable_interrupts */ + + CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); + if ( dma_is_ready() == 0 ) { + wait_for_interrupt(); + } + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); + #endif + } + + #if EN_PERF + + /* Read the cycles count after the DMA run */ + CSR_READ(CSR_REG_MCYCLE, &cycles_dma); + + /* Reset the performance counter to evaluate the CPU performance */ + CSR_SET_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + #endif + + #if EN_VERIF + + /* Run the same computation on the CPU */ + for (int j=0; j < OUT_D1_PAD_STRIDE; j++) + { + dst_ptr = j; + src_ptr = (j_in - left_pad_cnt) * STRIDE_IN_D1; + + if (j_in < LEFT_PAD || j_in >= SIZE_EXTR_D1 + LEFT_PAD || + stride_1d_cnt != 0) + { + copied_data_1D_CPU[dst_ptr] = 0; + } + else + { + copied_data_1D_CPU[dst_ptr] = test_data[src_ptr]; + } + + if (j_in < LEFT_PAD && stride_1d_cnt == 0) + { + left_pad_cnt++; + } + + if (stride_1d_cnt == STRIDE_OUT_D1 - 1) + { + stride_1d_cnt = 0; + j_in++; + } + else + { + stride_1d_cnt++; + } + } + + #endif + + #if EN_PERF + + /* Read the cycles count after the CPU run */ + CSR_READ(CSR_REG_MCYCLE, &cycles_cpu); + + PRINTF("DMA cycles: %d\n\r", cycles_dma); + PRINTF("CPU cycles: %d \n\r", cycles_cpu); + PRINTF("\n\r"); + #endif + + #if EN_VERIF + + /* Verify that the DMA and the CPU outputs are the same */ + for (int i = 0; i < OUT_D1_PAD_STRIDE; i++) { + if 
(copied_data_1D_DMA[i] != copied_data_1D_CPU[i]) { + passed = 0; + } + } + + if (passed) { + PRINTF("Success test 2\n\n\r"); + } + else + { + PRINTF("Fail test 2\n\r"); + return EXIT_FAILURE; + } + #endif + + #endif + + /* Reset for fourth test */ + passed = 1; + i_in = 0; + j_in = 0; + left_pad_cnt = 0; + top_pad_cnt = 0; + stride_1d_cnt = 0; + stride_2d_cnt = 0; + for (int i = 0; i < OUT_DIM_2D; i++) { + copied_data_2D_DMA[i] = 0; + copied_data_2D_CPU[i] = 0; + } + + #ifdef TEST_ID_3 + + /* Testing copy and padding of a NxM matrix using direct register operations. + * This strategy allows for maximum performance but doesn't perform any checks on the data integrity. + */ + + #if EN_PERF + + /* Reset the counter to evaluate the performance of the DMA */ + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + #endif + + /* The DMA is initialized (i.e. Any current transaction is cleaned.) */ + dma_init(NULL); + + /* Enable the DMA interrupt logic */ + write_register( 0x1, + DMA_INTERRUPT_EN_REG_OFFSET, + 0xffff, + DMA_INTERRUPT_EN_TRANSACTION_DONE_BIT, + peri ); + + /* Enable global interrupts */ + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); + + /* Enable fast interrupts */ + CSR_SET_BITS(CSR_REG_MIE, DMA_CSR_REG_MIE_MASK); + + /* Pointer set up */ + peri->SRC_PTR = &test_data[0]; + peri->DST_PTR = copied_data_2D_DMA; + + /* Dimensionality configuration */ + write_register( 0x1, + DMA_DIM_CONFIG_REG_OFFSET, + 0xffff, + DMA_DIM_CONFIG_DMA_DIM_BIT, + peri ); + + /* Operation mode configuration */ + write_register( DMA_TRANS_MODE_SINGLE, + DMA_MODE_REG_OFFSET, + DMA_MODE_MODE_MASK, + DMA_MODE_MODE_OFFSET, + peri ); + + /* Data type configuration */ + write_register( DMA_DATA_TYPE, + DMA_DST_DATA_TYPE_REG_OFFSET, + DMA_DST_DATA_TYPE_DATA_TYPE_MASK, + DMA_DST_DATA_TYPE_DATA_TYPE_OFFSET, + peri ); + write_register( DMA_DATA_TYPE, + DMA_SRC_DATA_TYPE_REG_OFFSET, + DMA_SRC_DATA_TYPE_DATA_TYPE_MASK, + DMA_SRC_DATA_TYPE_DATA_TYPE_OFFSET, + peri ); + + /* Set 
the source strides */ + write_register( SRC_INC_D1 * DMA_DATA_TYPE_2_SIZE(DMA_DATA_TYPE), + DMA_SRC_PTR_INC_D1_REG_OFFSET, + DMA_SRC_PTR_INC_D1_INC_MASK, + DMA_SRC_PTR_INC_D1_INC_OFFSET, + peri ); + + write_register( SRC_INC_D2 * DMA_DATA_TYPE_2_SIZE(DMA_DATA_TYPE), + DMA_SRC_PTR_INC_D2_REG_OFFSET, + DMA_SRC_PTR_INC_D2_INC_MASK, + DMA_SRC_PTR_INC_D2_INC_OFFSET, + peri ); + + write_register( DST_INC_D1 * DMA_DATA_TYPE_2_SIZE( DMA_DATA_TYPE), + DMA_DST_PTR_INC_D1_REG_OFFSET, + DMA_DST_PTR_INC_D1_INC_MASK, + DMA_DST_PTR_INC_D1_INC_OFFSET, + peri ); + + write_register( DST_INC_D2 * DMA_DATA_TYPE_2_SIZE( DMA_DATA_TYPE), + DMA_DST_PTR_INC_D2_REG_OFFSET, + DMA_DST_PTR_INC_D2_INC_MASK, + DMA_DST_PTR_INC_D2_INC_OFFSET, + peri ); + + /* Padding configuration */ + write_register( TOP_PAD * DMA_DATA_TYPE_2_SIZE( DMA_DATA_TYPE), + DMA_PAD_TOP_REG_OFFSET, + DMA_PAD_TOP_PAD_MASK, + DMA_PAD_TOP_PAD_OFFSET, + peri ); + + write_register( RIGHT_PAD * DMA_DATA_TYPE_2_SIZE( DMA_DATA_TYPE), + DMA_PAD_RIGHT_REG_OFFSET, + DMA_PAD_RIGHT_PAD_MASK, + DMA_PAD_RIGHT_PAD_OFFSET, + peri ); + + write_register( LEFT_PAD * DMA_DATA_TYPE_2_SIZE( DMA_DATA_TYPE), + DMA_PAD_LEFT_REG_OFFSET, + DMA_PAD_LEFT_PAD_MASK, + DMA_PAD_LEFT_PAD_OFFSET, + peri ); + + write_register( BOTTOM_PAD * DMA_DATA_TYPE_2_SIZE( DMA_DATA_TYPE), + DMA_PAD_BOTTOM_REG_OFFSET, + DMA_PAD_BOTTOM_PAD_MASK, + DMA_PAD_BOTTOM_PAD_OFFSET, + peri ); + + /* Set the sizes */ + + write_register( SIZE_EXTR_D2 * DMA_DATA_TYPE_2_SIZE( DMA_DATA_TYPE), + DMA_SIZE_D2_REG_OFFSET, + DMA_SIZE_D2_SIZE_MASK, + DMA_SIZE_D2_SIZE_OFFSET, + peri ); + + write_register( SIZE_EXTR_D1 * DMA_DATA_TYPE_2_SIZE( DMA_DATA_TYPE), + DMA_SIZE_D1_REG_OFFSET, + DMA_SIZE_D1_SIZE_MASK, + DMA_SIZE_D1_SIZE_OFFSET, + peri ); + + while( ! 
dma_is_ready()) { + #if !EN_PERF + /* Disable_interrupts */ + /* This does not prevent waking up the core as this is controlled by the MIP register */ + + CSR_CLEAR_BITS(CSR_REG_MSTATUS, 0x8); + if ( dma_is_ready() == 0 ) { + wait_for_interrupt(); + /* From here the core wakes up even if we did not jump to the ISR */ + } + CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); + #endif + } + + #if EN_PERF + + /* Read the cycles count after the DMA run */ + CSR_READ(CSR_REG_MCYCLE, &cycles_dma); + + /* Reset the performance counter to evaluate the CPU performance */ + CSR_SET_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + #endif + + #if EN_VERIF + + /* Run the same computation on the CPU */ + for (int i=0; i < OUT_D2_PAD_STRIDE; i++) + { + stride_1d_cnt = 0; + j_in = 0; + + for (int j=0; j < OUT_D1_PAD_STRIDE; j++) + { + dst_ptr = i * OUT_D1_PAD_STRIDE + j; + src_ptr = (i_in - top_pad_cnt ) * STRIDE_IN_D2 * SIZE_IN_D1 + (j_in - left_pad_cnt) * STRIDE_IN_D1; + if (i_in < TOP_PAD || i_in >= SIZE_EXTR_D2 + TOP_PAD || j_in < LEFT_PAD || j_in >= SIZE_EXTR_D1 + LEFT_PAD || + stride_1d_cnt != 0 || stride_2d_cnt != 0) + { + copied_data_2D_CPU[dst_ptr] = 0; + } + else + { + copied_data_2D_CPU[dst_ptr] = test_data[src_ptr]; + } + + if (j_in < LEFT_PAD && i_in >= TOP_PAD && stride_1d_cnt == 0 && stride_2d_cnt == 0) + { + left_pad_cnt++; + } + + if (stride_1d_cnt == STRIDE_OUT_D1 - 1) + { + stride_1d_cnt = 0; + j_in++; + } + else + { + stride_1d_cnt++; + } + + } + + if (i_in < TOP_PAD && stride_2d_cnt == 0) + { + top_pad_cnt++; + } + + if (stride_2d_cnt == STRIDE_OUT_D2 - 1) + { + stride_2d_cnt = 0; + i_in++; + } + else + { + stride_2d_cnt++; + } + + left_pad_cnt = 0; + } + + #endif + + #if EN_PERF + + /* Read the cycles count after the CPU run */ + CSR_READ(CSR_REG_MCYCLE, &cycles_cpu); + + PRINTF("DMA cycles: %d\n\r", cycles_dma); + PRINTF("CPU cycles: %d \n\r", cycles_cpu); + PRINTF("\n\r"); + #endif + + #if EN_VERIF + + /* 
Verify that the DMA and the CPU outputs are the same */ + for (int i = 0; i < OUT_D2_PAD_STRIDE; i++) { + for (int j = 0; j < OUT_D1_PAD_STRIDE; j++) { + if (copied_data_2D_DMA[i * OUT_D1_PAD_STRIDE + j] != copied_data_2D_CPU[i * OUT_D1_PAD_STRIDE + j]) { + passed = 0; + } + } + } + + if (passed) { + PRINTF("Success test 3\n\n\r"); + } + else + { + PRINTF("Fail test 3\n\r"); + return EXIT_FAILURE; + } + #endif + + #endif + + return EXIT_SUCCESS; +} \ No newline at end of file diff --git a/sw/applications/example_dma_2d/test_data.h b/sw/applications/example_dma_2d/test_data.h new file mode 100644 index 000000000..d12322237 --- /dev/null +++ b/sw/applications/example_dma_2d/test_data.h @@ -0,0 +1,40 @@ +#ifndef __TEST_DATA_H__ +#define __TEST_DATA_H__ + +#define SIZE_IN_D1 25 +#define SIZE_IN_D2 25 +#define DMA_DATA_TYPE DMA_DATA_TYPE_WORD + +/* Change the input datatype depending on the DMA_DATA_TYPE + * The test data has been generated using byte as datatype, so it's possible to use both uint8_t, uint16_t and uint32_t + */ +typedef uint32_t dma_input_data_type; + +dma_input_data_type test_data[SIZE_IN_D1 * SIZE_IN_D2] = { + 93 ,178 ,28 ,23 ,5 ,231 ,211 ,236 ,45 ,196 ,55 ,124 ,113 ,188 ,36 ,43 ,147 ,111 ,254 ,126 ,145 ,83 ,77 ,7 ,126, + 92 ,179 ,157 ,41 ,64 ,7 ,105 ,93 ,176 ,196 ,171 ,42 ,251 ,175 ,120 ,63 ,134 ,56 ,233 ,0 ,189 ,133 ,71 ,226 ,9, + 252 ,214 ,243 ,177 ,205 ,75 ,100 ,165 ,160 ,42 ,112 ,71 ,238 ,208 ,62 ,1 ,113 ,87 ,116 ,212 ,107 ,165 ,214 ,239 ,151, + 215 ,3 ,80 ,243 ,0 ,18 ,30 ,149 ,237 ,35 ,81 ,149 ,227 ,23 ,240 ,13 ,117 ,122 ,158 ,253 ,120 ,175 ,203 ,30 ,204, + 45 ,18 ,176 ,241 ,38 ,232 ,189 ,208 ,50 ,120 ,4 ,47 ,136 ,141 ,154 ,207 ,180 ,160 ,46 ,191 ,48 ,29 ,122 ,30 ,45, + 56 ,8 ,137 ,197 ,185 ,210 ,103 ,123 ,116 ,25 ,57 ,126 ,41 ,213 ,165 ,17 ,111 ,151 ,39 ,115 ,21 ,59 ,220 ,165 ,142, + 175 ,169 ,150 ,41 ,68 ,241 ,228 ,121 ,125 ,163 ,16 ,244 ,157 ,8 ,205 ,148 ,194 ,115 ,7 ,59 ,164 ,127 ,88 ,184 ,215, + 13 ,6 ,102 ,204 ,29 ,15 ,182 ,125 ,141 ,251 
,172 ,13 ,159 ,196 ,93 ,200 ,211 ,42 ,10 ,154 ,105 ,82 ,109 ,175 ,117, + 115 ,109 ,201 ,196 ,242 ,139 ,178 ,191 ,190 ,181 ,140 ,197 ,233 ,34 ,69 ,20 ,193 ,35 ,243 ,11 ,41 ,131 ,196 ,8 ,133, + 70 ,234 ,210 ,171 ,107 ,57 ,133 ,162 ,114 ,168 ,118 ,250 ,12 ,30 ,223 ,95 ,246 ,122 ,73 ,220 ,247 ,6 ,102 ,214 ,108, + 48 ,55 ,22 ,243 ,241 ,45 ,147 ,32 ,105 ,25 ,185 ,22 ,41 ,2 ,5 ,82 ,221 ,237 ,223 ,162 ,77 ,95 ,62 ,198 ,97, + 206 ,210 ,61 ,7 ,163 ,142 ,20 ,215 ,35 ,92 ,232 ,88 ,52 ,207 ,137 ,234 ,123 ,251 ,214 ,221 ,23 ,19 ,51 ,245 ,188, + 251 ,139 ,176 ,240 ,126 ,29 ,247 ,228 ,248 ,164 ,14 ,198 ,143 ,15 ,178 ,72 ,238 ,220 ,145 ,7 ,253 ,233 ,245 ,32 ,95, + 142 ,30 ,227 ,66 ,67 ,177 ,47 ,2 ,87 ,155 ,74 ,255 ,1 ,69 ,157 ,181 ,73 ,57 ,60 ,39 ,64 ,93 ,146 ,4 ,220, + 129 ,219 ,109 ,159 ,65 ,112 ,162 ,145 ,241 ,59 ,55 ,21 ,12 ,196 ,239 ,239 ,31 ,58 ,148 ,215 ,241 ,109 ,72 ,108 ,61, + 178 ,205 ,116 ,33 ,240 ,137 ,150 ,150 ,148 ,80 ,211 ,87 ,46 ,160 ,64 ,9 ,179 ,221 ,91 ,113 ,87 ,132 ,141 ,70 ,95, + 104 ,62 ,121 ,12 ,149 ,108 ,197 ,154 ,51 ,247 ,78 ,121 ,186 ,124 ,140 ,138 ,155 ,117 ,221 ,55 ,233 ,1 ,61 ,190 ,220, + 123 ,200 ,239 ,89 ,200 ,167 ,191 ,121 ,24 ,249 ,145 ,189 ,15 ,249 ,235 ,165 ,243 ,239 ,102 ,41 ,62 ,159 ,45 ,248 ,28, + 128 ,200 ,95 ,240 ,148 ,118 ,168 ,156 ,62 ,88 ,102 ,14 ,197 ,252 ,135 ,54 ,170 ,249 ,133 ,250 ,172 ,67 ,60 ,35 ,246, + 7 ,9 ,7 ,181 ,49 ,60 ,239 ,70 ,33 ,29 ,132 ,112 ,37 ,28 ,34 ,233 ,37 ,178 ,40 ,20 ,189 ,43 ,45 ,65 ,194, + 104 ,43 ,31 ,59 ,49 ,157 ,15 ,198 ,205 ,47 ,201 ,88 ,49 ,199 ,55 ,223 ,43 ,13 ,118 ,225 ,175 ,94 ,222 ,236 ,10, + 157 ,75 ,239 ,221 ,34 ,14 ,26 ,232 ,18 ,240 ,198 ,3 ,23 ,226 ,110 ,118 ,172 ,0 ,17 ,210 ,136 ,226 ,223 ,162 ,169, + 4 ,35 ,179 ,115 ,9 ,54 ,89 ,178 ,2 ,108 ,123 ,178 ,61 ,107 ,228 ,73 ,70 ,46 ,236 ,102 ,179 ,182 ,49 ,130 ,229, + 72 ,159 ,179 ,125 ,24 ,152 ,13 ,104 ,186 ,2 ,174 ,204 ,191 ,241 ,158 ,84 ,96 ,184 ,19 ,171 ,135 ,101 ,27 ,218 ,217, + 211 ,180 ,172 ,64 ,213 ,56 ,76 ,16 ,82 ,205 ,105 ,165 ,185 ,61 ,19 ,158 
,252 ,192 ,135 ,110 ,246 ,84 ,2 ,18 ,72}; + +#endif \ No newline at end of file diff --git a/sw/applications/example_dma_external/main.c b/sw/applications/example_dma_external/main.c index a4fa35a72..6afa8ab6f 100644 --- a/sw/applications/example_dma_external/main.c +++ b/sw/applications/example_dma_external/main.c @@ -20,7 +20,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) diff --git a/sw/applications/example_ext_memory/main.c b/sw/applications/example_ext_memory/main.c new file mode 100644 index 000000000..8af03ad46 --- /dev/null +++ b/sw/applications/example_ext_memory/main.c @@ -0,0 +1,128 @@ +// Copyright EPFL contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#include +#include +#include +#include "core_v_mini_mcu.h" +#include "x-heep.h" + +#define BUFF_LEN 100 + +uint32_t buffer_rnd_index[BUFF_LEN]; + +#ifdef TARGET_SYSTEMC +//make app PROJECT=example_ext_memory TARGET=systemc +#define CACHE_FLUSH 1 +#define CACHE_BYPASS 2 +#define CACHE_SIZE 4*1024 +#endif + +#define MEMORY_SIZE 32*1024 +#define MEMORY_ADDR_MASK 0x7FFF +#define MEMORY_MAX_WORD_INDEX (MEMORY_SIZE/4) + +int is_in_array(uint32_t number, uint32_t* array, int N ) { + + for (int i=0;i + + Info: Contains randomly generated input activations and the golden result of the im2col algorithm. 
+*/ + +#include "im2colGolden.h" + +const uint32_t input_image_nchw[48] = { + 13932, 24003, 46802, 9895, + 46807, 33972, 44507, 1507, + 14638, 51479, 39560, 22725, + 38212, 35631, 40479, 39503, + 53705, 5796, 58640, 51585, + 45069, 32035, 41983, 18828, + 22247, 54792, 20499, 6640, + 20565, 25501, 4154, 2925, + 43660, 10618, 52141, 45092, + 46500, 63085, 57079, 16974, + 52033, 46977, 35992, 6933, + 3158, 21127, 28588, 61815 +}; + +const uint32_t golden_im2col_nchw[108] = { + 0, 0, 0, 0, 33972, 1507, 0, 35631, 39503, + 0, 0, 0, 46807, 44507, 0, 38212, 40479, 0, + 0, 24003, 9895, 0, 51479, 22725, 0, 0, 0, + 13932, 46802, 0, 14638, 39560, 0, 0, 0, 0, + 0, 0, 0, 0, 32035, 18828, 0, 25501, 2925, + 0, 0, 0, 45069, 41983, 0, 20565, 4154, 0, + 0, 5796, 51585, 0, 54792, 6640, 0, 0, 0, + 53705, 58640, 0, 22247, 20499, 0, 0, 0, 0, + 0, 0, 0, 0, 63085, 16974, 0, 21127, 61815, + 0, 0, 0, 46500, 57079, 0, 3158, 28588, 0, + 0, 10618, 45092, 0, 46977, 6933, 0, 0, 0, + 43660, 52141, 0, 52033, 35992, 0, 0, 0, 0 +}; + +const uint32_t input_image_nhwc[48] = { + 4047, 16986, 10416, + 22393, 36967, 57252, + 30217, 40720, 42651, + 3810, 4754, 56157, + 44724, 26083, 1010, + 44426, 14005, 35222, + 47712, 1887, 65, + 37412, 50137, 2236, + 7582, 53150, 12696, + 24415, 40340, 26558, + 22643, 14656, 7085, + 804, 32415, 17930, + 47706, 3314, 2947, + 19673, 37744, 24015, + 55137, 1975, 54009, + 25888, 50886, 35445 +}; + +const uint32_t golden_im2col_nhwc[108] = { + 0, 0, 0, 4047, 0, 0, 0, 16986, 0, 0, 0, 10416, + 0, 0, 22393, 30217, 0, 0, 36967, 40720, 0, 0, 57252, 42651, + 0, 0, 3810, 0, 0, 0, 4754, 0, 0, 0, 56157, 0, + 0, 44724, 0, 7582, 0, 26083, 0, 53150, 0, 1010, 0, 12696, + 44426, 47712, 24415, 22643, 14005, 1887, 40340, 14656, 35222, 65, 26558, 7085, + 37412, 0, 804, 0, 50137, 0, 32415, 0, 2236, 0, 17930, 0, + 0, 47706, 0, 0, 0, 3314, 0, 0, 0, 2947, 0, 0, + 19673, 55137, 0, 0, 37744, 1975, 0, 0, 24015, 54009, 0, 0, + 25888, 0, 0, 0, 50886, 0, 0, 0, 35445, 0, 0, 0 +}; diff --git 
a/sw/applications/example_im2col/im2colGolden.h b/sw/applications/example_im2col/im2colGolden.h new file mode 100644 index 000000000..27083aad1 --- /dev/null +++ b/sw/applications/example_im2col/im2colGolden.h @@ -0,0 +1,31 @@ +/* + Copyright EPFL contributors. + Licensed under the Apache License, Version 2.0, see LICENSE for details. + SPDX-License-Identifier: Apache-2.0 + + Author: Tommaso Terzano + + Info: Header file of im2colGolden, contains activations parameters and the prototypes of both input tensors and golden output. +*/ + +#ifndef IMAGE_AND_COL_H +#define IMAGE_AND_COL_H + +#include + +// Parameters +#define IW 4 +#define IH 4 +#define CH 3 +#define FW 2 +#define FH 2 +#define STRIDES 2 +#define PAD 1 +#define BATCH 1 + +extern const uint32_t input_image_nchw[48]; +extern const uint32_t golden_im2col_nchw[108]; +extern const uint32_t input_image_nhwc[48]; +extern const uint32_t golden_im2col_nhwc[108]; + +#endif diff --git a/sw/applications/example_im2col/im2col_lib.c b/sw/applications/example_im2col/im2col_lib.c new file mode 100644 index 000000000..ae4d11604 --- /dev/null +++ b/sw/applications/example_im2col/im2col_lib.c @@ -0,0 +1,211 @@ +/* + Copyright EPFL contributors. + Licensed under the Apache License, Version 2.0, see LICENSE for details. + SPDX-License-Identifier: Apache-2.0 + + Author: Tommaso Terzano + + Info: im2col_lib.c describes functions used to calculate im2col and verify it using + the golden result in im2colGolden.c. + + Notes: im2col_nchw_int32() and im2col_nhwc_int32() algorithms are inspired from the library SHL, developed by T-HEAD Semi. 
+ For reference, check out the following link: + https://github.com/T-head-Semi/csi-nn2/blob/main/source/reference/im2col.c +*/ + +#include "im2col_lib.h" + +int output_data[OH_NCHW*OW_NCHW]; + +int im2col_nchw_int32() +{ + PRINTF("OH: %d, OW: %d\n", OH_NCHW, OW_NCHW); + + int size_transfer = 0; + int im_row = 0; + int im_col = 0; + int w_offset = 0; // the offset ALONG the IW + int h_offset = 0; // the offset ALONG the IH + int im_c = 0; // Gets the CH on which the im2col is being performed depending on the row of the output image (c) + int col_index = 0; + + // Iterate over each row of the output matrix. + for (int c = 0; c < CH_COL; ++c) { + // Calculate offsets within the kernel window. + // These are used to move the filter around the input image + + w_offset = c % FW; + h_offset = (c / FW) % FH; + im_c = c / (FH * FW); // Gets the CH on which the im2col is being performed depending on the row of the output image (c) + + // Iterate over each BATCH. + for (int b = 0; b < BATCH; ++b) { + // Iterate over each patch on the IW of the input matrix. + for (int h = 0; h < N_PATCHES_H; ++h) { + // Iterate over each patch on the heigth in the output matrix. + for (int w = 0; w < N_PATCHES_W; ++w) { + // Calculate the row and column indices in the original input image, applying the stride and offset. + im_row = h_offset + h * STRIDES - PAD; + im_col = w_offset + w * STRIDES - PAD; + + // Calculate the index in the flattened output array where this value should be stored. + col_index = ((c * BATCH + b) * N_PATCHES_H + h) * N_PATCHES_W + w; + + // If the calculated indices are outside the bounds of the input image, set the output to 0 (padding effect). + // Otherwise, fetch the value from the input image and store it in the output array. + if (im_row < 0 || im_col < 0 || im_row >= IH || im_col >= IW) { + output_data[col_index] = 0; + } else { + output_data[col_index] = input_image_nchw[get_index(CH, IH, IW, b, im_c, im_row, im_col)]; + } + } + } + } + } + + // Finished! 
+ + PRINTF("Final output matrix:\n\n"); + + #if DEBUG + for (int i=0; i= IH || im_col >= IW) { + output_data[col_index] = 0; + } else { + output_data[col_index] = input_image_nhwc[get_index(IH, IW, CH, b, im_row, im_col, im_c)]; + } + } + } + } + } + + PRINTF("Final output matrix:\n\n"); + + #if DEBUG + for (int i=0; i + + Info: Header file of im2col_lib.c, containing the function prototypes, parameters macros and the configuration of prints and performance analysis. +*/ + +#ifndef _IM2COL_ +#define _IM2COL_ + +#include +#include +#include +#include "im2colGolden.h" +#include "dma.h" +#include "core_v_mini_mcu.h" +#include "x-heep.h" +#include "rv_plic.h" +#include "csr.h" + +// By default, printfs are activated for FPGA and for simulation. +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 0 +#define DEBUG 0 // Set to 1 to enable debug prints +#define TIMING 0 // Set to 1 to enable timing measurements + +// Format is defined in im2colGolden.h + +#if TARGET_SIM && PRINTF_IN_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) + #define PRINTF_DEB(...) + #define PRINTF_TIM(...) +#elif PRINTF_IN_FPGA && !TARGET_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) + #if DEBUG + #define PRINTF_DEB(fmt, ...) printf(fmt, ## __VA_ARGS__) + #else + #define PRINTF_DEB(...) + #endif + #if TIMING + #define PRINTF_TIM(fmt, ...) printf(fmt, ## __VA_ARGS__) + #else + #define PRINTF_TIM(...) + #endif +#else + #define PRINTF(...) + #define PRINTF_DEB(...) + #define PRINTF_TIM(...) 
+#endif + +// Define the dimensions of the input tensor and the kernel + +#define N_PATCHES_H ((IH + (PAD + PAD) - FH)/ STRIDES + 1) +#define N_PATCHES_W ((IW + (PAD + PAD) - FW)/ STRIDES + 1) + +#define CH_COL (CH * FH * FW) + +#define OH_NCHW (CH * FH * FW * BATCH) +#define OW_NCHW (N_PATCHES_H) * (N_PATCHES_W) + +#define OW_NHWC (FW * FH * CH * BATCH) +#define OH_NHWC (N_PATCHES_W) * (N_PATCHES_H) + +int im2col_nchw_int32(); +int im2col_nhwc_int32(); + +int get_index(int dim1, int dim2, int dim3, int index0, int index1, int index2, int index3); + +int verify(int format); + +#endif \ No newline at end of file diff --git a/sw/applications/example_im2col/main.c b/sw/applications/example_im2col/main.c new file mode 100644 index 000000000..a22c8bf8d --- /dev/null +++ b/sw/applications/example_im2col/main.c @@ -0,0 +1,81 @@ +/* + Copyright EPFL contributors. + Licensed under the Apache License, Version 2.0, see LICENSE for details. + SPDX-License-Identifier: Apache-2.0 + + Author: Tommaso Terzano + + Info: Example application of im2col algorithm with configurable format, verification and performance analysis. 
+*/ + +#include +#include +#include +#include "x-heep.h" +#include "im2col_lib.h" + +#define NCHW_FORMAT 0 +#define NHWC_FORMAT 1 + +int main() +{ + PRINTF("\nStarting test...\n\n"); + + int errors; + unsigned int cycles; + + #if TIMING + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + #endif + + im2col_nchw_int32(); // Execute the im2col algorithm with NCHW format + + #if TIMING + CSR_READ(CSR_REG_MCYCLE, &cycles); + #endif + + errors = verify(NCHW_FORMAT); + + PRINTF("im2col NCHW test executed\n"); + + PRINTF_TIM("Total number of cycles: [%d]\n\n", cycles); + + if (errors != 0) + { + PRINTF("TEST FAILED: %d errors\n", errors); + return 1; + } + else + { + PRINTF("TEST PASSED!\n"); + } + + #if TIMING + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + #endif + + im2col_nhwc_int32(); // Execute the im2col algorithm with NHWC format + + #if TIMING + CSR_READ(CSR_REG_MCYCLE, &cycles); + #endif + + errors = verify(NHWC_FORMAT); + + PRINTF("im2col NHWC test executed\n"); + PRINTF_TIM("Total number of cycles: [%d]\n\n", cycles); + + if (errors != 0) + { + PRINTF("TEST FAILED: %d errors\n", errors); + return 1; + } + else + { + PRINTF("TEST PASSED!\n"); + } + + return 0; +} diff --git a/sw/applications/example_matadd/main.c b/sw/applications/example_matadd/main.c index 80fdb6555..7c35229cc 100644 --- a/sw/applications/example_matadd/main.c +++ b/sw/applications/example_matadd/main.c @@ -14,7 +14,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) 
diff --git a/sw/applications/example_matadd_interleaved/main.c b/sw/applications/example_matadd_interleaved/main.c index 86f699720..058aadbe6 100644 --- a/sw/applications/example_matadd_interleaved/main.c +++ b/sw/applications/example_matadd_interleaved/main.c @@ -15,7 +15,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) diff --git a/sw/applications/example_matfadd/main.c b/sw/applications/example_matfadd/main.c index ab2191bdf..59dc2bef6 100644 --- a/sw/applications/example_matfadd/main.c +++ b/sw/applications/example_matfadd/main.c @@ -17,7 +17,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) @@ -28,11 +28,24 @@ uint32_t check_results(float * C, int N, int M); float m_c[HEIGHT*WIDTH]; -void swap(char *a, char *b) +void putlong(long i) { - char temp = *a; - *a = *b; - *b = temp; + char int_str[20]; // An array to store the digits + int len = 0; // The length of the string + do + { + // Get the last digit and store it in the array + int_str[len] = '0' + i % 10; + len++; + // Remove the last digit from i + i /= 10; + } while (i > 0); + + // Print the reversed string of digits + for (int j = len - 1; j >= 0; j--) + { + putchar(int_str[j]); + } } // A function to print a floating point number using putchar @@ -47,57 +60,68 @@ void putfloat(float x, int p) x = -x; } - // Convert the integer part of x into a string of digits - long i = (long)x; // Get the integer part - char int_str[20]; // An array to store the digits - int len = 0; // The length of the string - do - { - // Get the last digit and store it in the array - int_str[len] = '0' + i % 10; - len++; - // Remove the last 
digit from i - i /= 10; - } while (i > 0); + float f = x - (long)x; // Get the fractional part of x - // Reverse the string of digits - for (int j = 0; j < len / 2; j++) + // Get the p most significant digits of the fractional part as the + // integer part of f. + // Count the number of initial zeros. + int initial_zeros = 0; + // Check if the fraction will overflow to the integer part when + // rounding up (i.e. if the fraction is 0.999...) + int fraction_overflow = 1; + for (int j = 0; j < p; j++) { - // Swap the elements at both ends - swap(&int_str[j], &int_str[len - 1 - j]); + f *= 10; + if (f < 1) + { + // exclude the last digit with round up + if (!(j == p - 1 && f >= 0.5f)) + initial_zeros++; + } + if (fraction_overflow && (long)f % 10 < 9) + { + fraction_overflow = 0; + } } - // Print the string of digits - for (int j = 0; j < len; j++) + // Round up if necessary + if ((f - (long)f) >= 0.5f) { - putchar(int_str[j]); + // If the rounding causes a digit to overflow in the fractional + // part, then we need to print one less zero + if (fraction_overflow == 0) + { + f += 1; + if (f >= 10 && initial_zeros > 0) + { + initial_zeros--; + } + } + // If the overflow is in the integer part, then we need to print + // one more digit in the integer part, and none in the fractional + else + { + f = 0; + x += 1; + initial_zeros = p - 1; + } } + // Convert the integer part of x into a string of digits + putlong((long)x); + // Print a decimal point putchar('.'); - // Convert the fractional part of x into a string of digits - float f = x - (long)x; // Get the fractional part - char frac_str[20]; // An array to store the digits - len = 0; // The length of the string - while (p--) + // Print the initial zeros + while (initial_zeros--) { - // Get the first digit after the decimal point and store it in the array - f = (f - (long)f) * 10; - frac_str[len] = '0' + (long)f; - len++; - // Round up if necessary - if (fabs(f - (long)f) >= 0.5f) - { - frac_str[len - 1]++; - } + 
putchar('0'); } - // Print the string of digits - for (int j = 0; j < len; j++) - { - putchar(frac_str[j]); - } + // Convert the fractional part of x into a string of digits + if (f > 1) + putlong((long)f); } void __attribute__ ((noinline)) printMatrix(float * C, int N, int M) diff --git a/sw/applications/example_matmul/gen_stimuly.py b/sw/applications/example_matmul/gen_stimuly.py index 30c9ad62e..cdf555035 100644 --- a/sw/applications/example_matmul/gen_stimuly.py +++ b/sw/applications/example_matmul/gen_stimuly.py @@ -1,27 +1,21 @@ #!/usr/bin/env python +## Copyright 2024 EPFL +## Solderpad Hardware License, Version 2.1, see LICENSE.md for details. +## SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + import sys import random - -# Copyright 2017 ETH Zurich and University of Bologna. -# Copyright and related rights are licensed under the Solderpad Hardware -# License, Version 0.51 (the License); you may not use this file except in -# compliance with the License. You may obtain a copy of the License at -# http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law -# or agreed to in writing, software, hardware and materials distributed under -# this License is distributed on an AS IS BASIS, WITHOUT WARRANTIES OR -# CONDITIONS OF ANY KIND, either express or implied. See the License for the -# specific language governing permissions and limitations under the License. 
+import numpy as np def write_arr(f, name, arr, ctype, size): f.write("const " + ctype + " " + name + "[] = {\n") - i = 1 - for v in arr: - if i % size == 0: - f.write('%d,\n' % (v)) - else: - f.write('%d,' % (v)) - i+=1 + + for row in arr: + for elem in row[:-1]: + f.write('%d,' % (elem)) + f.write('%d,\n' % (row[-1])) + f.write('};\n\n') return @@ -33,33 +27,23 @@ def write_arr(f, name, arr, ctype, size): f.write('// This file is automatically generated\n') -SIZE = 16 +SIZE = 16 RANGE = 4 m_a = [] m_b = [] m_exp = [] -for i in range(0,SIZE): - for j in range(0,SIZE): - a = random.randint(-RANGE, RANGE-1) - b = random.randint(-RANGE, RANGE-1) - - m_a.append(a) - m_b.append(b) - -for i in range(0,SIZE): - for j in range(0,SIZE): - r = 0 - - for k in range (0,SIZE): - r = r + m_a[i * SIZE + k] * m_b[k * SIZE + j] - - m_exp.append(r) +# Generate random 8 bit integers from -RANGE to RANGE for A and B +m_a = np.random.randint(-RANGE, RANGE, size=(SIZE, SIZE), dtype=np.int32) +m_b = np.random.randint(-RANGE, RANGE, size=(SIZE, SIZE), dtype=np.int32) +m_exp = np.zeros((SIZE, SIZE), dtype=np.int32) +# Test the function with A and B +m_exp = np.matmul(m_a,m_b) write_arr(f, 'm_a', m_a, 'int8_t', SIZE) -write_arr(f, 'm_b_transposed', m_b, 'int8_t', SIZE) +write_arr(f, 'm_b', m_b, 'int8_t', SIZE) write_arr(f, 'm_exp', m_exp, 'int32_t', SIZE) f.write('#define SIZE %d\n' % SIZE) diff --git a/sw/applications/example_matmul/main.c b/sw/applications/example_matmul/main.c index 789f41132..69d1750b8 100644 --- a/sw/applications/example_matmul/main.c +++ b/sw/applications/example_matmul/main.c @@ -14,31 +14,52 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) 
#endif -void __attribute__ ((noinline)) matrixMul8(int8_t * A, int8_t * Bt, int32_t * C, int N); +void __attribute__ ((noinline)) matrixMul8_blocksize(int8_t * A, int8_t * B, int32_t * C, int N); + +void __attribute__ ((noinline)) matrixMul8_tiled(int8_t * A, int8_t * B, int32_t * C, int N); uint32_t check_results(int32_t * C, int N); int32_t m_c[SIZE*SIZE]; +#define BLOCK_SIZE 4 + +// Define a macro for accessing matrix elements +#define A(i,j) &A[i*SIZE+j] +#define B(i,j) &B[i*SIZE+j] +#define C(i,j) &C[i*SIZE+j] + +#define HIGHEST_PERF + int main() { uint32_t errors = 0; unsigned int instr, cycles; + for(int i =0;i> 1; // Half the size + // Multiply the blocks and add them to the corresponding blocks of C + matrixMul8_tiled(A(0, 0), B(0, 0), C(0, 0), N); // C_00 += A_00 * B_00 + matrixMul8_tiled(A(0, N), B(N, 0), C(0, 0), N); // C_00 += A_01 * B_10 + matrixMul8_tiled(A(0, 0), B(0, N), C(0, N), N); // C_01 += A_00 * B_01 + matrixMul8_tiled(A(0, N), B(N, N), C(0, N), N); // C_01 += A_01 * B_11 + matrixMul8_tiled(A(N, 0), B(0, 0), C(N, 0), N); // C_10 += A_10 * B_00 + matrixMul8_tiled(A(N, N), B(N, 0), C(N, 0), N); // C_10 += A_11 * B_10 + matrixMul8_tiled(A(N, 0), B(0, N), C(N, N), N); // C_11 += A_10 * B_01 + matrixMul8_tiled(A(N, N), B(N, N), C(N, N), N); // C_11 += A_11 * B_11 + } } + uint32_t check_results(int32_t * C, int N) { // check diff --git a/sw/applications/example_matmul/matrixMul8.h b/sw/applications/example_matmul/matrixMul8.h index 2725551b0..bc2be3e0b 100644 --- a/sw/applications/example_matmul/matrixMul8.h +++ b/sw/applications/example_matmul/matrixMul8.h @@ -2,60 +2,60 @@ #define _MATMUL8_ // This file is automatically generated const int8_t m_a[] = { --2,3,-1,-1,-3,2,0,2,-2,-1,-3,-3,1,3,2,1, -1,-3,1,-4,-1,2,1,0,-4,-1,-1,-3,-2,-1,0,-2, -1,0,-1,2,1,3,2,-3,0,3,-1,3,-2,3,2,2, --3,1,0,0,1,0,-4,-4,0,-2,-3,-2,-1,-1,3,2, --2,0,-1,-3,-4,-1,-3,0,3,-1,2,-1,0,-2,1,1, -0,-2,-4,1,1,-4,1,1,0,-2,0,-4,-4,-1,2,-4, --2,-1,-2,-4,0,-2,2,3,-2,2,-2,-1,2,3,1,-3, 
--1,-2,3,-4,0,3,-4,-3,1,-4,0,-1,-2,0,1,-1, -1,-3,0,1,-3,-3,0,-2,2,1,-3,1,1,-4,3,0, -2,1,1,2,-1,-4,3,-3,-3,1,-2,-1,1,3,-2,3, --1,-1,-1,-1,-3,1,-2,3,-1,0,-2,3,1,3,-3,-4, --1,-1,-1,-1,-2,3,-3,3,-3,-1,-4,-2,-3,3,0,-2, -2,3,-4,0,3,-4,-4,3,3,2,-1,-2,0,-1,-4,-4, -3,-1,-1,-3,2,-2,3,1,-2,2,3,0,-2,-2,-2,0, --4,1,2,2,3,-2,2,1,-2,0,-4,2,-2,-2,3,-2, -1,3,-2,0,1,3,2,-3,-4,0,0,3,3,-1,3,3, +-4,-2,-3,3,3,-1,-4,-3,1,-2,-2,2,-2,-4,-2,-1, +0,2,0,-2,-3,3,-2,3,-1,3,0,2,3,1,-3,-2, +0,2,2,3,-2,-2,-3,-2,0,-3,0,-4,2,-3,-3,1, +3,-4,1,1,-1,0,-3,0,1,3,-1,-2,2,3,-4,0, +3,-1,2,-4,-1,-2,0,-3,-3,0,-2,-4,-2,1,0,-2, +-3,0,-2,2,-1,2,2,2,3,1,0,-2,-1,-4,3,-3, +1,3,2,-3,-4,-3,-2,-1,-2,3,1,-3,2,-4,0,0, +-2,0,3,-1,3,-4,0,-2,2,3,-2,-2,-1,-4,-3,1, +-4,-2,-4,-1,-2,-4,-4,-2,-4,-4,-3,0,-2,-4,2,-4, +1,-1,-3,-2,-1,-2,-4,0,2,3,1,-3,-3,1,3,1, +-3,0,1,3,-3,1,3,2,0,-2,1,-4,-3,0,1,-4, +3,0,-1,-3,2,-1,-1,1,-1,3,2,-4,2,2,0,-2, +-4,2,2,-2,2,-4,-3,-3,-3,-3,-2,-3,3,0,3,0, +2,3,-2,0,3,1,1,-3,3,0,1,-4,-4,-1,0,1, +0,0,1,3,-3,3,3,1,-3,1,1,2,1,-2,-2,2, +-1,2,-2,1,0,-2,-3,-1,0,0,2,0,3,3,-4,-4, }; -const int8_t m_b_transposed[] = { --1,-2,-1,3,-1,-3,2,-3,1,0,2,1,2,-4,1,-1, -2,-4,2,3,-4,-3,3,-2,2,1,-1,-2,-4,-4,-4,2, --4,-4,-4,1,-4,0,1,0,-2,1,-4,2,-3,2,-3,-4, -2,2,-4,0,-2,-4,-3,-3,-3,-4,-3,-3,-3,2,1,0, --2,1,-2,-4,2,3,1,-4,2,2,2,-2,-2,-4,-1,-3, -3,2,3,2,3,0,2,-2,-3,1,1,3,3,2,2,2, --3,-4,-2,1,-1,2,3,-2,-3,0,-1,-2,3,-3,2,-4, -1,-2,-1,3,-1,-3,-4,2,-4,3,2,-4,1,1,-2,-2, --2,2,3,-3,0,2,-1,1,2,-2,-2,3,-2,-2,-3,-4, --1,-3,-3,1,3,3,3,1,3,-4,0,-4,-2,-3,0,3, --1,1,-4,0,-1,-2,-3,2,1,1,-4,2,1,0,1,-1, --2,-4,0,-3,-1,2,0,-1,2,-3,-3,-4,0,-3,-3,-2, -3,1,2,3,2,-3,1,2,2,-4,-4,3,-2,3,-4,1, --3,-1,-3,-1,-2,3,-2,-4,2,-4,-1,0,-2,1,-1,2, --1,0,-2,-3,3,3,0,-1,3,2,3,-3,3,1,-1,3, -3,3,-1,-3,-3,2,-3,0,2,-2,1,-2,3,1,0,-4, +const int8_t m_b[] = { +2,1,2,1,0,3,-1,-1,3,3,3,-2,0,-3,-1,3, +-3,0,-3,0,2,-2,3,0,-2,-1,2,-3,2,-2,0,-2, +-4,0,1,3,-3,-3,-2,2,3,0,-1,2,3,2,-2,-4, +3,3,-2,3,2,2,-2,-1,-3,-3,-1,-3,3,-2,2,-2, +0,1,0,-3,-1,-3,-1,0,-1,3,3,1,-1,-1,2,2, 
+-2,-4,-1,-1,1,1,2,-1,2,3,0,3,2,0,-4,3, +-1,-1,-3,2,-4,3,3,-1,3,1,-3,-2,1,1,2,3, +0,2,0,1,1,-3,-4,-4,2,-2,1,0,3,2,0,-1, +0,1,1,-1,2,-1,0,2,-1,-3,3,0,-2,-4,2,3, +1,-4,2,-4,0,1,3,-4,-2,-3,-2,-2,-1,3,-2,-3, +1,0,2,3,0,1,1,-1,-3,-1,3,-3,-3,-2,1,-4, +-3,2,-4,1,3,2,3,-2,1,1,3,0,-2,-2,-1,-1, +1,-3,-1,1,-2,2,-1,-4,3,0,-3,-4,-3,1,-1,2, +-1,-2,3,-4,0,1,1,1,-2,1,-4,3,0,1,2,1, +1,3,-1,-4,2,0,-1,-1,-4,-3,-2,-4,-1,-4,3,2, +0,1,0,1,3,-4,-1,2,1,-1,3,-2,-2,-2,-1,-1, }; const int32_t m_exp[] = { -33,0,23,29,-4,-7,0,-3,-11,8,25,-4,8,35,-6,44, --7,-4,7,25,23,5,22,9,-34,50,40,27,43,18,42,16, --14,-10,-23,-30,8,47,18,-49,22,-39,5,-34,12,-26,14,10, -23,44,26,-39,11,22,0,-4,25,12,25,11,-8,25,-3,25, -17,28,36,-7,4,-4,-22,53,17,13,-2,32,12,23,-9,8, --15,20,-8,-13,20,-3,-17,1,-18,43,45,-13,15,-9,39,13, --16,-28,3,16,30,24,12,14,5,5,23,-19,3,1,-7,31, --11,31,31,-23,12,21,1,8,1,40,12,66,13,32,5,4, --2,6,11,-16,20,8,8,25,12,-18,3,-5,12,6,0,-2, --8,-23,-35,10,-41,0,12,-27,10,-44,-15,-18,-16,-9,7,-7, -2,-21,25,29,5,-15,-8,13,-20,-17,-10,4,-15,19,-12,30, -17,7,20,21,13,-2,-11,-6,-35,26,49,-1,13,37,21,49, -9,1,29,20,8,-36,0,11,19,5,18,-7,-56,-48,-22,14, --24,-27,-24,9,2,5,15,6,0,26,17,-16,27,-48,26,-26, --20,-33,-19,-25,2,24,14,-17,-17,22,12,-61,-18,-10,-14,-6, -31,-8,7,0,12,5,34,-29,27,-12,4,-24,28,-14,-1,22, +15,30,-21,1,25,-11,-8,24,-29,-1,32,23,-10,-15,9,-4, +-20,-44,1,-1,3,12,19,-41,25,4,-10,11,7,36,-44,-17, +13,10,3,50,-5,-17,-31,28,5,-12,9,-14,14,-3,-8,-28, +23,-31,50,-6,-12,14,-20,-5,19,6,-17,24,-4,29,-22,-1, +0,-17,35,-16,-40,5,-2,28,15,29,-28,27,10,27,-7,8, +15,8,-21,-5,6,6,0,-18,-23,-38,-11,-19,16,-8,16,14, +3,-18,14,16,-15,-9,1,-8,6,-23,-5,-36,-6,20,-27,-43, +-4,-1,9,3,-23,-39,-5,24,5,-13,15,6,-4,21,-3,-26, +20,35,-18,-6,6,3,-17,18,-28,-4,-14,16,-2,8,15,-4, +31,7,45,-49,28,-14,-8,10,-49,-31,11,-6,-24,-17,14,-3, +3,8,-5,21,-19,8,-10,8,-17,-23,-39,10,49,16,22,-9, +21,-30,44,-32,-25,5,-5,-22,-5,10,-12,-8,-15,19,1,8, +-7,8,-2,-18,-18,-38,-23,28,-20,-3,-29,-4,-9,9,17,-11, 
+10,4,11,-18,8,-13,16,34,-29,6,36,-5,0,-37,18,22, +-7,-13,-29,48,-2,22,13,-26,31,1,-8,-18,20,16,-30,-23, +8,-20,13,0,-5,20,10,-10,-22,2,-9,4,-12,14,6,-18, }; #define SIZE 16 diff --git a/sw/applications/example_pdm2pcm/main.c b/sw/applications/example_pdm2pcm/main.c index b4f9c3b37..72551c216 100644 --- a/sw/applications/example_pdm2pcm/main.c +++ b/sw/applications/example_pdm2pcm/main.c @@ -35,7 +35,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) diff --git a/sw/applications/example_power_gating_core/main.c b/sw/applications/example_power_gating_core/main.c index da7d84486..b70af5c3f 100644 --- a/sw/applications/example_power_gating_core/main.c +++ b/sw/applications/example_power_gating_core/main.c @@ -30,7 +30,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) diff --git a/sw/applications/example_power_gating_external/main.c b/sw/applications/example_power_gating_external/main.c index 1ffc90c4d..fb5a50507 100644 --- a/sw/applications/example_power_gating_external/main.c +++ b/sw/applications/example_power_gating_external/main.c @@ -17,7 +17,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) 
diff --git a/sw/applications/example_power_gating_periph/main.c b/sw/applications/example_power_gating_periph/main.c index fb51e1c73..dd0273e25 100644 --- a/sw/applications/example_power_gating_periph/main.c +++ b/sw/applications/example_power_gating_periph/main.c @@ -17,7 +17,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) diff --git a/sw/applications/example_power_gating_ram_blocks/main.c b/sw/applications/example_power_gating_ram_blocks/main.c index fe6734037..0b3db15f6 100644 --- a/sw/applications/example_power_gating_ram_blocks/main.c +++ b/sw/applications/example_power_gating_ram_blocks/main.c @@ -17,7 +17,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) diff --git a/sw/applications/example_set_retentive_ram_blocks/main.c b/sw/applications/example_set_retentive_ram_blocks/main.c index cde898ad8..478a9b5c4 100644 --- a/sw/applications/example_set_retentive_ram_blocks/main.c +++ b/sw/applications/example_set_retentive_ram_blocks/main.c @@ -17,7 +17,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) 
diff --git a/sw/applications/example_set_retentive_ram_blocks_external/main.c b/sw/applications/example_set_retentive_ram_blocks_external/main.c index 7b4555910..eaa191733 100644 --- a/sw/applications/example_set_retentive_ram_blocks_external/main.c +++ b/sw/applications/example_set_retentive_ram_blocks_external/main.c @@ -17,7 +17,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) diff --git a/sw/applications/example_simple_accelerator/main.c b/sw/applications/example_simple_accelerator/main.c index 71de642f1..53fb896b0 100644 --- a/sw/applications/example_simple_accelerator/main.c +++ b/sw/applications/example_simple_accelerator/main.c @@ -10,7 +10,7 @@ #define TEST_DATA_SIZE 16 -#define PRINTF_IN_SIM 1 +#define PRINTF_IN_SIM 0 #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) diff --git a/sw/applications/example_spi_host_dma_power_gate/main.c b/sw/applications/example_spi_host_dma_power_gate/main.c index 0f1a444a9..08fda5caa 100644 --- a/sw/applications/example_spi_host_dma_power_gate/main.c +++ b/sw/applications/example_spi_host_dma_power_gate/main.c @@ -27,7 +27,7 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) @@ -35,7 +35,7 @@ // Type of data frome the SPI. 
For types different than words the SPI data is requested in separate transactions // word(0), half-word(1), byte(2,3) -#define SPI_DATA_TYPE DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_32BIT_WORD +#define SPI_DATA_TYPE DMA_DATA_TYPE_WORD // Number of elements to copy #define COPY_DATA_NUM 16 diff --git a/sw/applications/example_spi_read/main.c b/sw/applications/example_spi_read/main.c index 2e9528160..1261e0c29 100644 --- a/sw/applications/example_spi_read/main.c +++ b/sw/applications/example_spi_read/main.c @@ -20,13 +20,13 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) #endif -#ifdef TARGET_PYNQ_Z2 +#if defined(TARGET_PYNQ_Z2) || defined(TARGET_ZCU104) || defined(TARGET_NEXYS_A7_100T) #define USE_SPI_FLASH #endif diff --git a/sw/applications/example_spi_write/main.c b/sw/applications/example_spi_write/main.c index 2b1a362b8..56cc9e407 100644 --- a/sw/applications/example_spi_write/main.c +++ b/sw/applications/example_spi_write/main.c @@ -25,13 +25,13 @@ #if TARGET_SIM && PRINTF_IN_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA +#elif PRINTF_IN_FPGA && !TARGET_SIM #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) #else #define PRINTF(...) 
#endif -#ifdef TARGET_PYNQ_Z2 +#if defined(TARGET_PYNQ_Z2) || defined(TARGET_ZCU104) || defined(TARGET_NEXYS_A7_100T) #define USE_SPI_FLASH #endif @@ -140,6 +140,9 @@ void erase_memory(uint32_t addr); w25q_error_codes_t global_status; int main(int argc, char *argv[]) { + // Initialize the DMA + dma_init(NULL); + soc_ctrl_t soc_ctrl; soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); diff --git a/sw/applications/example_virtual_flash/main.c b/sw/applications/example_virtual_flash/main.c deleted file mode 100644 index e1e16f56d..000000000 --- a/sw/applications/example_virtual_flash/main.c +++ /dev/null @@ -1,222 +0,0 @@ - -#include -#include -#include "csr.h" -#include "hart.h" -#include "handler.h" -#include "core_v_mini_mcu.h" -#include "rv_timer.h" -#include "rv_timer_regs.h" -#include "soc_ctrl.h" -#include "rv_plic.h" -#include "rv_plic_regs.h" -#include "spi_host.h" -#include "spi_host_regs.h" -#include "dma.h" -#include "fast_intr_ctrl.h" -#include "gpio.h" -#include "fast_intr_ctrl_regs.h" -#include "x-heep.h" - -#define REVERT_24b_ADDR(addr) ((((uint32_t)addr & 0xff0000) >> 16) | ((uint32_t)addr & 0xff00) | (((uint32_t)addr & 0xff) << 16)) -#define FLASH_ADDR 0x00000000 -#define FLASH_SIZE 64 * 1024 * 1024 -#define FLASH_CLK_MAX_HZ (133 * 1000 * 1000) - - -/* By default, printfs are activated for FPGA and disabled for simulation. */ -#define PRINTF_IN_FPGA 1 -#define PRINTF_IN_SIM 0 - -#if TARGET_SIM && PRINTF_IN_SIM - #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#elif TARGET_PYNQ_Z2 && PRINTF_IN_FPGA - #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) -#else - #define PRINTF(...) 
-#endif - -// Interrupt controller variables -plic_result_t plic_res; -uint32_t intr_num; - -//volatile int8_t timer_flag; -volatile int8_t spi_intr_flag; - -spi_host_t spi_host_flash; - -void dma_intr_handler_trans_done(){ - PRINTF("#\n\r"); -} - -void fic_irq_spi_flash(){ - // Disable SPI interrupts - spi_enable_evt_intr(&spi_host_flash, false); - spi_enable_rxwm_intr(&spi_host_flash, false); - spi_intr_flag = 1; - PRINTF("@\n\r"); -} - - -void write_to_flash(spi_host_t *SPI, uint16_t *data, uint32_t byte_count, uint32_t addr) -{ - uint32_t write_to_mem = 0x02; - spi_write_word(SPI, write_to_mem); - uint32_t cmd_write_to_mem = spi_create_command((spi_command_t){ - .len = 0, - .csaat = true, - .speed = kSpiSpeedStandard, - .direction = kSpiDirTxOnly - }); - spi_set_command(SPI, cmd_write_to_mem); - spi_wait_for_ready(SPI); - - uint32_t addr_cmd = __builtin_bswap32(addr); - spi_write_word(SPI, addr_cmd); - uint32_t cmd_address = spi_create_command((spi_command_t){ - .len = 3, - .csaat = true, - .speed = kSpiSpeedStandard, - .direction = kSpiDirTxOnly - }); - spi_set_command(SPI, cmd_address); - spi_wait_for_ready(SPI); - - uint32_t *fifo_ptr_tx = SPI->base_addr.base + SPI_HOST_TXDATA_REG_OFFSET; - - // -- DMA CONFIGURATION -- - dma_init(NULL); - - dma_target_t tgt_src = { - .ptr = data, - .inc_du = 1, - .size_du = 64, - .type = DMA_DATA_TYPE_HALF_WORD, - .trig = DMA_TRIG_MEMORY, - }; - dma_target_t tgt_dst = { - .ptr = fifo_ptr_tx, - .inc_du = 0, - .size_du = 0, - .type = DMA_DATA_TYPE_HALF_WORD, - .trig = DMA_TRIG_SLOT_SPI_FLASH_TX, - }; - dma_trans_t trans = { - .src = &tgt_src, - .dst = &tgt_dst, - .end = DMA_TRANS_END_INTR, - }; - - dma_config_flags_t res; - - spi_intr_flag = 0; - - res = dma_validate_transaction( &trans, DMA_ENABLE_REALIGN, DMA_PERFORM_CHECKS_INTEGRITY ); - PRINTF("trans: %u\n\r", res ); - res = dma_load_transaction(&trans); - PRINTF("load: %u\n\r", res ); - res = dma_launch(&trans); - - // Wait for the first data to arrive to the TX FIFO 
before enabling interrupt - spi_wait_for_tx_not_empty(SPI); - // Enable event interrupt - spi_enable_evt_intr(SPI, true); - // Enable TX empty interrupt - spi_enable_txempty_intr(SPI, true); - - const uint32_t cmd_write_tx = spi_create_command((spi_command_t){ - .len = byte_count - 1, - .csaat = false, - .speed = kSpiSpeedStandard, - .direction = kSpiDirTxOnly - }); - spi_set_command(SPI, cmd_write_tx); - spi_wait_for_ready(SPI); - - // Wait for SPI interrupt - while(spi_intr_flag == 0) { - wait_for_interrupt(); - } - - PRINTF("%d words written to flash.\n\n\r", byte_count/4); -} - -int main(int argc, char *argv[]) -{ - // Get current Frequency - soc_ctrl_t soc_ctrl; - soc_ctrl.base_addr = mmio_region_from_addr((uintptr_t)SOC_CTRL_START_ADDRESS); - soc_ctrl_select_spi_host(&soc_ctrl); - - uint32_t core_clk = soc_ctrl_get_frequency(&soc_ctrl); - - // Enable interrupt on processor side - // Enable global interrupt for machine-level interrupts - CSR_SET_BITS(CSR_REG_MSTATUS, 0x8); - uint32_t mask = 1 << 21; - CSR_SET_BITS(CSR_REG_MIE, mask); - spi_intr_flag = 0; - // Set mie.MEIE bit to one to enable timer interrupt - mask = 1 << 7; - CSR_SET_BITS(CSR_REG_MIE, mask); - - spi_host_flash.base_addr = mmio_region_from_addr((uintptr_t)SPI_HOST_START_ADDRESS); - spi_set_enable(&spi_host_flash, true); - spi_output_enable(&spi_host_flash, true); - - uint16_t clk_div = 0; - if (FLASH_CLK_MAX_HZ < core_clk / 2) - { - clk_div = (core_clk / (FLASH_CLK_MAX_HZ)-2) / 2; // The value is truncated - if (core_clk / (2 + 2 * clk_div) > FLASH_CLK_MAX_HZ) - clk_div += 1; // Adjust if the truncation was not 0 - } - - // SPI Configuration - // Configure chip 0 (flash memory) - const uint32_t chip_cfg_flash = spi_create_configopts((spi_configopts_t){ - .clkdiv = clk_div, - .csnidle = 0xF, - .csntrail = 0xF, - .csnlead = 0xF, - .fullcyc = false, - .cpha = 0, - .cpol = 0}); - spi_set_configopts(&spi_host_flash, 0, chip_cfg_flash); - spi_set_csid(&spi_host_flash, 0); - - // To set the number of 
dummy cycles we have to send command 0x11 and then a 1B value - const uint32_t reset_cmd = 0x11; - spi_write_word(&spi_host_flash, reset_cmd); - const uint32_t cmd_reset = spi_create_command((spi_command_t){ - .len = 0, - .csaat = true, - .speed = kSpiSpeedStandard, - .direction = kSpiDirTxOnly - }); - spi_set_command(&spi_host_flash, cmd_reset); - spi_wait_for_ready(&spi_host_flash); - - const uint32_t set_dummy_cycle = 0x07; - spi_write_word(&spi_host_flash, set_dummy_cycle); - const uint32_t cmd_set_dummy = spi_create_command((spi_command_t){ - .len = 0, - .csaat = false, - .speed = kSpiSpeedStandard, - .direction = kSpiDirTxOnly - }); - - spi_set_command(&spi_host_flash, cmd_set_dummy); - spi_wait_for_ready(&spi_host_flash); - - uint32_t results[32]; - for(uint32_t i = 0; i < 32; i++){ - results[i] = i; - } - - write_to_flash(&spi_host_flash, results, sizeof(*results) * 32, FLASH_ADDR); - - PRINTF("Success.\n\r"); - - return EXIT_SUCCESS; -} diff --git a/sw/applications/minver/arch.cfg b/sw/applications/minver/arch.cfg new file mode 100644 index 000000000..0583bdfe6 --- /dev/null +++ b/sw/applications/minver/arch.cfg @@ -0,0 +1,67 @@ +############################################################################### +# +# Copyright 2020 OpenHW Group +# +# Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# https://solderpad.org/licenses/ +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# +# SPDX-License-Identifier: Apache-2.0 WITH SHL-2.0 +# +############################################################################### + +# This is a python setting of parameters for the architecture. The following +# parameters may be set (other keys are silently ignored). Defaults are shown +# in brackets +# - cc ('cc') +# - ld (same value as for cc) +# - cflags ([]) +# - ldflags ([]) +# - cc_define_pattern ('-D{0}') +# - cc_incdir_pattern ('-I{0}') +# - cc_input_pattern ('{0}') +# - cc_output_pattern ('-o {0}') +# - ld_input_pattern ('{0}') +# - ld_output_pattern ('-o {0}') +# - user_libs ([]) +# - dummy_libs ([]) +# - cpu_mhz (1) +# - warmup_heat (1) + +# The "flags" and "libs" parameters (cflags, ldflags, user_libs, dummy_libs) +# should be lists of arguments to be passed to the compile or link line as +# appropriate. Patterns are Python format patterns used to create arguments. +# Thus for GCC or Clang/LLVM defined constants can be passed using the prefix +# '-D', and the pattern '-D{0}' would be appropriate (which happens to be the +# default). + +# "user_libs" may be absolute file names or arguments to the linker. In the +# latter case corresponding arguments in ldflags may be needed. For example +# with GCC or Clang/LLVM is "-l" flags are used in "user_libs", the "-L" flags +# may be needed in "ldflags". + +# Dummy libs have their source in the "support" subdirectory. Thus if 'crt0' +# is specified, there should be a source file 'dummy-crt0.c' in the support +# directory. + +# There is no need to set an unused parameter, and this file may be empty to +# set no flags. + +# Parameter values which are duplicated in architecture, board, chip or +# command line are used in the following order of priority +# - default value +# - architecture specific value +# - chip specific value +# - board specific value +# - command line value + +# For flags, this priority is applied to individual flags, not the complete +# list of flags. 
diff --git a/sw/applications/minver/beebsc.c b/sw/applications/minver/beebsc.c new file mode 100644 index 000000000..5e55329f1 --- /dev/null +++ b/sw/applications/minver/beebsc.c @@ -0,0 +1,177 @@ +/* BEEBS local library variants + + Copyright (C) 2019 Embecosm Limited. + + Contributor Jeremy Bennett + + This file is part of Embench and was formerly part of the Bristol/Embecosm + Embedded Benchmark Suite. + + SPDX-License-Identifier: GPL-3.0-or-later */ + +/* These are very simple local versions of library routines, to ensure the + code is compiled with the flags used for the benchmark. Not all library + routines are here, just ones that cause a lot of unecessary load, or where + there is variation between platforms and architectures. */ + +#include +#include +#include "beebsc.h" + +/* Seed for the random number generator */ + +static long int seed = 0; + +/* Heap records and sane initial values */ + +static void *heap_ptr = NULL; +static void *heap_end = NULL; +static size_t heap_requested = 0; + + +/* Yield a sequence of random numbers in the range [0, 2^15-1]. + + long int is guaranteed to be at least 32 bits. The seed only ever uses 31 + bits (so is positive). + + For BEEBS this gets round different operating systems using different + multipliers and offsets and RAND_MAX variations. */ + +int +rand_beebs (void) +{ + seed = (seed * 1103515245L + 12345) & ((1UL << 31) - 1); + return (int) (seed >> 16); +} + + +/* Initialize the random number generator */ + +void +srand_beebs (unsigned int new_seed) +{ + seed = (long int) new_seed; +} + + +/* Initialize the BEEBS heap pointers. Note that the actual memory block is + in the caller code. */ + +void +init_heap_beebs (void *heap, size_t heap_size) +{ + heap_ptr = (void *) heap; + heap_end = (void *) ((char *) heap_ptr + heap_size); + heap_requested = 0; +} + + +/* Report if malloc ever failed. 
+ + Return non-zero (TRUE) if malloc did not reqest more than was available + since the last call to init_heap_beebs, zero (FALSE) otherwise. */ + +int +check_heap_beebs (void *heap) +{ + return ((void *) ((char *) heap + heap_requested) <= heap_end); +} + + +/* BEEBS version of malloc. + + This is primarily to reduce library and OS dependencies. Malloc is + generally not used in embedded code, or if it is, only in well defined + contexts to pre-allocate a fixed amount of memory. So this simplistic + implementation is just fine. + + Note in particular the assumption that memory will never be freed! */ + +void * +malloc_beebs (size_t size) +{ + void *new_ptr = heap_ptr; + + heap_requested += size; + + if (((void *) ((char *) heap_ptr + size) > heap_end) || (0 == size)) + return NULL; + else + { + heap_ptr = (void *) ((char *) heap_ptr + size); + return new_ptr; + } +} + + +/* BEEBS version of calloc. + + Implement as wrapper for malloc */ + +void * +calloc_beebs (size_t nmemb, size_t size) +{ + void *new_ptr = malloc_beebs (nmemb * size); + + /* Calloc is defined to zero the memory. OK to use a function here, because + it will be handled specially by the compiler anyway. */ + + if (NULL != new_ptr) + memset (new_ptr, 0, nmemb * size); + + return new_ptr; +} + + +/* BEEBS version of realloc. + + This is primarily to reduce library and OS dependencies. We just have to + allocate new memory and copy stuff across. */ + +void * +realloc_beebs (void *ptr, size_t size) +{ + void *new_ptr = heap_ptr; + + heap_requested += size; + + if (((void *) ((char *) heap_ptr + size) > heap_end) || (0 == size)) + return NULL; + else + { + heap_ptr = (void *) ((char *) heap_ptr + size); + + /* This is clunky, since we don't know the size of the original + pointer. However it is a read only action and we know it must + be big enough if we right off the end, or we couldn't have + allocated here. If the size is smaller, it doesn't matter. 
*/ + + if (NULL != ptr) + { + size_t i; + + for (i = 0; i < size; i++) + ((char *) new_ptr)[i] = ((char *) ptr)[i]; + } + + return new_ptr; + } +} + + +/* BEEBS version of free. + + For our simplified version of memory handling, free can just do nothing. */ + +void +free_beebs (void *ptr __attribute__ ((unused))) +{ +} + + +/* + Local Variables: + mode: C + c-file-style: "gnu" + End: +*/ diff --git a/sw/applications/minver/beebsc.h b/sw/applications/minver/beebsc.h new file mode 100644 index 000000000..4a1089596 --- /dev/null +++ b/sw/applications/minver/beebsc.h @@ -0,0 +1,65 @@ +/* BEEBS local library variants header + + Copyright (C) 2019 Embecosm Limited. + + Contributor Jeremy Bennett + + This file is part of Embench and was formerly part of the Bristol/Embecosm + Embedded Benchmark Suite. + + SPDX-License-Identifier: GPL-3.0-or-later */ + +#ifndef BEEBSC_H +#define BEEBSC_H + +#include + +/* BEEBS fixes RAND_MAX to its lowest permitted value, 2^15-1 */ + +#ifdef RAND_MAX +#undef RAND_MAX +#endif +#define RAND_MAX ((1U << 15) - 1) + +/* Common understanding of a "small value" (epsilon) for floating point + comparisons. */ + +#define VERIFY_DOUBLE_EPS 1.0e-13 +#define VERIFY_FLOAT_EPS 1.0e-5 + +/* Simplified assert. + + The full complexity of assert is not needed for a benchmark. 
See the + discussion at: + + https://lists.librecores.org/pipermail/embench/2019-August/000007.html + + This function just*/ + +#define assert_beebs(expr) { if (!(expr)) exit (1); } + +#define float_eq_beebs(exp, actual) (fabsf(exp - actual) < VERIFY_FLOAT_EPS) +#define float_neq_beebs(exp, actual) !float_eq_beebs(exp, actual) +#define double_eq_beebs(exp, actual) (fabs(exp - actual) < VERIFY_DOUBLE_EPS) +#define double_neq_beebs(exp, actual) !double_eq_beebs(exp, actual) + +/* Local simplified versions of library functions */ + +int rand_beebs (void); +void srand_beebs (unsigned int new_seed); + +void init_heap_beebs (void *heap, const size_t heap_size); +int check_heap_beebs (void *heap); +void *malloc_beebs (size_t size); +void *calloc_beebs (size_t nmemb, size_t size); +void *realloc_beebs (void *ptr, size_t size); +void free_beebs (void *ptr); +#endif /* BEEBSC_H */ + + +/* + Local Variables: + mode: C + c-file-style: "gnu" + End: +*/ diff --git a/sw/applications/minver/board.c b/sw/applications/minver/board.c new file mode 100644 index 000000000..db8fafa80 --- /dev/null +++ b/sw/applications/minver/board.c @@ -0,0 +1,22 @@ +/* Common board.c for the benchmarks + + Copyright (C) 2018-2019 Embecosm Limited + + Contributor: Jeremy Bennett + + This file is part of Embench and was formerly part of the Bristol/Embecosm + Embedded Benchmark Suite. + + SPDX-License-Identifier: GPL-3.0-or-later */ + +/* This is just a wrapper for the board specific support file. */ + +#include "boardsupport.c" + + +/* + Local Variables: + mode: C + c-file-style: "gnu" + End: +*/ diff --git a/sw/applications/minver/boardsupport.c b/sw/applications/minver/boardsupport.c new file mode 100644 index 000000000..f2e674dea --- /dev/null +++ b/sw/applications/minver/boardsupport.c @@ -0,0 +1,21 @@ +/* +** +** Copyright 2020 OpenHW Group +** +** Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. 
+** You may obtain a copy of the License at +** +** https://solderpad.org/licenses/ +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +** +******************************************************************************* +*/ + +#include "boardsupport.h" + diff --git a/sw/applications/minver/boardsupport.h b/sw/applications/minver/boardsupport.h new file mode 100644 index 000000000..6223cd684 --- /dev/null +++ b/sw/applications/minver/boardsupport.h @@ -0,0 +1,21 @@ +/* +** +** Copyright 2020 OpenHW Group +** +** Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** https://solderpad.org/licenses/ +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +** +******************************************************************************* +*/ + + + diff --git a/sw/applications/minver/chip.c b/sw/applications/minver/chip.c new file mode 100644 index 000000000..37ac28fe4 --- /dev/null +++ b/sw/applications/minver/chip.c @@ -0,0 +1,32 @@ +/* Common board.c for the benchmarks + + Copyright (C) 2018-2019 Embecosm Limited + + Contributor: Jeremy Bennett + + This file is part of Embench and was formerly part of the Bristol/Embecosm + Embedded Benchmark Suite. + + SPDX-License-Identifier: GPL-3.0-or-later */ + +/* This is just a wrapper for the chip specific support file if there is one. 
*/ + +/*#include "config.h"*/ + +#ifdef HAVE_CHIPSUPPORT_H +#include "chipsupport.c" +#endif + +/* Standard C does not permit empty translation units, so provide one. */ + +static void +empty_func () +{ +} + +/* + Local Variables: + mode: C + c-file-style: "gnu" + End: +*/ diff --git a/sw/applications/minver/chipsupport.c b/sw/applications/minver/chipsupport.c new file mode 100644 index 000000000..181a3dd9e --- /dev/null +++ b/sw/applications/minver/chipsupport.c @@ -0,0 +1,73 @@ +/* +** +** Copyright 2020 OpenHW Group +** +** Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** https://solderpad.org/licenses/ +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. +** +******************************************************************************* +*/ + +#include +#include +#include +#include "chipsupport.h" + +#include "csr.h" +#include "x-heep.h" + +/* By default, printfs are activated for FPGA and disabled for simulation. */ +#define PRINTF_IN_FPGA 1 +#define PRINTF_IN_SIM 0 + +#if TARGET_SIM && PRINTF_IN_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#elif PRINTF_IN_FPGA && !TARGET_SIM + #define PRINTF(fmt, ...) printf(fmt, ## __VA_ARGS__) +#else + #define PRINTF(...) 
+#endif + +#define FS_INITIAL 0x01 + +void +initialise_board () +{ + PRINTF("Initialize board corev32 \n"); + + //enable FP operations + CSR_SET_BITS(CSR_REG_MSTATUS, (FS_INITIAL << 13)); + +} + +void __attribute__ ((noinline)) __attribute__ ((externally_visible)) +start_trigger () +{ + PRINTF("start of test \n"); + + // Enable mcycle counter and read value + CSR_CLEAR_BITS(CSR_REG_MCOUNTINHIBIT, 0x1); + CSR_WRITE(CSR_REG_MCYCLE, 0); + +} + +void __attribute__ ((noinline)) __attribute__ ((externally_visible)) +stop_trigger () +{ + uint32_t cycle_cnt; + CSR_READ(CSR_REG_MCYCLE, &cycle_cnt); + PRINTF("end of test \n"); + PRINTF("Result is given in CPU cycles \n"); + PRINTF("RES: %d \n", cycle_cnt); + +} + diff --git a/sw/applications/minver/chipsupport.h b/sw/applications/minver/chipsupport.h new file mode 100644 index 000000000..bd174c315 --- /dev/null +++ b/sw/applications/minver/chipsupport.h @@ -0,0 +1,25 @@ +/* +** +** Copyright 2020 OpenHW Group +** +** Licensed under the Solderpad Hardware Licence, Version 2.0 (the "License"); +** you may not use this file except in compliance with the License. +** You may obtain a copy of the License at +** +** https://solderpad.org/licenses/ +** +** Unless required by applicable law or agreed to in writing, software +** distributed under the License is distributed on an "AS IS" BASIS, +** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +** See the License for the specific language governing permissions and +** limitations under the License. 
+** +******************************************************************************* +*/ + +#ifndef CHIPSUPPORT_H +#define CHIPSUPPORT_H + +#define CPU_MHZ 1 + +#endif diff --git a/sw/applications/minver/libminver.c b/sw/applications/minver/libminver.c new file mode 100644 index 000000000..0cabd55bf --- /dev/null +++ b/sw/applications/minver/libminver.c @@ -0,0 +1,292 @@ +/* BEEBS minver benchmark + + This version, copyright (C) 2014-2019 Embecosm Limited and University of + Bristol + + Contributor Pierre Langlois + Contributor Jeremy Bennett + + This file is part of Embench and was formerly part of the Bristol/Embecosm + Embedded Benchmark Suite. + + SPDX-License-Identifier: GPL-3.0-or-later + + ************************************************************************* + * * + * SNU-RT Benchmark Suite for Worst Case Timing Analysis * + * ===================================================== * + * Collected and Modified by S.-S. Lim * + * sslim@archi.snu.ac.kr * + * Real-Time Research Group * + * Seoul National University * + * * + * * + * < Features > - restrictions for our experimental environment * + * * + * 1. Completely structured. * + * - There are no unconditional jumps. * + * - There are no exit from loop bodies. * + * (There are no 'break' or 'return' in loop bodies) * + * 2. No 'switch' statements. * + * 3. No 'do..while' statements. * + * 4. Expressions are restricted. * + * - There are no multiple expressions joined by 'or', * + * 'and' operations. * + * 5. No library calls. * + * - All the functions needed are implemented in the * + * source file. * + * * + * * + ************************************************************************* + * * + * FILE: minver.c * + * SOURCE : Turbo C Programming for Engineering by Hyun Soo Ahn * + * * + * DESCRIPTION : * + * * + * Matrix inversion for 3x3 floating point matrix. 
* + * * + * REMARK : * + * * + * EXECUTION TIME : * + * * + * * + ************************************************************************* + +*/ + +#include +#include +#include "support.h" + +/* This scale factor will be changed to equalise the runtime of the + benchmarks. */ +#define LOCAL_SCALE_FACTOR 555 + +int minver (int row, int col, float eps); +int mmul (int row_a, int col_a, int row_b, int col_b); + +static float a_ref[3][3] = { + {3.0, -6.0, 7.0}, + {9.0, 0.0, -5.0}, + {5.0, -8.0, 6.0}, +}; + +static float b[3][3] = { + {-3.0, 0.0, 2.0}, + {3.0, -2.0, 0.0}, + {0.0, 2.0, -3.0}, +}; + +static float a[3][3], c[3][3], d[3][3], det; + +static float +minver_fabs (float n) +{ + float f; + + if (n >= 0) + f = n; + else + f = -n; + return f; +} + +int +mmul (int row_a, int col_a, int row_b, int col_b) +{ + int i, j, k, row_c, col_c; + float w; + + row_c = row_a; + col_c = col_b; + + if (row_c < 1 || row_b < 1 || col_c < 1 || col_a != row_b) + return (999); + for (i = 0; i < row_c; i++) + { + for (j = 0; j < col_c; j++) + { + w = 0.0; + for (k = 0; k < row_b; k++) + w += a[i][k] * b[k][j]; + c[i][j] = w; + } + } + + return (0); +} + + +int +minver (int row, int col, float eps) +{ + int work[500], i, j, k, r, iw, u, v; + float w, wmax, pivot, api, w1; + + r = w = 0; + if (row < 2 || row > 500 || eps <= 0.0) + return (999); + w1 = 1.0; + for (i = 0; i < row; i++) + work[i] = i; + for (k = 0; k < row; k++) + { + wmax = 0.0; + for (i = k; i < row; i++) + { + w = minver_fabs (a[i][k]); + if (w > wmax) + { + wmax = w; + r = i; + } + } + pivot = a[r][k]; + api = minver_fabs (pivot); + if (api <= eps) + { + det = w1; + return (1); + } + w1 *= pivot; + u = k * col; + v = r * col; + if (r != k) + { + w1 = -w; + iw = work[k]; + work[k] = work[r]; + work[r] = iw; + for (j = 0; j < row; j++) + { + w = a[k][j]; + a[k][j] = a[r][j]; + a[r][j] = w; + } + } + for (i = 0; i < row; i++) + a[k][i] /= pivot; + for (i = 0; i < row; i++) + { + if (i != k) + { + v = i * col; + w = 
a[i][k]; + if (w != 0.0) + { + for (j = 0; j < row; j++) + if (j != k) + a[i][j] -= w * a[k][j]; + a[i][k] = -w / pivot; + } + } + } + a[k][k] = 1.0 / pivot; + } + + for (i = 0; i < row; i++) + { + while (1) + { + k = work[i]; + if (k == i) + break; + iw = work[k]; + work[k] = work[i]; + work[i] = iw; + for (j = 0; j < row; j++) + { + u = j * col; + w = a[k][i]; + a[k][i] = a[k][k]; + a[k][k] = w; + } + } + } + + det = w1; + + return (0); +} + + +int +verify_benchmark (int res __attribute ((unused))) +{ + int i, j; + float eps = 1.0e-6; + + static float c_exp[3][3] = { + {-27.0, 26.0, -15.0}, + {-27.0, -10.0, 33.0}, + {-39.0, 28.0, -8.0} + }; + + static float d_exp[3][3] = { + {0.133333325, -0.199999958, 0.2666665910}, + {-0.519999862, 0.113333330, 0.5266665220}, + {0.479999840, -0.359999895, 0.0399999917} + }; + + /* Allow small errors in floating point */ + + for (i = 0; i < 3; i++) + for (j = 0; j < 3; j++) + if (float_neq_beebs(c[i][j], c_exp[i][j]) || float_neq_beebs(d[i][j], d_exp[i][j])) + return 0; + + return float_eq_beebs(det, -16.6666718); +} + + +void +initialise_benchmark (void) +{ +} + + +static int benchmark_body (int rpt); + +void +warm_caches (int heat) +{ + int res = benchmark_body (heat); + + return; +} + + +int +benchmark (void) +{ + return benchmark_body (LOCAL_SCALE_FACTOR * 1); +} + + +static int __attribute__ ((noinline)) +benchmark_body (int rpt) +{ + int i; + + for (i = 0; i < rpt; i++) + { + float eps = 1.0e-6; + + memcpy (a, a_ref, 3 * 3 * sizeof (a[0][0])); + minver (3, 3, eps); + memcpy (d, a, 3 * 3 * sizeof (a[0][0])); + memcpy (a, a_ref, 3 * 3 * sizeof (a[0][0])); + mmul (3, 3, 3, 3); + } + + return 0; +} + + +/* + Local Variables: + mode: C + c-file-style: "gnu" + End: +*/ diff --git a/sw/applications/minver/main.c b/sw/applications/minver/main.c new file mode 100644 index 000000000..645661013 --- /dev/null +++ b/sw/applications/minver/main.c @@ -0,0 +1,50 @@ +/* Common main.c for the benchmarks + + Copyright (C) 2014 Embecosm 
Limited and University of Bristol + Copyright (C) 2018-2019 Embecosm Limited + + Contributor: James Pallister + Contributor: Jeremy Bennett + + This file is part of Embench and was formerly part of the Bristol/Embecosm + Embedded Benchmark Suite. + + SPDX-License-Identifier: GPL-3.0-or-later */ + +#include "csr.h" +#include "x-heep.h" + +#include "support.h" + + +int __attribute__ ((used)) +main (int argc __attribute__ ((unused)), + char *argv[] __attribute__ ((unused))) +{ + int i; + volatile int result; + int correct; + + initialise_board (); + initialise_benchmark (); + warm_caches (1); + + start_trigger (); + result = benchmark (); + stop_trigger (); + + /* bmarks that use arrays will check a global array rather than int result */ + + correct = verify_benchmark (result); + + return (!correct); + +} /* main () */ + + +/* + Local Variables: + mode: C + c-file-style: "gnu" + End: +*/ diff --git a/sw/applications/minver/support.h b/sw/applications/minver/support.h new file mode 100644 index 000000000..b68b81e66 --- /dev/null +++ b/sw/applications/minver/support.h @@ -0,0 +1,72 @@ +/* Support header for BEEBS. + + Copyright (C) 2014 Embecosm Limited and the University of Bristol + Copyright (C) 2019 Embecosm Limited + + Contributor James Pallister + + Contributor Jeremy Bennett + + This file is part of Embench and was formerly part of the Bristol/Embecosm + Embedded Benchmark Suite. + + SPDX-License-Identifier: GPL-3.0-or-later */ + +#ifndef SUPPORT_H +#define SUPPORT_H + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +/* Include board support header if we have one */ + +#ifdef HAVE_BOARDSUPPORT_H +#include "boardsupport.h" +#endif + +/* Benchmarks must implement verify_benchmark, which must return -1 if no + verification is done. 
*/ + +int verify_benchmark (int result); + +/* Standard functions implemented for each board */ + +void initialise_board (void); +void start_trigger (void); +void stop_trigger (void); + +/* Every benchmark implements this for one-off data initialization. This is + only used for initialization that is independent of how often benchmark () + is called. */ + +void initialise_benchmark (void); + +/* Every benchmark implements this for cache warm up, typically calling + benchmark several times. The argument controls how much warming up is + done, with 0 meaning no warming. */ + +void warm_caches (int temperature); + +/* Every benchmark implements this as its entry point. Don't allow it to be + inlined! */ + +int benchmark (void) __attribute__ ((noinline)); + +/* Every benchmark must implement this to validate the result of the + benchmark. */ + +int verify_benchmark (int res); + +/* Local simplified versions of library functions */ + +#include "beebsc.h" + +#endif /* SUPPORT_H */ + +/* + Local Variables: + mode: C + c-file-style: "gnu" + End: +*/ diff --git a/sw/cmake/riscv.cmake b/sw/cmake/riscv.cmake index 5c9d070d1..972710f5f 100644 --- a/sw/cmake/riscv.cmake +++ b/sw/cmake/riscv.cmake @@ -43,7 +43,7 @@ get_filename_component(RISCV_TOOLCHAIN_BIN_EXT ${RISCV_GCC_COMPILER} EXT) #message( "RISC-V GCC Path: ${RISCV_TOOLCHAIN_BIN_PATH}" ) -STRING(REGEX REPLACE "\-gcc" "-" GCC_CROSS_COMPILE ${RISCV_GCC_COMPILER}) +STRING(REGEX REPLACE "\-gcc$" "-" GCC_CROSS_COMPILE ${RISCV_GCC_COMPILER}) if ($ENV{COMPILER} MATCHES "clang") STRING(REGEX REPLACE "clang" "" CLANG_CROSS_COMPILE ${RISCV_CLANG_COMPILER}) endif() diff --git a/sw/device/bsp/w25q/w25q.c b/sw/device/bsp/w25q/w25q.c index 8a86b4730..e8fdef7e5 100644 --- a/sw/device/bsp/w25q/w25q.c +++ b/sw/device/bsp/w25q/w25q.c @@ -29,6 +29,8 @@ /* MODULES USED */ /** **/ /****************************************************************************/ +#include "string.h" + #include "w25q128jw.h" /* To manage addresses. 
*/ @@ -70,7 +72,7 @@ /** * @bref If the target is the FPGA, use the SPI FLASH. */ -#ifdef TARGET_PYNQ_Z2 +#ifndef TARGET_SIM #define USE_SPI_FLASH #endif @@ -266,9 +268,9 @@ w25q_error_codes_t w25q128jw_init(spi_host_t spi_host) { flash_power_up(); // Set QE bit (only FPGA, simulation do not support status registers at all) - #ifdef TARGET_PYNQ_Z2 + #ifndef TARGET_SIM if (set_QE_bit() == FLASH_ERROR) return FLASH_ERROR; // Error occurred while setting QE bit - #endif // TARGET_PYNQ_Z2 + #endif // TARGET_SIM return FLASH_OK; // Success } @@ -408,9 +410,9 @@ w25q_error_codes_t w25q128jw_erase_and_write_standard(uint32_t addr, void* data, if (status != FLASH_OK) return FLASH_ERROR; // Erase the sector (no need to do so in simulation) - #ifdef TARGET_PYNQ_Z2 + #ifndef TARGET_SIM w25q128jw_4k_erase(sector_start_addr); - #endif // TARGET_PYNQ_Z2 + #endif // TARGET_SIM // Calculate the length of data to write in this sector uint32_t write_length = MIN(FLASH_SECTOR_SIZE - (current_addr - sector_start_addr), remaining_length); @@ -549,9 +551,9 @@ w25q_error_codes_t w25q128jw_erase_and_write_standard_dma(uint32_t addr, void* d if (status != FLASH_OK) return FLASH_ERROR; // Erase the sector (no need to do so in simulation) - #ifdef TARGET_PYNQ_Z2 + #ifndef TARGET_SIM w25q128jw_4k_erase(sector_start_addr); - #endif // TARGET_PYNQ_Z2 + #endif // TARGET_SIM // Calculate the length of data to write in this sector uint32_t write_length = MIN(FLASH_SECTOR_SIZE - (current_addr - sector_start_addr), remaining_length); @@ -607,7 +609,7 @@ w25q_error_codes_t w25q128jw_read_quad(uint32_t addr, void *data, uint32_t lengt // Quad read requires dummy clocks const uint32_t dummy_clocks_cmd = spi_create_command((spi_command_t){ - #ifdef TARGET_PYNQ_Z2 + #ifndef TARGET_SIM .len = DUMMY_CLOCKS_FAST_READ_QUAD_IO-1, #else .len = DUMMY_CLOCKS_SIM-1, @@ -697,9 +699,9 @@ w25q_error_codes_t w25q128jw_erase_and_write_quad(uint32_t addr, void *data, uin if (status != FLASH_OK) return FLASH_ERROR; // 
Erase the sector (no need to do so in simulation) - #ifdef TARGET_PYNQ_Z2 + #ifndef TARGET_SIM w25q128jw_4k_erase(sector_start_addr); - #endif // TARGET_PYNQ_Z2 + #endif // TARGET_SIM // Calculate the length of data to write in this sector uint32_t write_length = MIN(FLASH_SECTOR_SIZE - (current_addr - sector_start_addr), remaining_length); @@ -755,7 +757,7 @@ w25q_error_codes_t w25q128jw_read_quad_dma(uint32_t addr, void *data, uint32_t l // Quad read requires dummy clocks const uint32_t dummy_clocks_cmd = spi_create_command((spi_command_t){ - #ifdef TARGET_PYNQ_Z2 + #ifndef TARGET_SIM .len = DUMMY_CLOCKS_FAST_READ_QUAD_IO-1, // W25Q128JW flash needs 4 dummy cycles #else .len = DUMMY_CLOCKS_SIM-1, // SPI flash simulation model needs 8 dummy cycles @@ -863,9 +865,9 @@ w25q_error_codes_t w25q128jw_erase_and_write_quad_dma(uint32_t addr, void *data, if (status != FLASH_OK) return FLASH_ERROR; // Erase the sector (no need to do so in simulation) - #ifdef TARGET_PYNQ_Z2 + #ifndef TARGET_SIM w25q128jw_4k_erase(sector_start_addr); - #endif // TARGET_PYNQ_Z2 + #endif // TARGET_SIM // Calculate the length of data to write in this sector uint32_t write_length = MIN(FLASH_SECTOR_SIZE - (current_addr - sector_start_addr), remaining_length); @@ -887,7 +889,7 @@ w25q_error_codes_t w25q128jw_erase_and_write_quad_dma(uint32_t addr, void *data, } -void w25q128jw_4k_erase(uint32_t addr) { +w25q_error_codes_t w25q128jw_4k_erase(uint32_t addr) { // Sanity checks if (addr > MAX_FLASH_ADDR || addr < 0) return FLASH_ERROR; @@ -914,7 +916,7 @@ void w25q128jw_4k_erase(uint32_t addr) { flash_wait(); } -void w25q128jw_32k_erase(uint32_t addr) { +w25q_error_codes_t w25q128jw_32k_erase(uint32_t addr) { // Sanity checks if (addr > 0x00ffffff || addr < 0) return FLASH_ERROR; @@ -941,7 +943,7 @@ void w25q128jw_32k_erase(uint32_t addr) { flash_wait(); } -void w25q128jw_64k_erase(uint32_t addr) { +w25q_error_codes_t w25q128jw_64k_erase(uint32_t addr) { // Sanity checks if (addr > 0x00ffffff || 
addr < 0) return FLASH_ERROR; @@ -1173,7 +1175,7 @@ static void flash_wait(void) { spi_set_command(&spi, spi_status_read_cmd); spi_wait_for_ready(&spi); spi_wait_for_rx_watermark(&spi); - spi_read_word(&spi, &flash_resp[0]); + spi_read_word(&spi, (uint32_t *)flash_resp); if ((flash_resp[0] & 0x01) == 0) flash_busy = false; } } @@ -1218,9 +1220,9 @@ w25q_error_codes_t erase_and_write(uint32_t addr, uint8_t *data, uint32_t length if (status != FLASH_OK) return FLASH_ERROR; // Erase the sector (no need to do so in simulation) - #ifdef TARGET_PYNQ_Z2 + #ifndef TARGET_SIM w25q128jw_4k_erase(sector_start_addr); - #endif // TARGET_PYNQ_Z2 + #endif // TARGET_SIM // Calculate the length of data to write in this sector uint32_t write_length = MIN(FLASH_SECTOR_SIZE - (current_addr - sector_start_addr), remaining_length); @@ -1342,9 +1344,9 @@ static w25q_error_codes_t page_write(uint32_t addr, uint8_t *data, uint32_t leng spi_wait_for_ready(&spi); // Wait for flash to be ready again (FPGA only) - #ifdef TARGET_PYNQ_Z2 + #ifndef TARGET_SIM flash_wait(); - #endif // TARGET_PYNQ_Z2 + #endif // TARGET_SIM } static w25q_error_codes_t dma_send_toflash(uint8_t *data, uint32_t length) { @@ -1444,4 +1446,4 @@ static w25q_error_codes_t w25q128jw_sanity_checks(uint32_t addr, uint8_t *data, /** **/ /* EOF */ /** **/ -/****************************************************************************/ \ No newline at end of file +/****************************************************************************/ diff --git a/sw/device/bsp/w25q/w25q128jw.h b/sw/device/bsp/w25q/w25q128jw.h index 10e0f4786..c6f29e9c0 100644 --- a/sw/device/bsp/w25q/w25q128jw.h +++ b/sw/device/bsp/w25q/w25q128jw.h @@ -357,7 +357,7 @@ w25q_error_codes_t w25q128jw_erase_and_write_quad_dma(uint32_t addr, void* data, * * @param addr 24-bit address of the sector to erase. */ -void w25q128jw_4k_erase(uint32_t addr); +w25q_error_codes_t w25q128jw_4k_erase(uint32_t addr); /** * @brief Erase a 32kb block. 
@@ -367,7 +367,7 @@ void w25q128jw_4k_erase(uint32_t addr); * * @param addr 24-bit address of the block to erase. */ -void w25q128jw_32k_erase(uint32_t addr); +w25q_error_codes_t w25q128jw_32k_erase(uint32_t addr); /** * @brief Erase a 64kb block. @@ -377,7 +377,7 @@ void w25q128jw_32k_erase(uint32_t addr); * * @param addr 24-bit address of the block to erase. */ -void w25q128jw_64k_erase(uint32_t addr); +w25q_error_codes_t w25q128jw_64k_erase(uint32_t addr); /** * @brief Erase the entire chip. @@ -430,4 +430,4 @@ void w25q128jw_power_down(void); /** **/ /** EOF **/ /** **/ -/****************************************************************************/ \ No newline at end of file +/****************************************************************************/ diff --git a/sw/device/lib/crt/crt0.S b/sw/device/lib/crt/crt0.S.tpl similarity index 83% rename from sw/device/lib/crt/crt0.S rename to sw/device/lib/crt/crt0.S.tpl index 82776471d..8b8ccfb79 100644 --- a/sw/device/lib/crt/crt0.S +++ b/sw/device/lib/crt/crt0.S.tpl @@ -72,36 +72,25 @@ _start: sub a0,a0,s2 call w25q128jw_read_standard -_load_data_section: +% for i, section in enumerate(xheep.iter_linker_sections()): +% if section.name != "code": +_load_${section.name}_section: // src ptr - la a0, _lma_data_start + la a0, _lma_${section.name}_start // dst ptr - la a1, __data_start + la a1, __${section.name}_start // copy size in bytes - la a2, _lma_data_end + la a2, _lma_${section.name}_end sub a2, a2, a0 - bltz a2, _load_data_interleaved_section // dont do anything if you do not have data + bltz a2, _load_${section.name}_section_end // dont do anything if you do not have something in ${section.name} sub a0,a0,s2 call w25q128jw_read_standard +_load_${section.name}_section_end: - -_load_data_interleaved_section: - #ifdef HAS_MEMORY_BANKS_IL - // src ptr - la a0, _lma_data_interleaved_start - // dst ptr - la a1, __data_interleaved_start - // copy size in bytes - la a2, _lma_data_interleaved_end - sub a2, a2, a0 - - 
bltz a2, _init_bss // dont do anything if you do not have interleaved data - - sub a0,a0,s2 - call w25q128jw_read_standard - #endif +% endif +% endfor #endif diff --git a/sw/device/lib/drivers/dma/dma.c b/sw/device/lib/drivers/dma/dma.c index abb13cfb8..6e9a2ef78 100644 --- a/sw/device/lib/drivers/dma/dma.c +++ b/sw/device/lib/drivers/dma/dma.c @@ -165,12 +165,32 @@ static inline uint8_t get_misalignment_b( uint8_t *p_ptr, * @retval 1 There is an outbound. * @retval 0 There is NOT an outbound. */ -static inline uint8_t is_region_outbound( uint8_t *p_start, +static inline uint8_t is_region_outbound_1D( uint8_t *p_start, uint8_t *p_end, uint32_t p_type, uint32_t p_size_du, uint32_t p_inc_du ); +/** + * @brief Determines whether a given region will fit before the end of an + * environment with a 2D transaction. + * @param p_start Pointer to the beginning of the region. + * @param p_end Pointer to the last byte of the environment. + * @param p_type The data type to be transferred. + * @param p_size_du The number of data units to be transferred. Must be + * non-zero. + * @param p_inc_du The size in data units of each increment. + * @retval 1 There is an outbound. + * @retval 0 There is NOT an outbound. + */ +static inline uint8_t is_region_outbound_2D( uint8_t *p_start, + uint8_t *p_end, + uint32_t p_type, + uint32_t p_size_d1_du, + uint32_t p_size_d2_du, + uint32_t p_inc_d1_du, + uint32_t p_inc_d2_du ); + /** * @brief Writes a given value into the specified register. Its operation * mimics that of bitfield_field32_write(), but does not require the use of @@ -190,11 +210,18 @@ static inline void write_register( uint32_t p_val, /** - * @brief Analyzes a target to determine the size of its increment (in bytes). + * @brief Analyzes a target to determine the size of its D1 increment (in bytes). * @param p_tgt A pointer to the target to analyze. * @return The number of bytes of the increment. 
*/ -static inline uint32_t get_increment_b( dma_target_t * p_tgt ); +static inline uint32_t get_increment_b_1D( dma_target_t * p_tgt ); + +/** + * @brief Analyzes a target to determine the size of its D2 increment (in bytes). + * @param p_tgt A pointer to the target to analyze. + * @return The number of bytes of the increment. + */ +static inline uint32_t get_increment_b_2D( dma_target_t * p_tgt ); /****************************************************************************/ @@ -273,15 +300,28 @@ void dma_init( dma *peri ) /* Clear the loaded transaction */ dma_cb.trans = NULL; /* Clear all values in the DMA registers. */ - dma_cb.peri->SRC_PTR = 0; - dma_cb.peri->DST_PTR = 0; - dma_cb.peri->SIZE = 0; - dma_cb.peri->PTR_INC = 0; - dma_cb.peri->SLOT = 0; - dma_cb.peri->DATA_TYPE = 0; - dma_cb.peri->MODE = 0; - dma_cb.peri->WINDOW_SIZE = 0; - dma_cb.peri->INTERRUPT_EN = 0; + dma_cb.peri->SRC_PTR = 0; + dma_cb.peri->DST_PTR = 0; + dma_cb.peri->ADDR_PTR = 0; + dma_cb.peri->SIZE_D1 = 0; + dma_cb.peri->SIZE_D2 = 0; + dma_cb.peri->SRC_PTR_INC_D1 = 0; + dma_cb.peri->SRC_PTR_INC_D2 = 0; + dma_cb.peri->DST_PTR_INC_D1 = 0; + dma_cb.peri->DST_PTR_INC_D2 = 0; + dma_cb.peri->DIM_CONFIG = 0; + dma_cb.peri->SLOT = 0; + dma_cb.peri->SRC_DATA_TYPE = 0; + dma_cb.peri->DST_DATA_TYPE = 0; + dma_cb.peri->SIGN_EXT = 0; + dma_cb.peri->MODE = 0; + dma_cb.peri->WINDOW_SIZE = 0; + dma_cb.peri->INTERRUPT_EN = 0; + dma_cb.peri->PAD_TOP = 0; + dma_cb.peri->PAD_BOTTOM = 0; + dma_cb.peri->PAD_LEFT = 0; + dma_cb.peri->PAD_RIGHT = 0; + dma_cb.peri->DIM_INV = 0; } dma_config_flags_t dma_validate_transaction( dma_trans_t *p_trans, @@ -308,6 +348,14 @@ dma_config_flags_t dma_validate_transaction( dma_trans_t *p_trans, /* The checks request should be a valid request. 
*/ DMA_STATIC_ASSERT( p_check < DMA_PERFORM_CHECKS__size, "Check request not valid"); + /* The padding should be a valid number */ + DMA_STATIC_ASSERT( ((p_trans->pad_top_du >= 0 && p_trans->pad_top_du < 64) && + (p_trans->pad_bottom_du >= 0 && p_trans->pad_bottom_du < 64) && + (p_trans->pad_left_du >= 0 && p_trans->pad_left_du < 64) && + (p_trans->pad_right_du >= 0 && p_trans->pad_right_du < 64)), + "Padding not valid"); + /* The dimensionality should be valid*/ + DMA_STATIC_ASSERT( p_trans->dim < DMA_DIM_CONF__size, "Dimensionality not valid"); /* * CHECK IF TARGETS HAVE ERRORS @@ -335,6 +383,43 @@ dma_config_flags_t dma_validate_transaction( dma_trans_t *p_trans, return p_trans->flags; } + /* + * CHECK IF THERE ARE INCREMENTS INCONSISTENCIES + */ + + /* + * A transaction is considered 2D if the source and/or the destination has a 2D increment. + * e.g. It's possible to copy a 1x9 matrix to a 3x3 matrix or to copy a 3x3 matrix to a 1x9 one. + */ + + if (p_check) + { + // If the transaction is 2D, check that the D2 increment of the targets are non zero. + // If the transaction is 1D, check that the D2 increment of the targets are zero. + if((p_trans->dim == DMA_DIM_CONF_2D && (p_trans->src->inc_d2_du == 0 || p_trans->dst->inc_d2_du == 0)) || + (p_trans->dim == DMA_DIM_CONF_1D && (p_trans->src->inc_d2_du != 0 || p_trans->dst->inc_d2_du != 0))) + { + p_trans->flags |= DMA_CONFIG_INCOMPATIBLE; + p_trans->flags |= DMA_CONFIG_CRITICAL_ERROR; + return p_trans->flags; + } + } + + /* + * CHECK IF THERE ARE PADDING INCONSISTENCIES + */ + + if (p_check) + { + // If the transaction is 1D, check that the top and bottom paddings are set to zero. 
+ if((p_trans->dim == DMA_DIM_CONF_1D && (p_trans->pad_top_du != 0 || p_trans->pad_bottom_du != 0))) + { + p_trans->flags |= DMA_CONFIG_INCOMPATIBLE; + p_trans->flags |= DMA_CONFIG_CRITICAL_ERROR; + return p_trans->flags; + } + } + /* * CHECK IF THERE ARE TRIGGER INCONSISTENCIES */ @@ -387,12 +472,16 @@ dma_config_flags_t dma_validate_transaction( dma_trans_t *p_trans, /* The flags are cleaned in case the structure was used before.*/ p_trans->flags = DMA_CONFIG_OK; + /* The copy size of the source (in data units -of the source-) is transformed to bytes, to be used as default size.*/ - uint8_t dataSize_b = DMA_DATA_TYPE_2_SIZE(p_trans->src->type); + uint8_t dataSize_b = DMA_DATA_TYPE_2_SIZE(p_trans->dst->type); p_trans->size_b = p_trans->src->size_du * dataSize_b; - /* By default, the source defines the data type.*/ - p_trans->type = p_trans->src->type; + p_trans->size_d2_b = p_trans->src->size_d2_du * dataSize_b; + + p_trans->src_type = p_trans->src->type; + p_trans->dst_type = p_trans->dst->type; + /* * By default, the transaction increment is set to 0 and, if required, * it will be changed to 1 (in which case both src and dst will have an @@ -416,12 +505,12 @@ dma_config_flags_t dma_validate_transaction( dma_trans_t *p_trans, if( p_trans->src->trig == DMA_TRIG_MEMORY ) { - misalignment = get_misalignment_b( p_trans->src->ptr, p_trans->type ); + misalignment = get_misalignment_b( p_trans->src->ptr, p_trans->src_type ); } if( p_trans->dst->trig == DMA_TRIG_MEMORY ) { - dstMisalignment = get_misalignment_b( p_trans->dst->ptr, p_trans->type ); + dstMisalignment = get_misalignment_b( p_trans->dst->ptr, p_trans->dst_type ); } p_trans->flags |= ( misalignment ? DMA_CONFIG_SRC : DMA_CONFIG_OK ); @@ -508,26 +597,26 @@ dma_config_flags_t dma_validate_transaction( dma_trans_t *p_trans, * a more granular data type is used according to the detected * misalignment in order to overcome it. 
*/ - p_trans->type += misalignment; + p_trans->dst_type += misalignment; /* * Source and destination increment should now be of the size * of the data. * As increments are given in bytes, in both cases should be the * size of a data unit. */ - p_trans->inc_b = DMA_DATA_TYPE_2_SIZE( p_trans->type ); + p_trans->inc_b = DMA_DATA_TYPE_2_SIZE( p_trans->dst_type ); /* The copy size does not change, as it is already stored in bytes.*/ } /* - * CHECK IF SOURCE HAS SIZE 0 + * CHECK IF SOURCE HAS ZERO SIZE(s) */ /* * No further operations are done to prevent corrupting information * that could be useful for debugging purposes. */ - if( p_trans->src->size_du == 0 ) + if(p_trans->src->size_du == 0 || (p_trans->dim == DMA_DIM_CONF_2D && p_trans->src->size_d2_du == 0)) { p_trans->flags |= DMA_CONFIG_SRC; p_trans->flags |= DMA_CONFIG_CRITICAL_ERROR; @@ -562,21 +651,24 @@ dma_config_flags_t dma_validate_transaction( dma_trans_t *p_trans, * No further operations are done to prevent corrupting information * that could be useful for debugging purposes. */ - uint8_t isEnv = p_trans->dst->env; - uint8_t isOutb = is_region_outbound( - p_trans->dst->ptr, - p_trans->dst->env->end, - p_trans->type, - p_trans->src->size_du, - p_trans->dst->inc_du ); - if( isEnv && isOutb ) - { - p_trans->flags |= DMA_CONFIG_DST; - p_trans->flags |= DMA_CONFIG_OUTBOUNDS; - p_trans->flags |= DMA_CONFIG_CRITICAL_ERROR; - return p_trans->flags; - } + uint8_t isEnv = (p_trans->dst->env != NULL); + + if(isEnv) { + uint8_t isOutb = is_region_outbound_1D( + p_trans->dst->ptr, + p_trans->dst->env->end, + p_trans->dst_type, + p_trans->src->size_du, + p_trans->dst->inc_du ); + if( isOutb ) + { + p_trans->flags |= DMA_CONFIG_DST; + p_trans->flags |= DMA_CONFIG_OUTBOUNDS; + p_trans->flags |= DMA_CONFIG_CRITICAL_ERROR; + return p_trans->flags; + } + } // @ToDo: It should also be checked that the destination is behind the // source if there will be overlap. 
// @ToDo: Consider if (when a destination target has no environment) @@ -672,24 +764,73 @@ dma_config_flags_t dma_load_transaction( dma_trans_t *p_trans ) if( dma_cb.trans->end != DMA_TRANS_END_POLLING ) { - /* Enable global interrupt for machine-level interrupts. */ + /* Enable global interrupt. */ CSR_SET_BITS(CSR_REG_MSTATUS, 0x8 ); - /* @ToDo: What does this do? */ + /* Enable machine-level fast interrupt. */ CSR_SET_BITS(CSR_REG_MIE, DMA_CSR_REG_MIE_MASK ); - dma_cb.peri->INTERRUPT_EN |= INTR_EN_TRANS_DONE; + write_register( + 0x1, + DMA_INTERRUPT_EN_REG_OFFSET, + 0xffff, + DMA_INTERRUPT_EN_TRANSACTION_DONE_BIT + ); /* Only if a window is used should the window interrupt be set. */ if( p_trans->win_du > 0 ) { - dma_cb.peri->INTERRUPT_EN |= INTR_EN_WINDOW_DONE; + write_register( + 0x1, + DMA_INTERRUPT_EN_REG_OFFSET, + 0xffff, + DMA_INTERRUPT_EN_WINDOW_DONE_BIT + ); } } + /* + * SET THE PADDING + */ + + /* + * In the case of a 1D transaction with padding enabled, the DMA has to be configured to treat + * the transaction as a 2D one with a second dimension of 1 du and a second dimension increment of 1 du. 
+ */ + + if (p_trans->dim == DMA_DIM_CONF_1D && (p_trans->pad_left_du != 0 || p_trans->pad_right_du != 0)) + { + p_trans->dim = DMA_DIM_CONF_2D; + p_trans->size_d2_b = DMA_DATA_TYPE_2_SIZE( p_trans->dst_type ); + p_trans->src->inc_d2_du = DMA_DATA_TYPE_2_SIZE( p_trans->dst_type ); + } + + if (dma_cb.trans->pad_top_du != 0 || dma_cb.trans->pad_bottom_du != 0 || dma_cb.trans->pad_left_du != 0 || dma_cb.trans->pad_right_du != 0) + { + write_register( dma_cb.trans->pad_top_du * DMA_DATA_TYPE_2_SIZE( p_trans->dst_type ), + DMA_PAD_TOP_REG_OFFSET, + DMA_PAD_TOP_PAD_MASK, + DMA_PAD_TOP_PAD_OFFSET); + + write_register( dma_cb.trans->pad_bottom_du * DMA_DATA_TYPE_2_SIZE( p_trans->dst_type ), + DMA_PAD_BOTTOM_REG_OFFSET, + DMA_PAD_BOTTOM_PAD_MASK, + DMA_PAD_BOTTOM_PAD_OFFSET); + + write_register( dma_cb.trans->pad_left_du * DMA_DATA_TYPE_2_SIZE( p_trans->dst_type ), + DMA_PAD_LEFT_REG_OFFSET, + DMA_PAD_LEFT_PAD_MASK, + DMA_PAD_LEFT_PAD_OFFSET); + + write_register( dma_cb.trans->pad_right_du * DMA_DATA_TYPE_2_SIZE( p_trans->dst_type ), + DMA_PAD_RIGHT_REG_OFFSET, + DMA_PAD_RIGHT_PAD_MASK, + DMA_PAD_RIGHT_PAD_OFFSET); + } + /* * SET THE POINTERS */ - dma_cb.peri->SRC_PTR = dma_cb.trans->src->ptr; + dma_cb.peri->SRC_PTR = (uint32_t)dma_cb.trans->src->ptr; if(dma_cb.trans->mode != DMA_TRANS_MODE_ADDRESS) { @@ -698,13 +839,22 @@ dma_config_flags_t dma_load_transaction( dma_trans_t *p_trans ) otherwise the destination address is read in a separate port in parallel with the data from the address port */ - dma_cb.peri->DST_PTR = dma_cb.trans->dst->ptr; + dma_cb.peri->DST_PTR = (uint32_t)dma_cb.trans->dst->ptr; } else { - dma_cb.peri->ADDR_PTR = dma_cb.trans->src_addr->ptr; + dma_cb.peri->ADDR_PTR = (uint32_t)dma_cb.trans->src_addr->ptr; } + /* + * SET THE TRANSPOSITION MODE + */ + + write_register(dma_cb.trans->dim_inv, + DMA_DIM_INV_REG_OFFSET, + 0x1 << DMA_DIM_INV_SEL_BIT, + DMA_DIM_INV_SEL_BIT); + /* * SET THE INCREMENTS */ @@ -717,24 +867,38 @@ dma_config_flags_t 
dma_load_transaction( dma_trans_t *p_trans ) * In that case, a increment of 0 is necessary. * In case of DMA Address mode transaction, the dst pointer is ignored * as the values read from the second port are instead used. + * In case of a 2D DMA transaction, the second dimension increment is set. */ + + write_register( get_increment_b_1D( dma_cb.trans->src ), + DMA_SRC_PTR_INC_D1_REG_OFFSET, + DMA_SRC_PTR_INC_D1_INC_MASK, + DMA_SRC_PTR_INC_D1_INC_OFFSET ); - write_register( get_increment_b( dma_cb.trans->src ), - DMA_PTR_INC_REG_OFFSET, - DMA_PTR_INC_SRC_PTR_INC_MASK, - DMA_PTR_INC_SRC_PTR_INC_OFFSET ); - - + if(dma_cb.trans->dim == DMA_DIM_CONF_2D) + { + write_register( get_increment_b_2D( dma_cb.trans->src ), + DMA_SRC_PTR_INC_D2_REG_OFFSET, + DMA_SRC_PTR_INC_D2_INC_MASK, + DMA_SRC_PTR_INC_D2_INC_OFFSET ); + } if(dma_cb.trans->mode != DMA_TRANS_MODE_ADDRESS) { - write_register( get_increment_b( dma_cb.trans->dst ), - DMA_PTR_INC_REG_OFFSET, - DMA_PTR_INC_DST_PTR_INC_MASK, - DMA_PTR_INC_DST_PTR_INC_OFFSET ); + write_register( get_increment_b_1D( dma_cb.trans->dst ), + DMA_DST_PTR_INC_D1_REG_OFFSET, + DMA_DST_PTR_INC_D1_INC_MASK, + DMA_DST_PTR_INC_D1_INC_OFFSET ); + + if(dma_cb.trans->dim == DMA_DIM_CONF_2D) + { + write_register( get_increment_b_2D( dma_cb.trans->dst ), + DMA_DST_PTR_INC_D2_REG_OFFSET, + DMA_DST_PTR_INC_D2_INC_MASK, + DMA_DST_PTR_INC_D2_INC_OFFSET ); + } } - /* * SET THE OPERATION MODE AND WINDOW SIZE */ @@ -747,6 +911,23 @@ dma_config_flags_t dma_load_transaction( dma_trans_t *p_trans ) ? 
dma_cb.trans->win_du : dma_cb.trans->size_b; + /* + * SET THE DIMENSIONALITY + */ + write_register( dma_cb.trans->dim, + DMA_DIM_CONFIG_REG_OFFSET, + 0x1 << DMA_DIM_CONFIG_DMA_DIM_BIT, + DMA_DIM_CONFIG_DMA_DIM_BIT ); + + /* + * SET THE SIGN EXTENSION BIT + */ + write_register( dma_cb.trans->sign_ext, + DMA_SIGN_EXT_REG_OFFSET, + 0x1 << DMA_SIGN_EXT_SIGNED_BIT, + DMA_SIGN_EXT_SIGNED_BIT ); + + /* * SET TRIGGER SLOTS AND DATA TYPE */ @@ -760,15 +941,20 @@ dma_config_flags_t dma_load_transaction( dma_trans_t *p_trans ) DMA_SLOT_TX_TRIGGER_SLOT_MASK, DMA_SLOT_TX_TRIGGER_SLOT_OFFSET ); - write_register( dma_cb.trans->type, - DMA_DATA_TYPE_REG_OFFSET, - DMA_DATA_TYPE_DATA_TYPE_MASK, + write_register( dma_cb.trans->dst_type, + DMA_DST_DATA_TYPE_REG_OFFSET, + DMA_DST_DATA_TYPE_DATA_TYPE_MASK, + DMA_SELECTION_OFFSET_START ); + + write_register( dma_cb.trans->src_type, + DMA_SRC_DATA_TYPE_REG_OFFSET, + DMA_SRC_DATA_TYPE_DATA_TYPE_MASK, DMA_SELECTION_OFFSET_START ); return DMA_CONFIG_OK; } -dma_config_flags_t dma_launch( dma_trans_t *p_trans ) +dma_config_flags_t dma_launch( dma_trans_t *p_trans) { /* * Make sure that the loaded transaction is the intended transaction. @@ -803,22 +989,36 @@ dma_config_flags_t dma_launch( dma_trans_t *p_trans ) */ dma_cb.intrFlag = 0; - /* Load the size and start the transaction. */ - dma_cb.peri->SIZE = dma_cb.trans->size_b; + /* Load the size(s) and start the transaction. */ + if(dma_cb.trans->dim == DMA_DIM_CONF_2D) + { + write_register( dma_cb.trans->size_d2_b, + DMA_SIZE_D2_REG_OFFSET, + DMA_SIZE_D2_SIZE_MASK, + DMA_SIZE_D2_SIZE_OFFSET + ); + } + + write_register( dma_cb.trans->size_b, + DMA_SIZE_D1_REG_OFFSET, + DMA_SIZE_D1_SIZE_MASK, + DMA_SIZE_D1_SIZE_OFFSET + ); /* * If the end event was set to wait for the interrupt, the dma_launch * will not return until the interrupt arrives. 
*/ + + while( p_trans->end == DMA_TRANS_END_INTR_WAIT - && ( dma_cb.intrFlag != 0 ) ) { // @ToDo: add a label for this 0 - wait_for_interrupt(); + && ( dma_cb.intrFlag != 0x0 ) ) { + wait_for_interrupt(); } return DMA_CONFIG_OK; } - __attribute__((optimize("O0"))) uint32_t dma_is_ready(void) { /* The transaction READY bit is read from the status register*/ @@ -901,14 +1101,19 @@ dma_config_flags_t validate_target( dma_target_t *p_tgt ) */ /* Increment can be 0 when a trigger is used. */ - DMA_STATIC_ASSERT( p_tgt->inc_du >= 0 , "Increment not valid"); + DMA_STATIC_ASSERT( p_tgt->inc_du >= 0 && p_tgt->inc_du < 64 , "Increment not valid"); + /* Increment on D2 has to be 0 for 1D operations */ + DMA_STATIC_ASSERT( p_tgt->inc_d2_du >= 0 && p_tgt->inc_d2_du < 4194304 , "Increment d2 not valid"); /* The size could be 0 if the target is only going to be used as a destination. */ - DMA_STATIC_ASSERT( p_tgt->size_du >= 0 , "Size not valid"); + DMA_STATIC_ASSERT( p_tgt->size_du >= 0 && p_tgt->size_du < 65536 , "Size not valid"); + /* The size can be 0 or 1 if the target is involved in a 1D padded transaction */ + DMA_STATIC_ASSERT( p_tgt->size_d2_du >= 0 && p_tgt->size_du < 65536 , "Size d2 not valid"); /* The data type must be a valid type */ - DMA_STATIC_ASSERT( p_tgt->type < DMA_DATA_TYPE__size , "Type not valid"); + DMA_STATIC_ASSERT( p_tgt->type < DMA_DATA_TYPE__size , "Source type not valid"); /* The trigger must be among the valid trigger values. */ DMA_STATIC_ASSERT( p_tgt->trig < DMA_TRIG__size , "Trigger not valid"); + /* * INTEGRITY CHECKS @@ -923,7 +1128,6 @@ dma_config_flags_t validate_target( dma_target_t *p_tgt ) { /* Check if the environment was properly formed.*/ flags |= validate_environment( p_tgt->env ); - /* * Check if the target selected size goes beyond the boundaries of * the environment. 
@@ -931,7 +1135,7 @@ dma_config_flags_t validate_target( dma_target_t *p_tgt ) */ if( p_tgt->size_du != 0 ) { - uint8_t isOutb = is_region_outbound( p_tgt->ptr, + uint8_t isOutb = is_region_outbound_1D( p_tgt->ptr, p_tgt->env->end, p_tgt->type, p_tgt->size_du, @@ -941,6 +1145,21 @@ dma_config_flags_t validate_target( dma_target_t *p_tgt ) flags |= DMA_CONFIG_OUTBOUNDS; } } + /* Do the same but for 2D case */ + if( p_tgt->size_d2_du != 0 ) + { + uint8_t isOutb = is_region_outbound_2D( p_tgt->ptr, + p_tgt->env->end, + p_tgt->type, + p_tgt->size_du, + p_tgt->size_d2_du, + p_tgt->inc_du, + p_tgt->inc_d2_du); + if( isOutb ) + { + flags |= DMA_CONFIG_OUTBOUNDS; + } + } /* Check if the target starts before the environment starts. */ uint8_t beforeEnv = ( p_tgt->ptr < p_tgt->env->start ); @@ -1071,7 +1290,7 @@ static inline uint8_t get_misalignment_b( uint8_t *p_ptr, return misalignment; } -static inline uint8_t is_region_outbound( uint8_t *p_start, +static inline uint8_t is_region_outbound_1D( uint8_t *p_start, uint8_t *p_end, uint32_t p_type, uint32_t p_size_du, @@ -1093,8 +1312,29 @@ static inline uint8_t is_region_outbound( uint8_t *p_start, */ uint32_t affectedUnits = ( p_size_du - 1 ) * p_inc_du + 1; uint32_t rangeSize = DMA_DATA_TYPE_2_SIZE(p_type) * affectedUnits; - uint32_t lasByteInsideRange = p_start + rangeSize -1; - return ( p_end < lasByteInsideRange ); + uint32_t lastByteInsideRange = (uint32_t)p_start + rangeSize -1; + return ( p_end < lastByteInsideRange ); + // Size is be guaranteed to be non-zero before calling this function. +} + +static inline uint8_t is_region_outbound_2D( uint8_t *p_start, + uint8_t *p_end, + uint32_t p_type, + uint32_t p_size_d1_du, + uint32_t p_size_d2_du, + uint32_t p_inc_d1_du, + uint32_t p_inc_d2_du ) +{ + /* + * If the environment ends before the last affected byte, then there is + * outbound writing and the function returns 1. 
+ */ + + uint32_t affectedUnits = (( p_size_d1_du - 1 ) * p_inc_d1_du + 1) * (p_size_d2_du) + p_inc_d2_du * (p_size_d2_du - 1); + uint32_t rangeSize = DMA_DATA_TYPE_2_SIZE(p_type) * affectedUnits; + uint32_t lastByteInsideRange = (uint32_t)p_start + rangeSize -1; + return ( p_end < lastByteInsideRange ); + // Size is be guaranteed to be non-zero before calling this function. } @@ -1123,7 +1363,8 @@ static inline void write_register( uint32_t p_val, } -static inline uint32_t get_increment_b( dma_target_t * p_tgt ) + +static inline uint32_t get_increment_b_1D( dma_target_t * p_tgt ) { uint32_t inc_b = 0; /* If the target uses a trigger, the increment remains 0. */ @@ -1134,22 +1375,46 @@ static inline uint32_t get_increment_b( dma_target_t * p_tgt ) * misalignments), then that value is used (it's always set to 1). */ inc_b = dma_cb.trans->inc_b; - /* * Otherwise, the target-specific increment is used transformed into * bytes). */ if( inc_b == 0 ) { - uint8_t dataSize_b = DMA_DATA_TYPE_2_SIZE( dma_cb.trans->type ); + uint8_t dataSize_b = DMA_DATA_TYPE_2_SIZE( p_tgt->type ); inc_b = ( p_tgt->inc_du * dataSize_b ); } } return inc_b; } +static inline uint32_t get_increment_b_2D( dma_target_t * p_tgt ) +{ + uint32_t inc_b = 0; + /* If the target uses a trigger, the increment remains 0. */ + if( p_tgt->trig == DMA_TRIG_MEMORY ) + { + /* + * If the transaction increment has been overriden (due to + * misalignments), then that value is used (it's always set to 1). + */ + inc_b = dma_cb.trans->inc_b; + + /* + * Otherwise, the target-specific increment is used transformed into + * bytes). 
+ */ + if( inc_b == 0 ) + { + uint8_t dataSize_b = DMA_DATA_TYPE_2_SIZE( p_tgt->type ); + inc_b = ( p_tgt->inc_d2_du * dataSize_b ); + } + } + return inc_b; +} + /****************************************************************************/ /** **/ /* EOF */ /** **/ -/****************************************************************************/ \ No newline at end of file +/****************************************************************************/ diff --git a/sw/device/lib/drivers/dma/dma.h b/sw/device/lib/drivers/dma/dma.h index 85ee64751..2534c4945 100644 --- a/sw/device/lib/drivers/dma/dma.h +++ b/sw/device/lib/drivers/dma/dma.h @@ -68,6 +68,8 @@ #define DMA_SPI_FLASH_TX_SLOT 0x08 #define DMA_I2S_RX_SLOT 0x10 +#define DMA_INT_TR_START 0x0 + #ifdef __cplusplus extern "C" { #endif @@ -118,11 +120,11 @@ typedef enum */ typedef enum { - DMA_DATA_TYPE_WORD = DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_32BIT_WORD,/*!< + DMA_DATA_TYPE_WORD = DMA_SRC_DATA_TYPE_DATA_TYPE_VALUE_DMA_32BIT_WORD,/*!< Word = 4 bytes = 32 bits */ - DMA_DATA_TYPE_HALF_WORD = DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_16BIT_WORD,/*!< + DMA_DATA_TYPE_HALF_WORD = DMA_SRC_DATA_TYPE_DATA_TYPE_VALUE_DMA_16BIT_WORD,/*!< Half Word = 2 bytes = 16 bits */ - DMA_DATA_TYPE_BYTE = DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_8BIT_WORD,/*!< + DMA_DATA_TYPE_BYTE = DMA_SRC_DATA_TYPE_DATA_TYPE_VALUE_DMA_8BIT_WORD,/*!< Byte = 1 byte = 8 bits */ /* DMA_DATA_TYPE_BYTE_alt = DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_8BIT_WORD_2, * BYTE and BYTE_alt are interchangeable in hw, but we advice against @@ -134,6 +136,19 @@ typedef enum DMA_DATA_TYPE__undef, /*!< DMA will not be used. */ } dma_data_type_t; +typedef enum +{ + DMA_DIM_CONF_1D = 0, /* The DMA will copy data along D1 only. */ + DMA_DIM_CONF_2D = 1, /* The DMA will copy data along D1 and D2. */ + DMA_DIM_CONF__size, /* Not used, only for sanity checks. */ + /* + Padding is enabled with the 2D mode. This means that to pad a 1D + data structure, i.e. 
an array, the DMA would have to be set in 2D + mode with the D2 dimension set to 1. + This case is handled by the DMA HAL, so it's transparent to the user. + */ +} dma_dim_t; + /** * It is possible to choose the level of safety with which the DMA operation * should be configured. @@ -279,11 +294,15 @@ typedef struct if the target is a peripheral. */ uint8_t* ptr; /*!< Pointer to the start address from/to where data will be copied/pasted. */ - uint16_t inc_du; /*!< How much the pointer will increase + uint8_t inc_du; /*!< How much the pointer will increase every time a read/write operation is done. It is a multiple of the data units. Can be left blank if the target is a peripheral. */ - uint32_t size_du; /*!< The size (in data units) of the data to + uint32_t inc_d2_du; /*!< How much the D2 pointer will increase + every time the DMA finishes to read a #D1 of data units. */ + uint16_t size_du; /*!< The size (in data units) of the data to be copied. Can be left blank if the target will only be used as destination.*/ + uint16_t size_d2_du; /*!< The size (in data units) of the data + to be copied along D2.*/ dma_data_type_t type; /*!< The type of data to be transferred. Can be left blank if the target will only be used as destination. */ dma_trigger_slot_mask_t trig; /*!< If the target is a peripheral, a @@ -296,6 +315,7 @@ typedef struct * It also includes control parameters to override the targets' specific ones * if needed. */ + typedef struct { dma_target_t* src; /*!< Target from where the data will be @@ -306,11 +326,23 @@ typedef struct copied. - only valid in address mode */ uint16_t inc_b; /*!< A common increment in case both targets need to use one same increment. */ - uint32_t size_b; /*!< The size of the transfer, in bytes (in + uint32_t size_b; /*!< The size of the transfer along D1, in bytes (in contrast, the size stored in the targets is in data units). */ - dma_data_type_t type; /*!< The data type to use. 
One is chosen among + uint32_t size_d2_b; /*!< The size of the transfer along D2, in bytes (in + contrast, the size stored in the targets is in data units). */ + dma_dim_t dim; /*!< Sets the dimensionality of the + DMA, either 1D or 2D. */ + uint8_t pad_top_du; /*!< Padding at the top of the 2D transfer. */ + uint8_t pad_bottom_du; /*!< Padding at the bottom of the 2D transfer. */ + uint8_t pad_left_du; /*!< Padding at the left of the 2D transfer. */ + uint8_t pad_right_du; /*!< Padding at the right of the 2D transfer. */ + dma_data_type_t src_type; /*!< Source data type to use. One is chosen among + the targets. */ + dma_data_type_t dst_type; /*!< Destination data type to use. One is chosen among the targets. */ + uint8_t sign_ext; /*!< Whether to sign extend the data. */ dma_trans_mode_t mode; /*!< The copy mode to use. */ + uint8_t dim_inv; /*!< If the D1 and D2 dimensions are inverted, i.e. perform transposition. */ uint32_t win_du; /*!< The amount of data units every which the WINDOW_DONE flag is raised and its corresponding interrupt triggered. It can be set to 0 to disable this functionality. */ @@ -393,7 +425,7 @@ dma_config_flags_t dma_load_transaction( dma_trans_t* p_trans ); * loaded is not the desired one). * @retval DMA_CONFIG_OK == 0 otherwise. */ -dma_config_flags_t dma_launch( dma_trans_t* p_trans ); +dma_config_flags_t dma_launch( dma_trans_t* p_trans); /** * @brief Read from the done register of the DMA. Additionally decreases the @@ -419,7 +451,7 @@ uint32_t dma_get_window_count(void); /** * @brief Prevent the DMA from relaunching the transaction automatically after * finishing the current one. It does not affect the currently running - * transaction. It has no effect if the DMA is operating in SINGULAR + * transaction. It has no effect if the DMA is operating in SINGLE * transaction mode. 
*/ void dma_stop_circular(void); diff --git a/sw/device/lib/drivers/dma/dma_regs.h b/sw/device/lib/drivers/dma/dma_regs.h index ce6169c16..a598323d3 100644 --- a/sw/device/lib/drivers/dma/dma_regs.h +++ b/sw/device/lib/drivers/dma/dma_regs.h @@ -25,27 +25,56 @@ extern "C" { // Addess data pointer (word aligned) #define DMA_ADDR_PTR_REG_OFFSET 0x8 -// Number of bytes to copy - Once a value is written, the copy starts -#define DMA_SIZE_REG_OFFSET 0xc +// Number of bytes to copy from, defined with respect to the first dimension +// - Once a value is written, the copy starts +#define DMA_SIZE_D1_REG_OFFSET 0xc +#define DMA_SIZE_D1_SIZE_MASK 0xffff +#define DMA_SIZE_D1_SIZE_OFFSET 0 +#define DMA_SIZE_D1_SIZE_FIELD \ + ((bitfield_field32_t) { .mask = DMA_SIZE_D1_SIZE_MASK, .index = DMA_SIZE_D1_SIZE_OFFSET }) + +// Number of bytes to copy from, defined with respect to the second dimension +#define DMA_SIZE_D2_REG_OFFSET 0x10 +#define DMA_SIZE_D2_SIZE_MASK 0xffff +#define DMA_SIZE_D2_SIZE_OFFSET 0 +#define DMA_SIZE_D2_SIZE_FIELD \ + ((bitfield_field32_t) { .mask = DMA_SIZE_D2_SIZE_MASK, .index = DMA_SIZE_D2_SIZE_OFFSET }) // Status bits are set to one if a given event occurred -#define DMA_STATUS_REG_OFFSET 0x10 +#define DMA_STATUS_REG_OFFSET 0x14 #define DMA_STATUS_READY_BIT 0 #define DMA_STATUS_WINDOW_DONE_BIT 1 -// Increment number of src/dst pointer every time a word is copied -#define DMA_PTR_INC_REG_OFFSET 0x14 -#define DMA_PTR_INC_SRC_PTR_INC_MASK 0xff -#define DMA_PTR_INC_SRC_PTR_INC_OFFSET 0 -#define DMA_PTR_INC_SRC_PTR_INC_FIELD \ - ((bitfield_field32_t) { .mask = DMA_PTR_INC_SRC_PTR_INC_MASK, .index = DMA_PTR_INC_SRC_PTR_INC_OFFSET }) -#define DMA_PTR_INC_DST_PTR_INC_MASK 0xff -#define DMA_PTR_INC_DST_PTR_INC_OFFSET 8 -#define DMA_PTR_INC_DST_PTR_INC_FIELD \ - ((bitfield_field32_t) { .mask = DMA_PTR_INC_DST_PTR_INC_MASK, .index = DMA_PTR_INC_DST_PTR_INC_OFFSET }) +// Increment the D1 source pointer every time a word is copied +#define 
DMA_SRC_PTR_INC_D1_REG_OFFSET 0x18 +#define DMA_SRC_PTR_INC_D1_INC_MASK 0x3f +#define DMA_SRC_PTR_INC_D1_INC_OFFSET 0 +#define DMA_SRC_PTR_INC_D1_INC_FIELD \ + ((bitfield_field32_t) { .mask = DMA_SRC_PTR_INC_D1_INC_MASK, .index = DMA_SRC_PTR_INC_D1_INC_OFFSET }) + +// Increment the D2 source pointer every time a word is copied +#define DMA_SRC_PTR_INC_D2_REG_OFFSET 0x1c +#define DMA_SRC_PTR_INC_D2_INC_MASK 0x7fffff +#define DMA_SRC_PTR_INC_D2_INC_OFFSET 0 +#define DMA_SRC_PTR_INC_D2_INC_FIELD \ + ((bitfield_field32_t) { .mask = DMA_SRC_PTR_INC_D2_INC_MASK, .index = DMA_SRC_PTR_INC_D2_INC_OFFSET }) + +// Increment the D1 destination pointer every time a word is copied +#define DMA_DST_PTR_INC_D1_REG_OFFSET 0x20 +#define DMA_DST_PTR_INC_D1_INC_MASK 0x3f +#define DMA_DST_PTR_INC_D1_INC_OFFSET 0 +#define DMA_DST_PTR_INC_D1_INC_FIELD \ + ((bitfield_field32_t) { .mask = DMA_DST_PTR_INC_D1_INC_MASK, .index = DMA_DST_PTR_INC_D1_INC_OFFSET }) + +// Increment the D2 destination pointer every time a word is copied +#define DMA_DST_PTR_INC_D2_REG_OFFSET 0x24 +#define DMA_DST_PTR_INC_D2_INC_MASK 0x7fffff +#define DMA_DST_PTR_INC_D2_INC_OFFSET 0 +#define DMA_DST_PTR_INC_D2_INC_FIELD \ + ((bitfield_field32_t) { .mask = DMA_DST_PTR_INC_D2_INC_MASK, .index = DMA_DST_PTR_INC_D2_INC_OFFSET }) // The DMA will wait for the signal -#define DMA_SLOT_REG_OFFSET 0x18 +#define DMA_SLOT_REG_OFFSET 0x28 #define DMA_SLOT_RX_TRIGGER_SLOT_MASK 0xffff #define DMA_SLOT_RX_TRIGGER_SLOT_OFFSET 0 #define DMA_SLOT_RX_TRIGGER_SLOT_FIELD \ @@ -55,19 +84,35 @@ extern "C" { #define DMA_SLOT_TX_TRIGGER_SLOT_FIELD \ ((bitfield_field32_t) { .mask = DMA_SLOT_TX_TRIGGER_SLOT_MASK, .index = DMA_SLOT_TX_TRIGGER_SLOT_OFFSET }) -// Width/type of the data to transfer -#define DMA_DATA_TYPE_REG_OFFSET 0x1c -#define DMA_DATA_TYPE_DATA_TYPE_MASK 0x3 -#define DMA_DATA_TYPE_DATA_TYPE_OFFSET 0 -#define DMA_DATA_TYPE_DATA_TYPE_FIELD \ - ((bitfield_field32_t) { .mask = DMA_DATA_TYPE_DATA_TYPE_MASK, .index = 
DMA_DATA_TYPE_DATA_TYPE_OFFSET }) -#define DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_32BIT_WORD 0x0 -#define DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_16BIT_WORD 0x1 -#define DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_8BIT_WORD 0x2 -#define DMA_DATA_TYPE_DATA_TYPE_VALUE_DMA_8BIT_WORD_2 0x3 +// Width/type of the source data to transfer +#define DMA_SRC_DATA_TYPE_REG_OFFSET 0x2c +#define DMA_SRC_DATA_TYPE_DATA_TYPE_MASK 0x3 +#define DMA_SRC_DATA_TYPE_DATA_TYPE_OFFSET 0 +#define DMA_SRC_DATA_TYPE_DATA_TYPE_FIELD \ + ((bitfield_field32_t) { .mask = DMA_SRC_DATA_TYPE_DATA_TYPE_MASK, .index = DMA_SRC_DATA_TYPE_DATA_TYPE_OFFSET }) +#define DMA_SRC_DATA_TYPE_DATA_TYPE_VALUE_DMA_32BIT_WORD 0x0 +#define DMA_SRC_DATA_TYPE_DATA_TYPE_VALUE_DMA_16BIT_WORD 0x1 +#define DMA_SRC_DATA_TYPE_DATA_TYPE_VALUE_DMA_8BIT_WORD 0x2 +#define DMA_SRC_DATA_TYPE_DATA_TYPE_VALUE_DMA_8BIT_WORD_2 0x3 + +// Width/type of the destination data to transfer +#define DMA_DST_DATA_TYPE_REG_OFFSET 0x30 +#define DMA_DST_DATA_TYPE_DATA_TYPE_MASK 0x3 +#define DMA_DST_DATA_TYPE_DATA_TYPE_OFFSET 0 +#define DMA_DST_DATA_TYPE_DATA_TYPE_FIELD \ + ((bitfield_field32_t) { .mask = DMA_DST_DATA_TYPE_DATA_TYPE_MASK, .index = DMA_DST_DATA_TYPE_DATA_TYPE_OFFSET }) +#define DMA_DST_DATA_TYPE_DATA_TYPE_VALUE_DMA_32BIT_WORD 0x0 +#define DMA_DST_DATA_TYPE_DATA_TYPE_VALUE_DMA_16BIT_WORD 0x1 +#define DMA_DST_DATA_TYPE_DATA_TYPE_VALUE_DMA_8BIT_WORD 0x2 +#define DMA_DST_DATA_TYPE_DATA_TYPE_VALUE_DMA_8BIT_WORD_2 0x3 + +// Is the data to be sign extended? 
(Checked only if the dst data type is +// wider than the src data type) +#define DMA_SIGN_EXT_REG_OFFSET 0x34 +#define DMA_SIGN_EXT_SIGNED_BIT 0 // Set the operational mode of the DMA -#define DMA_MODE_REG_OFFSET 0x20 +#define DMA_MODE_REG_OFFSET 0x38 #define DMA_MODE_MODE_MASK 0x3 #define DMA_MODE_MODE_OFFSET 0 #define DMA_MODE_MODE_FIELD \ @@ -76,14 +121,58 @@ extern "C" { #define DMA_MODE_MODE_VALUE_CIRCULAR_MODE 0x1 #define DMA_MODE_MODE_VALUE_ADDRESS_MODE 0x2 +// Set the dimensionality of the DMA +#define DMA_DIM_CONFIG_REG_OFFSET 0x3c +#define DMA_DIM_CONFIG_DMA_DIM_BIT 0 + +// DMA dimensionality inversion selector +#define DMA_DIM_INV_REG_OFFSET 0x40 +#define DMA_DIM_INV_SEL_BIT 0 + +// Set the top padding +#define DMA_PAD_TOP_REG_OFFSET 0x44 +#define DMA_PAD_TOP_PAD_MASK 0x3f +#define DMA_PAD_TOP_PAD_OFFSET 0 +#define DMA_PAD_TOP_PAD_FIELD \ + ((bitfield_field32_t) { .mask = DMA_PAD_TOP_PAD_MASK, .index = DMA_PAD_TOP_PAD_OFFSET }) + +// Set the bottom padding +#define DMA_PAD_BOTTOM_REG_OFFSET 0x48 +#define DMA_PAD_BOTTOM_PAD_MASK 0x3f +#define DMA_PAD_BOTTOM_PAD_OFFSET 0 +#define DMA_PAD_BOTTOM_PAD_FIELD \ + ((bitfield_field32_t) { .mask = DMA_PAD_BOTTOM_PAD_MASK, .index = DMA_PAD_BOTTOM_PAD_OFFSET }) + +// Set the right padding +#define DMA_PAD_RIGHT_REG_OFFSET 0x4c +#define DMA_PAD_RIGHT_PAD_MASK 0x3f +#define DMA_PAD_RIGHT_PAD_OFFSET 0 +#define DMA_PAD_RIGHT_PAD_FIELD \ + ((bitfield_field32_t) { .mask = DMA_PAD_RIGHT_PAD_MASK, .index = DMA_PAD_RIGHT_PAD_OFFSET }) + +// Set the left padding +#define DMA_PAD_LEFT_REG_OFFSET 0x50 +#define DMA_PAD_LEFT_PAD_MASK 0x3f +#define DMA_PAD_LEFT_PAD_OFFSET 0 +#define DMA_PAD_LEFT_PAD_FIELD \ + ((bitfield_field32_t) { .mask = DMA_PAD_LEFT_PAD_MASK, .index = DMA_PAD_LEFT_PAD_OFFSET }) + // Will trigger a every "WINDOW_SIZE" writes -#define DMA_WINDOW_SIZE_REG_OFFSET 0x24 +#define DMA_WINDOW_SIZE_REG_OFFSET 0x54 +#define DMA_WINDOW_SIZE_WINDOW_SIZE_MASK 0x1fff +#define DMA_WINDOW_SIZE_WINDOW_SIZE_OFFSET 0 +#define 
DMA_WINDOW_SIZE_WINDOW_SIZE_FIELD \ + ((bitfield_field32_t) { .mask = DMA_WINDOW_SIZE_WINDOW_SIZE_MASK, .index = DMA_WINDOW_SIZE_WINDOW_SIZE_OFFSET }) // Number of times the end of the window was reached since the beginning. -#define DMA_WINDOW_COUNT_REG_OFFSET 0x28 +#define DMA_WINDOW_COUNT_REG_OFFSET 0x58 +#define DMA_WINDOW_COUNT_WINDOW_COUNT_MASK 0xff +#define DMA_WINDOW_COUNT_WINDOW_COUNT_OFFSET 0 +#define DMA_WINDOW_COUNT_WINDOW_COUNT_FIELD \ + ((bitfield_field32_t) { .mask = DMA_WINDOW_COUNT_WINDOW_COUNT_MASK, .index = DMA_WINDOW_COUNT_WINDOW_COUNT_OFFSET }) // Interrupt Enable Register -#define DMA_INTERRUPT_EN_REG_OFFSET 0x2c +#define DMA_INTERRUPT_EN_REG_OFFSET 0x5c #define DMA_INTERRUPT_EN_TRANSACTION_DONE_BIT 0 #define DMA_INTERRUPT_EN_WINDOW_DONE_BIT 1 diff --git a/sw/device/lib/drivers/gpio/gpio.c b/sw/device/lib/drivers/gpio/gpio.c index 3e733ecd8..f5cb004f2 100644 --- a/sw/device/lib/drivers/gpio/gpio.c +++ b/sw/device/lib/drivers/gpio/gpio.c @@ -150,7 +150,7 @@ gpio_result_t gpio_assign_irq_handler( uint32_t intr_id, { if( intr_id >= GPIO_INTR_START && intr_id <= GPIO_INTR_END ) { - gpio_handlers[ intr_id - GPIO_INTR_START ] = handler; + gpio_handlers[ intr_id - GPIO_INTR_START ] = (void (*)(void))handler; return GpioOk; } return GpioError; @@ -160,7 +160,7 @@ void gpio_reset_handlers_list( ) { for( uint8_t i = 0; i < GPIO_INTR_QTY; i++ ) { - gpio_handlers[ i ] = &gpio_handler_irq_dummy; + gpio_handlers[ i ] = (void (*)(void))&gpio_handler_irq_dummy; } } diff --git a/sw/device/lib/drivers/power_manager/data/power_manager.h.tpl b/sw/device/lib/drivers/power_manager/data/power_manager.h.tpl index 7f0d939ea..52215d273 100644 --- a/sw/device/lib/drivers/power_manager/data/power_manager.h.tpl +++ b/sw/device/lib/drivers/power_manager/data/power_manager.h.tpl @@ -105,16 +105,16 @@ typedef struct power_manager_ram_map_t { uint32_t monitor_power_gate; } power_manager_ram_map_t; -static power_manager_ram_map_t power_manager_ram_map[${ram_numbanks}] = { -% 
for bank in range(ram_numbanks): +static power_manager_ram_map_t power_manager_ram_map[${xheep.ram_numbanks()}] = { +% for bank in xheep.iter_ram_banks(): (power_manager_ram_map_t) { - .clk_gate = POWER_MANAGER_RAM_${bank}_CLK_GATE_REG_OFFSET, - .power_gate_ack = POWER_MANAGER_POWER_GATE_RAM_BLOCK_${bank}_ACK_REG_OFFSET, - .switch_off = POWER_MANAGER_RAM_${bank}_SWITCH_REG_OFFSET, - .wait_ack_switch = POWER_MANAGER_RAM_${bank}_WAIT_ACK_SWITCH_ON_REG_OFFSET, - .iso = POWER_MANAGER_RAM_${bank}_ISO_REG_OFFSET, - .retentive = POWER_MANAGER_RAM_${bank}_RETENTIVE_REG_OFFSET, - .monitor_power_gate = POWER_MANAGER_MONITOR_POWER_GATE_RAM_BLOCK_${bank}_REG_OFFSET + .clk_gate = POWER_MANAGER_RAM_${bank.name()}_CLK_GATE_REG_OFFSET, + .power_gate_ack = POWER_MANAGER_POWER_GATE_RAM_BLOCK_${bank.name()}_ACK_REG_OFFSET, + .switch_off = POWER_MANAGER_RAM_${bank.name()}_SWITCH_REG_OFFSET, + .wait_ack_switch = POWER_MANAGER_RAM_${bank.name()}_WAIT_ACK_SWITCH_ON_REG_OFFSET, + .iso = POWER_MANAGER_RAM_${bank.name()}_ISO_REG_OFFSET, + .retentive = POWER_MANAGER_RAM_${bank.name()}_RETENTIVE_REG_OFFSET, + .monitor_power_gate = POWER_MANAGER_MONITOR_POWER_GATE_RAM_BLOCK_${bank.name()}_REG_OFFSET }, % endfor }; diff --git a/sw/device/lib/drivers/rv_plic/rv_plic.c b/sw/device/lib/drivers/rv_plic/rv_plic.c index 7b6ba5f44..ffbb822df 100644 --- a/sw/device/lib/drivers/rv_plic/rv_plic.c +++ b/sw/device/lib/drivers/rv_plic/rv_plic.c @@ -371,7 +371,7 @@ plic_result_t plic_assign_external_irq_handler( uint32_t id, { if( id >= EXT_IRQ_START && id <= QTY_INTR ) { - handlers[ id ] = (handler_funct_t*) handler; + handlers[ id ] = (handler_funct_t) handler; return kPlicOk; } return kPlicBadArg; diff --git a/sw/device/lib/runtime/core_v_mini_mcu.c b/sw/device/lib/runtime/core_v_mini_mcu.c index 2d0af1e38..326fd963e 100644 --- a/sw/device/lib/runtime/core_v_mini_mcu.c +++ b/sw/device/lib/runtime/core_v_mini_mcu.c @@ -5,8 +5,10 @@ #include "core_v_mini_mcu.h" #include -// functions +//heep functions 
prototypes uint32_t * heep_get_flash_address_offset(uint32_t* data_address_lma); +void heep_init_lfsr(); +uint32_t heep_rand_lfsr(); // this translates the logical address of the FLASH relative to 0 instead of FLASH_MEM_START_ADDRESS, as used by the BSP uint32_t * heep_get_flash_address_offset(uint32_t* data_address_lma){ @@ -20,3 +22,16 @@ uint32_t * heep_get_flash_address_offset(uint32_t* data_address_lma){ #endif } + +//get random values +uint32_t lfsr; + +void heep_init_lfsr() { + lfsr = (uint32_t)0xAABBCCDD; +} + +uint32_t heep_rand_lfsr() { + uint32_t bit = (lfsr ^ (lfsr >> 10) ^ (lfsr >> 11) ^ (lfsr >> 12)) & 1; + lfsr = (lfsr >> 1) | (bit << 31); + return lfsr; +} diff --git a/sw/device/lib/runtime/core_v_mini_mcu.h.tpl b/sw/device/lib/runtime/core_v_mini_mcu.h.tpl index 2bfec7d90..3784711ca 100644 --- a/sw/device/lib/runtime/core_v_mini_mcu.h.tpl +++ b/sw/device/lib/runtime/core_v_mini_mcu.h.tpl @@ -9,8 +9,8 @@ extern "C" { #endif // __cplusplus -#define MEMORY_BANKS ${ram_numbanks} -% if ram_numbanks_il > 0: +#define MEMORY_BANKS ${xheep.ram_numbanks()} +% if xheep.has_il_ram(): #define HAS_MEMORY_BANKS_IL % endif diff --git a/sw/device/lib/runtime/handler.c b/sw/device/lib/runtime/handler.c index 8ee1a0d81..c9cd02d35 100644 --- a/sw/device/lib/runtime/handler.c +++ b/sw/device/lib/runtime/handler.c @@ -3,9 +3,9 @@ // SPDX-License-Identifier: Apache-2.0 #include "handler.h" - #include "csr.h" #include "stdasm.h" +#include ' /** * Return value of mtval diff --git a/sw/device/lib/runtime/syscalls.c b/sw/device/lib/runtime/syscalls.c index 05dd28f90..a9a7fcc20 100644 --- a/sw/device/lib/runtime/syscalls.c +++ b/sw/device/lib/runtime/syscalls.c @@ -18,33 +18,41 @@ */ #include +#include #include #include +#include #include #include "uart.h" #include "soc_ctrl.h" #include "core_v_mini_mcu.h" #include "error.h" #include "x-heep.h" +#include #undef errno extern int errno; #define STDOUT_FILENO 1 -/* It turns out that older newlib versions use different symbol 
names which goes - * against newlib recommendations. Anyway this is fixed in later version. - */ -#if __NEWLIB__ <= 2 && __NEWLIB_MINOR__ <= 5 -# define _sbrk sbrk -# define _write write -# define _close close -# define _lseek lseek -# define _read read -# define _fstat fstat -# define _isatty isatty +#ifndef _LIBC +/* Provide prototypes for most of the _ names that are + provided in newlib for some compilers. */ +int _close (int __fildes); +pid_t _fork (void); +pid_t _getpid (void); +int _isatty (int __fildes); +int _link (const char *__path1, const char *__path2); +_off_t _lseek (int __fildes, _off_t __offset, int __whence); +ssize_t _read (int __fd, void *__buf, size_t __nbyte); +void * _sbrk (ptrdiff_t __incr); +int _unlink (const char *__path); +ssize_t _write (int __fd, const void *__buf, size_t __nbyte); +int _execve (const char *__path, char * const __argv[], char * const __envp[]); +int _kill (pid_t pid, int sig); #endif + void unimplemented_syscall() { const char *p = "Unimplemented system call called!\n"; @@ -108,7 +116,7 @@ int _faccessat(int dirfd, const char *file, int mode, int flags) return -1; } -int _fork(void) +pid_t _fork(void) { errno = EAGAIN; return -1; @@ -140,7 +148,7 @@ char *_getcwd(char *buf, size_t size) return NULL; } -int _getpid() +pid_t _getpid() { return 1; } @@ -156,7 +164,7 @@ int _isatty(int file) return (file == STDOUT_FILENO); } -int _kill(int pid, int sig) +int _kill(pid_t pid, int sig) { errno = EINVAL; return -1; diff --git a/sw/device/target/systemc/x-heep.h b/sw/device/target/systemc/x-heep.h new file mode 100644 index 000000000..1c1390cb1 --- /dev/null +++ b/sw/device/target/systemc/x-heep.h @@ -0,0 +1,30 @@ +// Copyright EPFL contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. 
+// SPDX-License-Identifier: Apache-2.0 + +#ifndef X_HEEP +#define X_HEEP + +#pragma message ( "the x-heep.h for SIMULATION in SYSTEMC is used" ) + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + + +#define REFERENCE_CLOCK_Hz 100*1000*1000 +#define UART_BAUDRATE 256000 +#define TARGET_SYSTEMC 1 + +/** + * As the hw is configurable, we can have setups with different number of + * Gpio pins + */ +#define MAX_PIN 32 + + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // X_HEEP diff --git a/sw/device/target/zcu104/x-heep.h b/sw/device/target/zcu104/x-heep.h new file mode 100644 index 000000000..af1b4cd5a --- /dev/null +++ b/sw/device/target/zcu104/x-heep.h @@ -0,0 +1,28 @@ +// Copyright EPFL contributors. +// Licensed under the Apache License, Version 2.0, see LICENSE for details. +// SPDX-License-Identifier: Apache-2.0 + +#ifndef X_HEEP +#define X_HEEP + +#pragma message ( "the x-heep.h for ZCU104 is used" ) + +#ifdef __cplusplus +extern "C" { +#endif // __cplusplus + +#define REFERENCE_CLOCK_Hz 15*1000*1000 +#define UART_BAUDRATE 9600 +#define TARGET_ZCU104 1 + +/** + * As the hw is configurable, we can have setups with different number of + * Gpio pins + */ +#define MAX_PIN 32 + +#ifdef __cplusplus +} // extern "C" +#endif // __cplusplus + +#endif // X_HEEP diff --git a/sw/linker/link.ld.tpl b/sw/linker/link.ld.tpl index 7403248dc..b499b5763 100644 --- a/sw/linker/link.ld.tpl +++ b/sw/linker/link.ld.tpl @@ -20,11 +20,9 @@ MEMORY /* Our testbench is a bit weird in that we initialize the RAM (thus allowing initialized sections to be placed there). Infact we dump all sections to ram. 
*/ - ram0 (rwxai) : ORIGIN = 0x${linker_onchip_code_start_address}, LENGTH = 0x${linker_onchip_code_size_address} - ram1 (rwxai) : ORIGIN = 0x${linker_onchip_data_start_address}, LENGTH = 0x${linker_onchip_data_size_address} -% if ram_numbanks_cont > 1 and ram_numbanks_il > 0: - ram_il (rwxai) : ORIGIN = 0x${linker_onchip_il_start_address}, LENGTH = 0x${linker_onchip_il_size_address} -% endif + % for i, section in enumerate(xheep.iter_linker_sections()): + ram${i} (rwxai) : ORIGIN = ${f"{section.start:#08x}"}, LENGTH = ${f"{section.size:#08x}"} +% endfor } /* @@ -294,14 +292,16 @@ SECTIONS PROVIDE(__freertos_irq_stack_top = .); } >ram1 -% if ram_numbanks_cont > 1 and ram_numbanks_il > 0: - .data_interleaved : +% for i, section in enumerate(xheep.iter_linker_sections()): +% if not section.name in ["code", "data"]: + .${section.name} : { . = ALIGN(4); - *(.xheep_data_interleaved) + *(.xheep_${section.name}) . = ALIGN(4); - } >ram_il + } >ram${i} % endif +% endfor /* Stabs debugging sections. 
*/ .stab 0 : { *(.stab) } diff --git a/sw/linker/link_flash_exec.ld.tpl b/sw/linker/link_flash_exec.ld.tpl index 8bd9e64e3..dca5e456a 100644 --- a/sw/linker/link_flash_exec.ld.tpl +++ b/sw/linker/link_flash_exec.ld.tpl @@ -8,7 +8,7 @@ ENTRY(_start) MEMORY { FLASH (rx) : ORIGIN = 0x${flash_mem_start_address}, LENGTH = 0x${flash_mem_size_address} - RAM (xrw) : ORIGIN = 0x${'{:08X}'.format(int(ram_start_address,16) + 4)}, LENGTH = 0x${'{:08X}'.format(int(ram_size_address,16) - 4)} + RAM (xrw) : ORIGIN = 0x${f'{xheep.ram_start_address()+4:08X}'}, LENGTH = 0x${f'{xheep.ram_size_address()-4:08X}'} } SECTIONS { diff --git a/sw/linker/link_flash_load.ld.tpl b/sw/linker/link_flash_load.ld.tpl index 541b22338..f704f364f 100644 --- a/sw/linker/link_flash_load.ld.tpl +++ b/sw/linker/link_flash_load.ld.tpl @@ -7,15 +7,13 @@ ENTRY(_start) MEMORY { - ram0 (rwxai) : ORIGIN = 0x${linker_onchip_code_start_address}, LENGTH = 0x${linker_onchip_code_size_address} - ram1 (rwxai) : ORIGIN = 0x${linker_onchip_data_start_address}, LENGTH = 0x${linker_onchip_data_size_address} - FLASH0 (rx) : ORIGIN = 0x${linker_flash_code_start_address}, LENGTH = 0x${linker_onchip_code_size_address} - FLASH1 (rx) : ORIGIN = 0x${linker_flash_data_start_address}, LENGTH = 0x${linker_onchip_data_size_address} - % if ram_numbanks_cont > 1 and ram_numbanks_il > 0: - ram_il (rwxai) : ORIGIN = 0x${linker_onchip_il_start_address}, LENGTH = 0x${linker_onchip_il_size_address} - FLASH_il (rx) : ORIGIN = 0x${linker_flash_il_start_address}, LENGTH = 0x${linker_onchip_il_size_address} - % endif - FLASH_left (rx) : ORIGIN = 0x${linker_flash_left_start_address}, LENGTH = 0x${linker_flash_left_size_address} +<%flash_end = 0%> +% for i, section in enumerate(xheep.iter_linker_sections()): + ram${i} (rwxai) : ORIGIN = ${f"{section.start:#08x}"}, LENGTH = ${f"{section.size:#08x}"} + FLASH${i} (rx) : ORIGIN = ${f"{section.start + int(flash_mem_start_address,16):#08x}"}, LENGTH = ${f"{section.size:#08x}"} +<%flash_end = 
section.end%> +% endfor + FLASH_left (rx) : ORIGIN = ${f"{flash_end + int(flash_mem_start_address,16):#08x}"}, LENGTH = ${f"{int(flash_mem_size_address,16) - flash_end:#08x}"} } @@ -62,7 +60,7 @@ SECTIONS { KEEP (*(.text.spi_wait_for_rx_watermark*)) KEEP (*(.text.spi_read_word*)) KEEP (*(.text.memcpy)) - KEEP (*(.text.w25q128jw_read_standard)) /* as this function is used in the crt0, link it in the top, should be before 1024 Bytes loaded by the bootrom */ + KEEP (*(.text.w25q128jw_read_standard*)) /* as this function is used in the crt0, link it in the top, should be before 1024 Bytes loaded by the bootrom */ *(.xheep_init_data_crt0) /* this global variables are used in the crt0 */ } >ram0 AT >FLASH0 @@ -158,21 +156,23 @@ SECTIONS { PROVIDE(__freertos_irq_stack_top = .); } >ram1 - % if ram_numbanks_cont > 1 and ram_numbanks_il > 0: - .data_interleaved : ALIGN_WITH_INPUT + % for i, section in enumerate(xheep.iter_linker_sections()): + % if not section.name in ["code", "data"]: + .${section.name} : ALIGN_WITH_INPUT { - PROVIDE(__data_interleaved_start = .); - _lma_data_interleaved_start = LOADADDR(.data_interleaved); + PROVIDE(__${section.name}_start = .); + _lma_${section.name}_start = LOADADDR(.${section.name}); . = ALIGN(4); - *(.xheep_data_interleaved) + *(.xheep_${section.name}) . = ALIGN(4); - } >ram_il AT >FLASH_il + } >ram${i} AT >FLASH${i} . 
= ALIGN(4); - _eddata_interleaved = .; - _lma_data_interleaved_end = _lma_data_interleaved_start + SIZEOF(.data_interleaved); + _ed${section.name} = .; + _lma_${section.name}_end = _lma_${section.name}_start + SIZEOF(.${section.name}); % endif + % endfor .data_flash_only : ALIGN(256) { diff --git a/tb/XHEEP_CmdLineOptions.cpp b/tb/XHEEP_CmdLineOptions.cpp new file mode 100644 index 000000000..947d31ef3 --- /dev/null +++ b/tb/XHEEP_CmdLineOptions.cpp @@ -0,0 +1,100 @@ +#include "XHEEP_CmdLineOptions.hh" +#include +#include + +XHEEP_CmdLineOptions::XHEEP_CmdLineOptions(int argc, char* argv[]) // define default constructor +{ + this->argc = argc; + this->argv = argv; +} + +std::string XHEEP_CmdLineOptions::getCmdOption(int argc, char* argv[], const std::string& option) +{ + std::string cmd; + for( int i = 0; i < argc; ++i) + { + std::string arg = argv[i]; + size_t arg_size = arg.length(); + size_t option_size = option.length(); + + if(arg.find(option)==0){ + cmd = arg.substr(option_size,arg_size-option_size); + } + } + return cmd; +} + +bool XHEEP_CmdLineOptions::get_use_openocd() +{ + + std::string arg_openocd = this->getCmdOption(this->argc, this->argv, "+openOCD=");; + + bool use_openocd = false; + + if(arg_openocd.empty()){ + std::cout<<"[TESTBENCH]: No OpenOCD is used"<getCmdOption(this->argc, this->argv, "+firmware="); + + if(firmware.empty()){ + std::cout<<"[TESTBENCH]: No firmware specified"<getCmdOption(this->argc, this->argv, "+max_sim_time="); + unsigned int max_sim_time; + + max_sim_time = 0; + if(arg_max_sim_time.empty()){ + std::cout<<"[TESTBENCH]: No Max time specified"<getCmdOption(this->argc, this->argv, "+boot_sel="); + unsigned int boot_sel = 0; + + if(arg_boot_sel.empty()){ + std::cout<<"[TESTBENCH]: No Boot Option specified, using jtag (boot_sel=0)"< + +class XHEEP_CmdLineOptions // declare Calculator class +{ + + public: // public members + XHEEP_CmdLineOptions(int argc, char* argv[]); // default constructor + + std::string getCmdOption(int argc, 
char* argv[], const std::string& option); // get options from cmd lines + bool get_use_openocd(); + std::string get_firmware(); + unsigned int get_max_sim_time(bool& run_all); + unsigned int get_boot_sel(); + int argc; + char** argv; + +}; + + + +#endif diff --git a/tb/systemc_tb/Cache.h b/tb/systemc_tb/Cache.h new file mode 100644 index 000000000..d9c94b5d0 --- /dev/null +++ b/tb/systemc_tb/Cache.h @@ -0,0 +1,221 @@ +#ifndef CACHE_H +#define CACHE_H + +#include +#include +#include +#include + + +// Target module representing a simple direct mapped cache +class CacheMemory +{ + +public: + uint32_t cache_size_byte = 4*1024; + uint32_t number_of_blocks = 256; + + uint32_t nbits_blocks = 0; + uint32_t nbits_tags = 0; + uint32_t nbits_index = 0; + uint32_t block_size_byte = 0; + + enum { ARCHITECTURE_bits = 32 }; + + std::ofstream cacheFile; + + typedef struct cache_line { + uint32_t tag; + bool valid; + uint8_t* data; + } cache_line_t; + + cache_line_t* cache_array; + + + CacheMemory(): cacheFile("cache_status.log") + { + cache_array = NULL; + } + + void create_cache() { + cache_array = new cache_line_t[number_of_blocks]; + this->block_size_byte = get_block_size(); + this->nbits_blocks = log2(block_size_byte); + this->nbits_index = log2(number_of_blocks); + this->nbits_tags = ARCHITECTURE_bits - nbits_index - nbits_blocks; + printf("bits block %d, index %d, tags %d\n",nbits_blocks, nbits_index, nbits_tags ); + } + + void create_cache(uint32_t cache_size_byte, uint32_t number_of_blocks) { + this->cache_size_byte = cache_size_byte; + this->number_of_blocks = number_of_blocks; + cache_array = new cache_line_t[number_of_blocks]; + this->block_size_byte = get_block_size(); + this->nbits_blocks = log2(block_size_byte); + this->nbits_index = log2(number_of_blocks); + this->nbits_tags = ARCHITECTURE_bits - nbits_index - nbits_blocks; + } + + uint32_t initialize_cache() { + if(cache_array == NULL) { + return -1; + } + // Initialize memory with random data + for (int i = 0; i < 
number_of_blocks; i++) { + cache_array[i].valid = false; + cache_array[i].tag = 0; + cache_array[i].data = new uint8_t[block_size_byte]; + for(int j = 0; j> nbits_blocks) & mask_index ); + } + + uint32_t get_block_offset(uint32_t address) { + uint32_t mask_block = (1 << nbits_blocks) - 1; + return (uint32_t)(address & mask_block); + } + + uint32_t get_base_address(uint32_t address) { + return (uint32_t)((address >> nbits_blocks) << nbits_blocks); + } + + uint32_t get_tag(uint32_t address) { + return (uint32_t)(address >> (nbits_index+nbits_blocks)); + } + + uint32_t get_tag_from_index(uint32_t index) { + return cache_array[index].tag; + } + + + bool cache_hit(uint32_t address) { + uint32_t index = get_index(address); + uint32_t tag = get_tag(address); + return ( cache_array[index].valid && tag == cache_array[index].tag); + + } + + void add_entry(uint32_t address, uint8_t* new_data) { + uint32_t index = get_index(address); + uint32_t tag = get_tag(address); + cache_array[index].valid = true; + cache_array[index].tag = tag; + memcpy(cache_array[index].data, new_data, block_size_byte); + } + + void get_data(uint32_t address, uint8_t* new_data) { + uint32_t index = get_index(address); + memcpy(new_data, cache_array[index].data, block_size_byte); + } + + void get_data_at_index(uint32_t index, uint8_t* new_data) { + memcpy(new_data, cache_array[index].data, block_size_byte); + } + + uint32_t get_address(uint32_t address){ + uint32_t index = get_index(address); + uint32_t tag = cache_array[index].tag; + uint32_t new_address = (tag << (nbits_index+nbits_blocks)) | (index<get_block_offset(address); + uint8_t* new_data = new uint8_t[block_size_byte]; + this->get_data(address, new_data); + data_word = *((int32_t *)&new_data[block_offset]); + delete new_data; + return data_word; + } + + void set_word(uint32_t address, int32_t data_word) { + uint32_t block_offset = this->get_block_offset(address); + uint8_t* new_data = new uint8_t[block_size_byte]; + this->get_data(address, 
new_data); + *((int32_t *)&new_data[block_offset]) = data_word; + for(int i=0;iadd_entry(address, new_data); + delete new_data; + } + + bool is_entry_valid(uint32_t address) { + uint32_t index = get_index(address); + return cache_array[index].valid; + } + + bool is_entry_valid_at_index(uint32_t index) { + return cache_array[index].valid; + } + + void print_cache_status(uint32_t operation_id, std::string time_str) { + if (cacheFile.is_open()) { + std::string log_cache = ""; + std::ostringstream ss; + + log_cache+= std::to_string(operation_id) + "): " + time_str + "\n"; + log_cache+= "INDEX | TAG | DATA BLOCK | VALID\n"; + + for(int i=0;inbits_index/4) << std::setfill('0') << std::hex << static_cast(i); + log_cache+= ss.str() + " | "; + ss.str(""); + ss.clear(); + ss << "0x" << std::setw(this->nbits_tags/4) << std::setfill('0') << std::hex << cache_array[i].tag; + log_cache+= ss.str() + " | 0x"; + ss.str(""); + ss.clear(); + for(int j = 0; j(cache_array[i].data[j]); + log_cache+= ss.str() + " | "; + log_cache+= std::string( cache_array[i].valid ? "1" : "0" ) + "\n"; + + cacheFile << log_cache; + ss.str(""); + ss.clear(); + log_cache = std::string(""); + } + } else { + std::cout << "Failed to create the Cache file." << std::endl; + } + } + + /* main memory address + 0x7052 = 'b111_0000_0101_0010' + + cache size = 4KB, + number_of_blocks = 256, thus index is on 8bit + block_size_in_byte = 4KB/256 = 16bytes, i.e. 
4 words + + 111: tag + 0000_0101: used as index + 0010: used for block offset , 4bits as 16 bytes + + + get_tag(0x7052) --> 0x7 + get_index(0x7052) --> 0x5 + get_block_offset(0x7052) --> 0x2 + + + */ +}; + +#endif diff --git a/tb/systemc_tb/MainMemory.h b/tb/systemc_tb/MainMemory.h new file mode 100644 index 000000000..984f81cf3 --- /dev/null +++ b/tb/systemc_tb/MainMemory.h @@ -0,0 +1,68 @@ +#ifndef MEMORY_H +#define MEMORY_H + +// Needed for the simple_target_socket +#define SC_INCLUDE_DYNAMIC_PROCESSES + +#include "systemc" +using namespace sc_core; +using namespace sc_dt; +using namespace std; + +#include "tlm.h" +#include "tlm_utils/simple_target_socket.h" + + +// Target module representing a simple direct mapped cache +SC_MODULE(MainMemory) +{ + // TLM-2 socket, defaults to 32-bits wide, base protocol + tlm_utils::simple_target_socket socket; + + enum { SIZE = 32*1024/4 }; //32KB word addressable + + int32_t mem[SIZE]; + + + SC_CTOR(MainMemory) + : socket("socket") + { + // Register callback for incoming b_transport interface method call + socket.register_b_transport(this, &MainMemory::b_transport); + + // Initialize memory with random data + for (int i = 0; i < SIZE; i++) + mem[i] = 0xAA000000 | (rand() % 256); + } + + // TLM-2 blocking transport method + virtual void b_transport( tlm::tlm_generic_payload& trans, sc_time& delay ) + { + tlm::tlm_command cmd = trans.get_command(); + sc_dt::uint64 adr = trans.get_address() / 4; + unsigned char* ptr = trans.get_data_ptr(); + unsigned int len = trans.get_data_length(); + unsigned char* byt = trans.get_byte_enable_ptr(); + unsigned int wid = trans.get_streaming_width(); + + // Obliged to check address range and check for unsupported features, + // i.e. 
byte enables, streaming, and bursts + // Can ignore DMI hint and extensions + // Using the SystemC report handler is an acceptable way of signalling an error + + if (adr >= sc_dt::uint64(SIZE) || byt != 0 || len > 4 || wid < len) + SC_REPORT_ERROR("TLM-2", "Target does not support given generic payload transaction"); + + // Obliged to implement read and write commands + if ( cmd == tlm::TLM_READ_COMMAND ) + memcpy(ptr, &mem[adr], len); + else if ( cmd == tlm::TLM_WRITE_COMMAND ) + memcpy(&mem[adr], ptr, len); + + // Obliged to set response status to indicate successful completion + trans.set_response_status( tlm::TLM_OK_RESPONSE ); + } + +}; + +#endif diff --git a/tb/systemc_tb/MemoryRequest.h b/tb/systemc_tb/MemoryRequest.h new file mode 100644 index 000000000..c0a7d1c7f --- /dev/null +++ b/tb/systemc_tb/MemoryRequest.h @@ -0,0 +1,241 @@ +#ifndef MEMORYREQUEST_H +#define MEMORYREQUEST_H + +#include "systemc" +using namespace sc_core; +using namespace sc_dt; +using namespace std; + +#include "tlm.h" +#include "tlm_utils/simple_initiator_socket.h" + +#include "Cache.h" + +#include +#include +#include +#include + +// MemoryRequest module generating generic payload transactions + +SC_MODULE(MemoryRequest) +{ + // TLM-2 socket, defaults to 32-bits wide, base protocol + tlm_utils::simple_initiator_socket socket; + bool we_i; + uint32_t be_i; + uint32_t addr_i; + uint32_t rwdata_io; + CacheMemory* cache; + std::ofstream heep_mem_transactions; + bool bypass_state = false; + + typedef struct cache_statistics + { + uint32_t number_of_transactions; + uint32_t number_of_hit; + uint32_t number_of_miss; + } cache_statistics_t; + + cache_statistics_t cache_stat; + + SC_CTOR(MemoryRequest) + : socket("socket"), // Construct and name socket + heep_mem_transactions("heep_mem_transactions.log") + { + + cache = new CacheMemory; + cache->create_cache(); + cache->initialize_cache(); + cache_stat.number_of_transactions = 0; + cache_stat.number_of_hit = 0; + cache_stat.number_of_miss = 
0; + cache->print_cache_status(cache_stat.number_of_transactions++, sc_time_stamp().to_string()); + + SC_THREAD(thread_process); + } + + + uint32_t memory_copy(uint32_t addr, int32_t* buffer_data, int N, bool write_enable, tlm::tlm_generic_payload* trans, sc_time delay) { + + tlm::tlm_command cmd = write_enable ? tlm::TLM_WRITE_COMMAND : tlm::TLM_READ_COMMAND; + + //first read block_size bytes from memory to place them in cache regardless of the cmd + for(int i=0; i < N; i++){ + trans->set_command( cmd ); + trans->set_address( (addr + i*4) & 0x00007FFF ); //15bits + trans->set_data_ptr( reinterpret_cast(&buffer_data[i]) ); + trans->set_data_length( 4 ); + trans->set_streaming_width( 4 ); // = data_length to indicate no streaming + trans->set_byte_enable_ptr( 0 ); // 0 indicates unused + trans->set_dmi_allowed( false ); // Mandatory initial value + trans->set_response_status( tlm::TLM_INCOMPLETE_RESPONSE ); // Mandatory initial value + socket->b_transport( *trans, delay ); // Blocking transport call + + if(bypass_state){ + if(write_enable) + heep_mem_transactions << "Writing to Mem[" << hex << ((addr + i*4) & 0x00007FFF) << "]: " << buffer_data[i] << " at time " << sc_time_stamp() <is_response_error() ) + SC_REPORT_ERROR("TLM-2", "Response error from b_transport"); + } + return N; + } + + + void thread_process() + { + // TLM-2 generic payload transaction, reused across calls to b_transport + tlm::tlm_generic_payload* trans = new tlm::tlm_generic_payload; + + sc_time delay_gnt_miss = sc_time(100, SC_NS); + sc_time delay_rvalid_miss = sc_time(100, SC_NS); + + sc_time delay_rvalid_hit = sc_time(20, SC_NS); //as of today, it must be >=20 + + sc_time delay = sc_time(1, SC_NS); + + uint32_t cache_block_size_byte = cache->get_block_size(); + uint32_t cache_block_size_word = cache->get_block_size()/4; + uint8_t* cache_data = new uint8_t[cache_block_size_byte]; + int32_t* main_mem_data = new int32_t[cache_block_size_word]; + uint32_t address_to_replace; + uint32_t 
cache_flushed; + + while(true) { + + wait(obi_new_req); + + heep_mem_transactions << "X-HEEP tlm_generic_payload REQ: { " << (we_i ? 'W' : 'R') << ", @0x" << hex << addr_i + << " , DATA = 0x" << hex << rwdata_io << " BE = " << hex << be_i <<", at time " << sc_time_stamp() << " }" << std::endl; + + if(be_i!=0xF) { + SC_REPORT_ERROR("OBI External Memory SystemC", "ByteEnable different than 0xF is not supported"); + } + + //if we are writing 1 or 2 to last address, flush cache or bypass + if(we_i && ((addr_i & 0x00007FFF) == 0x7FFC)){ + + if(rwdata_io == 1){ + //FLUSH Cache + heep_mem_transactions << "X-HEEP Flush Cache, at time " << sc_time_stamp() << " }" << std::endl; + uint32_t cache_number_of_blocks = cache->number_of_blocks; + heep_mem_transactions<<"Cache Flushing at time "<is_entry_valid_at_index(i)) { + cache_flushed++; + //if we are going to replace a valid entry + cache->get_data_at_index(i, cache_data); + address_to_replace = cache->get_address_at_index(i); + //write back + memory_copy(address_to_replace, (uint32_t *)cache_data, cache_block_size_word, true, trans, delay); + } + } + heep_mem_transactions<<"Cache Flushed "<< dec << cache_flushed << " entries"<cache_hit(addr_i)){ + + heep_mem_transactions << "Cache HIT on address " << hex << addr_i << " at time " << sc_time_stamp() <get_word(addr_i); + //if Write, writes to cache + if(we_i) + cache->set_word(addr_i, rwdata_io); + else + rwdata_io = main_mem_data[0]; + wait(delay_rvalid_hit); + } + + else { //miss case + + cache_stat.number_of_miss++; + + heep_mem_transactions << "Cache MISS on address " << hex << addr_i << " at time " << sc_time_stamp() <get_base_address(addr_i); + uint32_t addr_offset = cache->get_block_offset(addr_i); + + //first read block_size bytes from memory to place them in cache regardless of the cmd + memory_copy(addr_to_read, main_mem_data, cache_block_size_word, false, trans, delay); + uint32_t index_to_add = cache->get_index(addr_i); + uint32_t tag_to_add = 
cache->get_tag(addr_i); + + heep_mem_transactions << "Adding to Cache TAG " << hex << tag_to_add << " and index " << hex << index_to_add <is_entry_valid(addr_i)) { + //if we are going to replace a valid entry + cache->get_data(addr_i, cache_data); + address_to_replace = cache->get_address(addr_i); + uint32_t index_to_replace = cache->get_index(addr_i); + uint32_t tag_to_replace = cache->get_tag_from_index(index_to_replace); + + heep_mem_transactions << "Cache Replace address " << hex << addr_i << " with address " << hex << address_to_replace << " due to the MISS at time " << sc_time_stamp() <add_entry(addr_i, (uint8_t*)main_mem_data); + + //if Write, writes to cache + if(we_i) + cache->set_word(addr_i, rwdata_io); + + //now give back the rdata + rwdata_io = main_mem_data[addr_offset>>2]; //>>2 as addr_offset is for byte address, not words + + //wait some time before giving the rvalid + wait(delay_rvalid_miss); + + } + } + } + + heep_mem_transactions << "X-HEEP tlm_generic_payload RESP: { DATA = 0x" << hex << rwdata_io <<", at time " << sc_time_stamp() << " }" << std::endl; + cache->print_cache_status(cache_stat.number_of_transactions++, sc_time_stamp().to_string()); + + obi_new_rvalid.notify(); + + } + } +}; + +#endif diff --git a/tb/tb_sc_top.cpp b/tb/tb_sc_top.cpp new file mode 100644 index 000000000..b619d9500 --- /dev/null +++ b/tb/tb_sc_top.cpp @@ -0,0 +1,342 @@ +// Copyright 2024 EPFL +// Solderpad Hardware License, Version 2.1, see LICENSE.md for details. 
+// SPDX-License-Identifier: Apache-2.0 WITH SHL-2.1 + +#include "verilated.h" +#include +#include "Vtestharness.h" +#include "Vtestharness__Syms.h" +#include "systemc.h" +#include +#include +#include "XHEEP_CmdLineOptions.hh" + +sc_event reset_done_event; +sc_event obi_new_gnt; +sc_event obi_new_rvalid; +sc_event obi_new_req; + + +#include "systemc_tb/MemoryRequest.h" +#include "systemc_tb/MainMemory.h" + + +#define CLK_PERIOD 10 + +SC_MODULE(external_memory) +{ + MemoryRequest *memory_request; + MainMemory *memory; + + sc_in clk_i; + sc_in ext_systemc_req_req_i; + sc_in ext_systemc_req_we_i; + sc_in ext_systemc_req_be_i; + sc_in ext_systemc_req_addr_i; + sc_in ext_systemc_req_wdata_i; + sc_out ext_systemc_resp_gnt_o; + sc_out ext_systemc_resp_rvalid_o; + sc_out ext_systemc_resp_rdata_o; + + void notify_obi_transaction () { + if(ext_systemc_req_req_i) { + obi_new_req.notify(); + memory_request->we_i = ext_systemc_req_we_i; + memory_request->be_i = ext_systemc_req_be_i; + memory_request->addr_i = ext_systemc_req_addr_i; + memory_request->rwdata_io = ext_systemc_req_wdata_i; + } + } + + void give_gnt_back () { + while (true) { + ext_systemc_resp_gnt_o.write(false); + wait(obi_new_gnt); + ext_systemc_resp_gnt_o.write(true); + wait(); + } + } + + void give_rvalid_rdata_back () { + while (true) { + ext_systemc_resp_rvalid_o.write(false); + wait(obi_new_rvalid); + ext_systemc_resp_rvalid_o.write(true); + ext_systemc_resp_rdata_o.write(memory_request->rwdata_io); + wait(); + } + } + + SC_CTOR(external_memory) + { + // Instantiate components + memory_request = new MemoryRequest("memory_request"); + memory = new MainMemory ("main_memory"); + + SC_METHOD(notify_obi_transaction); + sensitive << ext_systemc_req_req_i; + + SC_CTHREAD(give_gnt_back, clk_i.pos()); + SC_CTHREAD(give_rvalid_rdata_back, clk_i.pos()); + + // Bind memory_request socket to target socket + memory_request->socket.bind( memory->socket ); + } +}; + + +SC_MODULE(testbench) +{ + + sc_in clk_i; + sc_out 
clk_o; + sc_out rst_no; + sc_out boot_select_o; + sc_out execute_from_flash_o; + sc_out jtag_tck_o; + sc_out jtag_tms_o; + sc_out jtag_trst_n_o; + sc_out jtag_tdi_o; + + Vtestharness* dut; + std::string* firmware; + + bool boot_select_option; + unsigned int reset_cycles = 30; + + void make_clock () { + while(1) { + clk_o.write(false); + wait(); + clk_o.write(true); + wait(); + } + } + + void do_reset_cycle () { + //active low + //----- + rst_no.write(false); + + for(int i=0;itb_loadHEX(firmware->c_str()); + } + + void set_exit_loop () { + wait(); + dut->tb_set_exit_loop(); + } + + void make_stimuli () { + + boot_select_o.write(boot_select_option); + execute_from_flash_o.write(true); + jtag_tck_o.write(false); + jtag_tms_o.write(false); + jtag_trst_n_o.write(false); + jtag_tdi_o.write(false); + + std::cout<<"Start Reset Cycle: "<c_str()<get_use_openocd(); + firmware = cmd_lines_options->get_firmware(); + + if(firmware.empty() && use_openocd==false) { + std::cout<<"You must specify the firmware if you are not using OpenOCD"<get_max_sim_time(run_all); + + boot_sel = cmd_lines_options->get_boot_sel(); + + if(use_openocd) { + std::cout<<"[TESTBENCH]: ERROR: Executing from OpenOCD in SystemC is not supported (yet) in X-HEEP"<>1, SC_NS, 0.5); + + Vtestharness dut("TOP"); + testbench tb("testbench"); + external_memory ext_mem("external_memory"); + + svSetScope(svGetScopeFromName("TOP.testharness")); + svScope scope = svGetScope(); + if (!scope) { + std::cout<<"Warning: svGetScope failed"<< std::endl; + exit(EXIT_FAILURE); + } + + + // static values + tb.boot_select_option = boot_sel == 1; + + + // Vtestharness interface + sc_signal clk; + sc_signal rst_n; + sc_signal boot_select; + sc_signal execute_from_flash; + sc_signal jtag_tck; + sc_signal jtag_tms; + sc_signal jtag_trst_n; + sc_signal jtag_tdi; + sc_signal jtag_tdo; + sc_signal exit_value; + sc_signal exit_valid; + sc_signal ext_systemc_req_req; + sc_signal ext_systemc_req_we; + sc_signal ext_systemc_req_be; + 
sc_signal ext_systemc_req_addr; + sc_signal ext_systemc_req_wdata; + sc_signal ext_systemc_resp_gnt; + sc_signal ext_systemc_resp_rvalid; + sc_signal ext_systemc_resp_rdata; + + + + tb.clk_i(clock_sig); + tb.clk_o(clk); + tb.rst_no(rst_n); + tb.boot_select_o(boot_select); + tb.execute_from_flash_o(execute_from_flash); + tb.jtag_tck_o(jtag_tck); + tb.jtag_tms_o(jtag_tms); + tb.jtag_trst_n_o(jtag_trst_n); + tb.jtag_tdi_o(jtag_tdi); + + tb.dut = &dut; + tb.firmware = &firmware; + + dut.clk_i(clk); + dut.rst_ni(rst_n); + dut.boot_select_i(boot_select); + dut.execute_from_flash_i(execute_from_flash); + dut.jtag_tck_i(jtag_tck); + dut.jtag_tms_i(jtag_tms); + dut.jtag_trst_ni(jtag_trst_n); + dut.jtag_tdi_i(jtag_tdi); + dut.jtag_tdo_o(jtag_tdo); + dut.exit_value_o(exit_value); + dut.exit_valid_o(exit_valid); + dut.ext_systemc_req_req_o(ext_systemc_req_req); + dut.ext_systemc_req_we_o(ext_systemc_req_we); + dut.ext_systemc_req_be_o(ext_systemc_req_be); + dut.ext_systemc_req_addr_o(ext_systemc_req_addr); + dut.ext_systemc_req_wdata_o(ext_systemc_req_wdata); + dut.ext_systemc_resp_gnt_i(ext_systemc_resp_gnt); + dut.ext_systemc_resp_rvalid_i(ext_systemc_resp_rvalid); + dut.ext_systemc_resp_rdata_i(ext_systemc_resp_rdata); + + ext_mem.clk_i(clk); + ext_mem.ext_systemc_req_req_i(ext_systemc_req_req); + ext_mem.ext_systemc_req_we_i(ext_systemc_req_we); + ext_mem.ext_systemc_req_be_i(ext_systemc_req_be); + ext_mem.ext_systemc_req_addr_i(ext_systemc_req_addr); + ext_mem.ext_systemc_req_wdata_i(ext_systemc_req_wdata); + ext_mem.ext_systemc_resp_gnt_o(ext_systemc_resp_gnt); + ext_mem.ext_systemc_resp_rdata_o(ext_systemc_resp_rdata); + ext_mem.ext_systemc_resp_rvalid_o(ext_systemc_resp_rvalid); + + + + // You must do one evaluation before enabling waves, in order to allow + // SystemC to interconnect everything for testing. 
+ sc_start(1, SC_NS); + + + VerilatedVcdSc* tfp = nullptr; + tfp = new VerilatedVcdSc; + dut.trace(tfp, 99); // Trace 99 levels of hierarchy + tfp->open("waveform.vcd"); + + // Simulate until $finish + while (!Verilated::gotFinish() && exit_valid !=1 ) { + // Flush the wave files each cycle so we can immediately see the output + // Don't do this in "real" programs, do it in an abort() handler instead + if (tfp) tfp->flush(); + // Simulate 1ns + sc_start(1, SC_NS); + } + + if(exit_valid == 1) { + std::cout<<"Program Finished with value "<< exit_value <close(); + tfp = nullptr; + } + + + exit(exit_val); + +} diff --git a/tb/tb_top.cpp b/tb/tb_top.cpp index 956b77d69..7dd4757e6 100644 --- a/tb/tb_top.cpp +++ b/tb/tb_top.cpp @@ -10,26 +10,10 @@ #include #include +#include "XHEEP_CmdLineOptions.hh" vluint64_t sim_time = 0; - -std::string getCmdOption(int argc, char* argv[], const std::string& option) -{ - std::string cmd; - for( int i = 0; i < argc; ++i) - { - std::string arg = argv[i]; - size_t arg_size = arg.length(); - size_t option_size = option.length(); - - if(arg.find(option)==0){ - cmd = arg.substr(option_size,arg_size-option_size); - } - } - return cmd; -} - void runCycles(unsigned int ncycles, Vtestharness *dut, VerilatedFstC *m_trace){ for(unsigned int i = 0; i < ncycles; i++) { dut->clk_i ^= 1; @@ -42,12 +26,11 @@ void runCycles(unsigned int ncycles, Vtestharness *dut, VerilatedFstC *m_trace){ int main (int argc, char * argv[]) { - unsigned int SRAM_SIZE; - std::string firmware, arg_max_sim_time, arg_openocd, arg_boot_sel, arg_execute_from_flash; - unsigned int max_sim_time; + std::string firmware; + unsigned int max_sim_time, boot_sel, exit_val; bool use_openocd; bool run_all = false; - int i,j, exit_val, boot_sel, execute_from_flash; + Verilated::commandArgs(argc, argv); // Instantiate the model @@ -59,59 +42,24 @@ int main (int argc, char * argv[]) dut->trace (m_trace, 99); m_trace->open ("waveform.vcd"); - arg_openocd = getCmdOption(argc, argv, 
"+openOCD="); - use_openocd = false; - if(arg_openocd.empty()){ - std::cout<<"[TESTBENCH]: No OpenOCD is used"<get_use_openocd(); + firmware = cmd_lines_options->get_firmware(); - arg_max_sim_time = getCmdOption(argc, argv, "+max_sim_time="); - max_sim_time = 0; - if(arg_max_sim_time.empty()){ - std::cout<<"[TESTBENCH]: No Max time specified"<get_max_sim_time(run_all); - arg_boot_sel = getCmdOption(argc, argv, "+execute_from_flash="); - execute_from_flash = 1; + boot_sel = cmd_lines_options->get_boot_sel(); if(boot_sel == 1) { std::cout<<"[TESTBENCH]: ERROR: Executing from SPI is not supported (yet) in Verilator"<jtag_tms_i = 0; dut->jtag_trst_ni = 0; dut->jtag_tdi_i = 0; - dut->execute_from_flash_i = execute_from_flash; + dut->execute_from_flash_i = 1; //this cause boot_sel cannot be 1 anyway dut->boot_select_i = boot_sel; dut->eval(); @@ -172,6 +120,7 @@ int main (int argc, char * argv[]) m_trace->close(); delete dut; + delete cmd_lines_options; exit(exit_val); diff --git a/tb/tb_util.svh.tpl b/tb/tb_util.svh.tpl index a0f295a49..2aafa4e87 100644 --- a/tb/tb_util.svh.tpl +++ b/tb/tb_util.svh.tpl @@ -6,8 +6,8 @@ // Task for loading 'mem' with SystemVerilog system task $readmemh() export "DPI-C" task tb_readHEX; export "DPI-C" task tb_loadHEX; -% for bank in range(ram_numbanks): -export "DPI-C" task tb_writetoSram${bank}; +% for bank in xheep.iter_ram_banks(): +export "DPI-C" task tb_writetoSram${bank.name()}; % endfor export "DPI-C" task tb_getMemSize; export "DPI-C" task tb_set_exit_loop; @@ -16,9 +16,7 @@ import core_v_mini_mcu_pkg::*; task tb_getMemSize; output int mem_size; - output int num_banks; mem_size = core_v_mini_mcu_pkg::MEM_SIZE; - num_banks = core_v_mini_mcu_pkg::NUM_BANKS; endtask task tb_readHEX; @@ -31,11 +29,11 @@ task tb_loadHEX; input string file; //whether to use debug to write to memories logic [7:0] stimuli[core_v_mini_mcu_pkg::MEM_SIZE]; - int i, stimuli_counter, bank, NumBytes, NumBanks; + int i, stimuli_base, w_addr, NumBytes; logic 
[31:0] addr; tb_readHEX(file, stimuli); - tb_getMemSize(NumBytes, NumBanks); + tb_getMemSize(NumBytes); `ifndef VERILATOR for (i = 0; i < NumBytes; i = i + 4) begin @@ -71,43 +69,34 @@ task tb_loadHEX; release x_heep_system_i.core_v_mini_mcu_i.debug_subsystem_i.dm_obi_top_i.master_wdata_o; `else - - stimuli_counter = 0; -% for bank in range(ram_numbanks_cont): - for (i = 0; i < NumBytes / NumBanks; i = i + 4) begin - tb_writetoSram${bank}(i / 4, stimuli[stimuli_counter+3], stimuli[stimuli_counter+2], - stimuli[stimuli_counter+1], stimuli[stimuli_counter]); - stimuli_counter = stimuli_counter + 4; +% for bank in xheep.iter_ram_banks(): + for (i=${bank.start_address()}; i < ${bank.end_address()}; i = i + 4) begin + if (((i/4) & ${2**bank.il_level()-1}) == ${bank.il_offset()}) begin + w_addr = ((i/4) >> ${bank.il_level()}) % ${bank.size()//4}; + tb_writetoSram${bank.name()}(w_addr, stimuli[i+3], stimuli[i+2], + stimuli[i+1], stimuli[i]); + end end % endfor -% if ram_numbanks_il != 0: - for (i = 0; i < NumBytes / NumBanks; i = i + 4) begin -% for bank in range(ram_numbanks_il): - tb_writetoSram${int(ram_numbanks_cont) + bank}(i / 4, stimuli[stimuli_counter+3], stimuli[stimuli_counter+2], - stimuli[stimuli_counter+1], stimuli[stimuli_counter]); - stimuli_counter = stimuli_counter + 4; -% endfor - end -% endif `endif endtask -% for bank in range(ram_numbanks): -task tb_writetoSram${bank}; +% for bank in xheep.iter_ram_banks(): +task tb_writetoSram${bank.name()}; input int addr; input [7:0] val3; input [7:0] val2; input [7:0] val1; input [7:0] val0; `ifdef VCS - force x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_i.gen_sram[${bank}].ram_i.tc_ram_i.sram[addr] = { + force x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_i.ram${bank.name()}_i.tc_ram_i.sram[addr] = { val3, val2, val1, val0 }; - release x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_i.gen_sram[${bank}].ram_i.tc_ram_i.sram[addr]; + release 
x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_i.ram${bank.name()}_i.tc_ram_i.sram[addr]; `else - x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_i.gen_sram[${bank}].ram_i.tc_ram_i.sram[addr] = { + x_heep_system_i.core_v_mini_mcu_i.memory_subsystem_i.ram${bank.name()}_i.tc_ram_i.sram[addr] = { val3, val2, val1, val0 }; `endif diff --git a/tb/testharness.sv b/tb/testharness.sv index 6b1b36519..2a4c70403 100644 --- a/tb/testharness.sv +++ b/tb/testharness.sv @@ -21,6 +21,17 @@ module testharness #( inout wire boot_select_i, inout wire execute_from_flash_i, +`ifdef SIM_SYSTEMC + output logic ext_systemc_req_req_o, + output logic ext_systemc_req_we_o, + output logic [ 3:0] ext_systemc_req_be_o, + output logic [31:0] ext_systemc_req_addr_o, + output logic [31:0] ext_systemc_req_wdata_o, + + input logic ext_systemc_resp_gnt_i, + input logic ext_systemc_resp_rvalid_i, + input logic [31:0] ext_systemc_resp_rdata_i, +`endif input wire jtag_tck_i, input wire jtag_tms_i, input wire jtag_trst_ni, @@ -102,10 +113,6 @@ module testharness #( reg_pkg::reg_req_t [testharness_pkg::EXT_NPERIPHERALS-1:0] ext_periph_slv_req; reg_pkg::reg_rsp_t [testharness_pkg::EXT_NPERIPHERALS-1:0] ext_periph_slv_rsp; - // External xbar slave example port - obi_req_t slow_ram_slave_req; - obi_resp_t slow_ram_slave_resp; - // External interrupts logic [NEXT_INT_RND-1:0] intr_vector_ext; logic memcopy_intr; @@ -366,20 +373,45 @@ module testharness #( .exit() ); - assign mux_jtag_tck = JTAG_DPI ? sim_jtag_tck : jtag_tck_i; - assign mux_jtag_tms = JTAG_DPI ? sim_jtag_tms : jtag_tms_i; - assign mux_jtag_tdi = JTAG_DPI ? sim_jtag_tdi : jtag_tdi_i; - assign mux_jtag_trstn = JTAG_DPI ? sim_jtag_trstn : jtag_trst_ni; + assign mux_jtag_tck = JTAG_DPI ? sim_jtag_tck : jtag_tck_i; + assign mux_jtag_tms = JTAG_DPI ? sim_jtag_tms : jtag_tms_i; + assign mux_jtag_tdi = JTAG_DPI ? sim_jtag_tdi : jtag_tdi_i; + assign mux_jtag_trstn = JTAG_DPI ? sim_jtag_trstn : jtag_trst_ni; + + assign sim_jtag_tdo = JTAG_DPI ? 
mux_jtag_tdo : '0; + assign jtag_tdo_o = !JTAG_DPI ? mux_jtag_tdo : '0; + + // External xbar slave example port + obi_req_t slow_ram_slave_req; + obi_resp_t slow_ram_slave_resp; - assign sim_jtag_tdo = JTAG_DPI ? mux_jtag_tdo : '0; - assign jtag_tdo_o = !JTAG_DPI ? mux_jtag_tdo : '0; +`ifndef SIM_SYSTEMC assign slow_ram_slave_req = ext_slave_req[SLOW_MEMORY_IDX]; assign ext_slave_resp[SLOW_MEMORY_IDX] = slow_ram_slave_resp; +`else + + obi_req_t ext_systemc_req; + obi_resp_t ext_systemc_resp; + + assign ext_systemc_req_req_o = ext_systemc_req.req; + assign ext_systemc_req_we_o = ext_systemc_req.we; + assign ext_systemc_req_be_o = ext_systemc_req.be; + assign ext_systemc_req_addr_o = ext_systemc_req.addr; + assign ext_systemc_req_wdata_o = ext_systemc_req.wdata; + + assign ext_systemc_resp.gnt = ext_systemc_resp_gnt_i; + assign ext_systemc_resp.rvalid = ext_systemc_resp_rvalid_i; + assign ext_systemc_resp.rdata = ext_systemc_resp_rdata_i; + + assign ext_systemc_req = ext_slave_req[SLOW_MEMORY_IDX]; + assign ext_slave_resp[SLOW_MEMORY_IDX] = ext_systemc_resp; +`endif generate if (USE_EXTERNAL_DEVICE_EXAMPLE) begin : gen_USE_EXTERNAL_DEVICE_EXAMPLE +`ifndef SIM_SYSTEMC obi_pkg::obi_req_t slave_fifoout_req; obi_pkg::obi_resp_t slave_fifoout_resp; @@ -395,14 +427,14 @@ module testharness #( // External xbar slave memory example slow_memory #( - .NumWords (128), + .NumWords (8192), .DataWidth(32'd32) ) slow_ram_i ( .clk_i, .rst_ni, .req_i(slave_fifoout_req.req), .we_i(slave_fifoout_req.we), - .addr_i(slave_fifoout_req.addr[8:2]), + .addr_i(slave_fifoout_req.addr[15:2]), .wdata_i(slave_fifoout_req.wdata), .be_i(slave_fifoout_req.be), // output ports @@ -410,6 +442,7 @@ module testharness #( .rdata_o(slave_fifoout_resp.rdata), .rvalid_o(slave_fifoout_resp.rvalid) ); +`endif parameter DMA_TRIGGER_SLOT_NUM = 4; diff --git a/tb/testharness_pkg.sv b/tb/testharness_pkg.sv index d014e0b24..d74011f44 100644 --- a/tb/testharness_pkg.sv +++ b/tb/testharness_pkg.sv @@ -40,19 +40,19 
@@ package testharness_pkg; localparam logic [31:0] MEMCOPY_CTRL_IDX = 32'd0; // External AMS Peripheral - localparam logic [31:0] AMS_START_ADDRESS = core_v_mini_mcu_pkg::EXT_PERIPHERAL_START_ADDRESS + 32'h001000; + localparam logic [31:0] AMS_START_ADDRESS = core_v_mini_mcu_pkg::EXT_PERIPHERAL_START_ADDRESS + 32'h01000; localparam logic [31:0] AMS_SIZE = 32'h100; localparam logic [31:0] AMS_END_ADDRESS = AMS_START_ADDRESS + AMS_SIZE; localparam logic [31:0] AMS_IDX = 32'd1; // External InterFaced FIFO (IFFIFO) Peripheral - localparam logic [31:0] IFFIFO_START_ADDRESS = core_v_mini_mcu_pkg::EXT_PERIPHERAL_START_ADDRESS + 32'h002000; + localparam logic [31:0] IFFIFO_START_ADDRESS = core_v_mini_mcu_pkg::EXT_PERIPHERAL_START_ADDRESS + 32'h02000; localparam logic [31:0] IFFIFO_SIZE = 32'h100; localparam logic [31:0] IFFIFO_END_ADDRESS = IFFIFO_START_ADDRESS + IFFIFO_SIZE; localparam logic [31:0] IFFIFO_IDX = 32'd2; // External Simple Accelerator Peripheral - localparam logic [31:0] SIMPLE_ACC_START_ADDRESS = core_v_mini_mcu_pkg::EXT_PERIPHERAL_START_ADDRESS + 32'h003000; + localparam logic [31:0] SIMPLE_ACC_START_ADDRESS = core_v_mini_mcu_pkg::EXT_PERIPHERAL_START_ADDRESS + 32'h03000; localparam logic [31:0] SIMPLE_ACC_SIZE = 32'h100; localparam logic [31:0] SIMPLE_ACC_END_ADDRESS = SIMPLE_ACC_START_ADDRESS + SIMPLE_ACC_SIZE; localparam logic [31:0] SIMPLE_ACC_IDX = 32'd3; diff --git a/util/MakefileHelp b/util/MakefileHelp index dc5f29017..8e072d0a2 100755 --- a/util/MakefileHelp +++ b/util/MakefileHelp @@ -1,8 +1,7 @@ #!/bin/bash - -FILE=Makefile RULE_COLOR="$(tput setaf 6)" SECTION_COLOR="$(tput setaf 3)" +SSECTION_COLOR="$(tput setaf 5)" VARIABLE_COLOR="$(tput setaf 2)" VALUE_COLOR="$(tput setaf 1)" CLEAR_STYLE="$(tput sgr0)" @@ -17,13 +16,14 @@ echo "" TARGET_REGEX="^[a-zA-Z0-9%_\/%-]+:" SECTION_REGEX="^##\s*@section\s*(.*)$" +SSECTION_REGEX="^##\s*@subsection\s*(.*)$" DOCBLOCK_REGEX="^##\s*(.*)$" PARAM_REGEX="@param\s+([a-zA-Z_]+)(=([^\s]+))?\s*(.*$)?" 
COMMENT="" PARAMS="" PARAMS_DOC="" -cat $FILE | while read line +cat $FILE_FOR_HELP | while read line do # do something with $line here if [[ ! -z $line ]] @@ -31,7 +31,11 @@ do if [[ $line =~ $SECTION_REGEX ]] then SECTION_NAME=$(echo $line | sed -e "s/^##\s*@section\s*\(.*\)$/\1/g") - echo "$SECTION_COLOR$SECTION_NAME$CLEAR_STYLE:" + echo "$SECTION_COLOR$SECTION_NAME:$CLEAR_STYLE" + elif [[ $line =~ $SSECTION_REGEX ]] + then + SECTION_NAME=$(echo $line | sed -e "s/^##\s*@subsection\s*\(.*\)$/\1/g") + echo "$SSECTION_COLOR $SECTION_NAME:$CLEAR_STYLE" elif [[ $line =~ $TARGET_REGEX ]] then # if there is no comment for this target, we don't display it in the docs to keep private targets hidden diff --git a/util/generate-makefile-help b/util/generate-makefile-help deleted file mode 100755 index 30e4893cb..000000000 --- a/util/generate-makefile-help +++ /dev/null @@ -1,2 +0,0 @@ -help: - util/MakefileHelp \ No newline at end of file diff --git a/util/mcu_gen.py b/util/mcu_gen.py index 5c86308fa..c661d780d 100755 --- a/util/mcu_gen.py +++ b/util/mcu_gen.py @@ -18,6 +18,8 @@ from mako.template import Template import collections from math import log2 +import x_heep_gen.load_config +from x_heep_gen.system import BusType class Pad: @@ -30,13 +32,25 @@ def remove_comma_io_interface(self): def create_pad_ring(self): + # Mapping dictionary from string to integer + mapping_dict = { + 'top' : 'core_v_mini_mcu_pkg::TOP', + 'right' : 'core_v_mini_mcu_pkg::RIGHT', + 'bottom' : 'core_v_mini_mcu_pkg::BOTTOM', + 'left' : 'core_v_mini_mcu_pkg::LEFT' + } + + mapping = '' + if self.pad_mapping is not None: + mapping = ', .SIDE(' + mapping_dict[self.pad_mapping] + ')' + self.interface = ' inout wire ' + self.name + '_io,\n' if self.pad_type == 'input': self.pad_ring_io_interface = ' inout wire ' + self.io_interface + ',' self.pad_ring_ctrl_interface += ' output logic ' + self.signal_name + 'o,' self.pad_ring_instance = \ - 'pad_cell_input #(.PADATTR('+ str(self.attribute_bits) +')) ' + 
self.cell_name + ' ( \n' + \ + 'pad_cell_input #(.PADATTR('+ str(self.attribute_bits) +')' + mapping + ') ' + self.cell_name + ' ( \n' + \ ' .pad_in_i(1\'b0),\n' + \ ' .pad_oe_i(1\'b0),\n' + \ ' .pad_out_o(' + self.signal_name + 'o),\n' + \ @@ -45,7 +59,7 @@ def create_pad_ring(self): self.pad_ring_io_interface = ' inout wire ' + self.io_interface + ',' self.pad_ring_ctrl_interface += ' input logic ' + self.signal_name + 'i,' self.pad_ring_instance = \ - 'pad_cell_output #(.PADATTR('+ str(self.attribute_bits) +')) ' + self.cell_name + ' ( \n' + \ + 'pad_cell_output #(.PADATTR('+ str(self.attribute_bits) +')' + mapping + ') ' + self.cell_name + ' ( \n' + \ ' .pad_in_i(' + self.signal_name + 'i),\n' + \ ' .pad_oe_i(1\'b1),\n' + \ ' .pad_out_o(),\n' + \ @@ -56,7 +70,7 @@ def create_pad_ring(self): self.pad_ring_ctrl_interface += ' output logic ' + self.signal_name + 'o,\n' self.pad_ring_ctrl_interface += ' input logic ' + self.signal_name + 'oe_i,' self.pad_ring_instance = \ - 'pad_cell_inout #(.PADATTR('+ str(self.attribute_bits) +')) ' + self.cell_name + ' ( \n' + \ + 'pad_cell_inout #(.PADATTR('+ str(self.attribute_bits) +')' + mapping + ') ' + self.cell_name + ' ( \n' + \ ' .pad_in_i(' + self.signal_name + 'i),\n' + \ ' .pad_oe_i(' + self.signal_name + 'oe_i),\n' + \ ' .pad_out_o(' + self.signal_name + 'o),\n' + \ @@ -195,13 +209,14 @@ def create_pad_ring_bonding(self): self.pad_ring_bonding_bonding += ' .' 
+ self.signal_name + 'oe_i(' + oe_internal_signals + '),' self.x_heep_system_interface += ' inout wire ' + self.signal_name + 'io,' - def __init__(self, name, cell_name, pad_type, index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, has_attribute, attribute_bits): + def __init__(self, name, cell_name, pad_type, pad_mapping, index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, has_attribute, attribute_bits): self.name = name self.cell_name = cell_name self.index = index self.localparam = 'PAD_' + name.upper() self.pad_type = pad_type + self.pad_mapping = pad_mapping self.pad_mux_list = pad_mux_list if('low' in pad_active): @@ -285,12 +300,18 @@ def write_template(tpl_path, outdir, outfile, **kwargs): def main(): parser = argparse.ArgumentParser(prog="mcugen") - parser.add_argument("--cfg", + parser.add_argument("--cfg_peripherals", "-c", metavar="file", type=argparse.FileType('r'), required=True, help="A configuration file") + + parser.add_argument("--config", + metavar="file", + type=str, + required=True, + help="X-Heep general configuration") parser.add_argument("--pads_cfg", "-pc", @@ -380,7 +401,7 @@ def main(): logging.basicConfig(level=logging.DEBUG) # Read HJSON description of System. 
- with args.cfg as file: + with args.cfg_peripherals as file: try: srcfull = file.read() obj = hjson.loads(srcfull, use_decimal=True) @@ -404,47 +425,21 @@ def main(): outfile = args.outfile + config_override = x_heep_gen.system.Override(None, None, None) + if args.cpu != None and args.cpu != '': cpu_type = args.cpu else: cpu_type = obj['cpu_type'] if args.bus != None and args.bus != '': - bus_type = args.bus - else: - bus_type = obj['bus_type'] + config_override.bus_type = BusType(args.bus) if args.memorybanks != None and args.memorybanks != '': - ram_numbanks_cont = int(args.memorybanks) - else: - ram_numbanks_cont = int(obj['ram']['numbanks']) + config_override.numbanks = int(args.memorybanks) if args.memorybanks_il != None and args.memorybanks_il != '': - ram_numbanks_il = int(args.memorybanks_il) - else: - ram_numbanks_il = int(obj['ram']['numbanks_interleaved']) - - if ram_numbanks_il != 0: - log_ram_numbanks_il = int(log2(ram_numbanks_il)) - - if not log2(ram_numbanks_il).is_integer(): - exit("ram interleaved numbanks must be a power of 2 instead of " + str(ram_numbanks_il)) - else: - log_ram_numbanks_il = 0 - - if ram_numbanks_il != 0 and bus_type == 'onetoM': - exit("bus type must be 'NtoM' instead 'onetoM' to access the interleaved memory banks in parallel" + str(args.bus)) - - if ram_numbanks_cont + ram_numbanks_il < 2 and ram_numbanks_cont + ram_numbanks_il > 16: - exit("ram numbanks must be between 2 and 16 instead of " + str(ram_numbanks_cont + ram_numbanks_il)) - else: - ram_numbanks = ram_numbanks_cont + ram_numbanks_il - - ram_start_address = string2int(obj['ram']['address']) - if int(ram_start_address,16) != 0: - exit("ram start address must be 0 instead of " + str(ram_start_address)) - - ram_size_address = '{:08X}'.format(ram_numbanks*32*1024) + config_override.numbanks_il = int(args.memorybanks_il) if args.external_domains != None and args.external_domains != '': external_domains = int(args.external_domains) @@ -454,6 +449,12 @@ def main(): if 
external_domains > 32: exit("external_domains must be less than 32 instead of " + str(external_domains)) + + + xheep = x_heep_gen.load_config.load_cfg_file(pathlib.PurePath(str(args.config)), config_override) + + + debug_start_address = string2int(obj['debug']['address']) if int(debug_start_address, 16) < int('10000', 16): exit("debug start address must be greater than 0x10000") @@ -519,47 +520,11 @@ def len_extracted_peripherals(peripherals): flash_mem_start_address = string2int(obj['flash_mem']['address']) flash_mem_size_address = string2int(obj['flash_mem']['length']) - linker_onchip_code_start_address = string2int(obj['linker_script']['onchip_ls']['code']['address']) - linker_onchip_code_size_address = string2int(obj['linker_script']['onchip_ls']['code']['lenght']) - - if int(linker_onchip_code_size_address,16) < 32*1024: - exit("The code section must be at least 32KB, instead it is " + str(linker_onchip_code_size_address)) - - linker_onchip_data_start_address = string2int(obj['linker_script']['onchip_ls']['data']['address']) - if (obj['linker_script']['onchip_ls']['data']['lenght'].split()[0].split(",")[0] == "whatisleft"): - if ram_numbanks_il == 0 or (ram_numbanks_cont == 1 and ram_numbanks_il > 0): - linker_onchip_data_size_address = str('{:08X}'.format(int(ram_size_address,16) - int(linker_onchip_code_size_address,16))) - else: - linker_onchip_data_size_address = str('{:08X}'.format(int(ram_size_address,16) - int(linker_onchip_code_size_address,16) - ram_numbanks_il*32*1024)) - else: - if ram_numbanks_il == 0 or (ram_numbanks_cont == 1 and ram_numbanks_il > 0): - linker_onchip_data_size_address = string2int(obj['linker_script']['onchip_ls']['data']['lenght']) - else: - linker_onchip_data_size_address = str('{:08X}'.format(int(string2int(obj['linker_script']['onchip_ls']['data']['lenght']),16) - ram_numbanks_il*32*1024)) - - linker_onchip_il_start_address = str('{:08X}'.format(int(linker_onchip_data_start_address,16) + 
int(linker_onchip_data_size_address,16))) - linker_onchip_il_size_address = str('{:08X}'.format(ram_numbanks_il*32*1024)) - stack_size = string2int(obj['linker_script']['stack_size']) heap_size = string2int(obj['linker_script']['heap_size']) - linker_flash_code_start_address = str('{:08X}'.format(int(linker_onchip_code_start_address,16) + int(flash_mem_start_address,16))) - linker_flash_data_start_address = str('{:08X}'.format(int(linker_onchip_data_start_address,16) + int(flash_mem_start_address,16))) - linker_flash_il_start_address = str('{:08X}'.format(int(linker_onchip_il_start_address,16) + int(flash_mem_start_address,16))) - - if ram_numbanks_il == 0 or (ram_numbanks_cont == 1 and ram_numbanks_il > 0): - linker_flash_left_start_address = str('{:08X}'.format(int(linker_flash_data_start_address,16) + int(linker_onchip_data_size_address,16))) - linker_flash_left_size_address = str('{:08X}'.format(int(flash_mem_size_address,16) - int(linker_onchip_code_size_address,16) - int(linker_onchip_data_size_address,16))) - else: - linker_flash_left_start_address = str('{:08X}'.format(int(linker_flash_il_start_address,16) + int(linker_onchip_il_size_address,16))) - linker_flash_left_size_address = str('{:08X}'.format(int(flash_mem_size_address,16) - int(linker_onchip_code_size_address,16) - int(linker_onchip_data_size_address,16) - int(linker_onchip_il_size_address,16))) - - - if ((int(linker_onchip_data_size_address,16) + int(linker_onchip_code_size_address,16)) > int(ram_size_address,16)): - exit("The code and data section must fit in the RAM size, instead they takes " + str(linker_onchip_data_size_address + linker_onchip_code_size_address)) - - if ((int(stack_size,16) + int(heap_size,16)) > int(ram_size_address,16)): + if ((int(stack_size,16) + int(heap_size,16)) > xheep.ram_size_address()): exit("The stack and heap section must fit in the RAM size, instead they takes " + str(stack_size + heap_size)) @@ -610,7 +575,7 @@ def len_extracted_peripherals(peripherals): 
pad_name = key pad_num = pads[key]['num'] - pad_type = pads[key]['type'] + pad_type = pads[key]['type'].strip(',') try: pad_offset = int(pads[key]['num_offset']) @@ -621,6 +586,11 @@ def len_extracted_peripherals(peripherals): pad_active = pads[key]['active'] except KeyError: pad_active = 'high' + + try: + pad_mapping = pads[key]['mapping'].strip(',') + except KeyError: + pad_mapping = None try: pad_mux_list_hjson = pads[key]['mux'] @@ -676,13 +646,13 @@ def len_extracted_peripherals(peripherals): except KeyError: pad_skip_declaration_mux = False - p = Pad(pad_mux, '', pads[key]['mux'][pad_mux]['type'], 0, pad_active_mux, pad_driven_manually_mux, pad_skip_declaration_mux, [], pads_attributes!=None, pads_attributes_bits) + p = Pad(pad_mux, '', pads[key]['mux'][pad_mux]['type'], pad_mapping, 0, pad_active_mux, pad_driven_manually_mux, pad_skip_declaration_mux, [], pads_attributes!=None, pads_attributes_bits) pad_mux_list.append(p) if pad_num > 1: for p in range(pad_num): pad_cell_name = "pad_" + key + "_" + str(p+pad_offset) + "_i" - pad_obj = Pad(pad_name + "_" + str(p+pad_offset), pad_cell_name, pad_type, pad_index_counter, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) + pad_obj = Pad(pad_name + "_" + str(p+pad_offset), pad_cell_name, pad_type, pad_mapping, pad_index_counter, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) if not pad_keep_internal: pad_obj.create_pad_ring() pad_obj.create_core_v_mini_mcu_ctrl() @@ -701,7 +671,7 @@ def len_extracted_peripherals(peripherals): else: pad_cell_name = "pad_" + key + "_i" - pad_obj = Pad(pad_name, pad_cell_name, pad_type, pad_index_counter, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) + pad_obj = Pad(pad_name, pad_cell_name, pad_type, pad_mapping, pad_index_counter, pad_active, pad_driven_manually, pad_skip_declaration, 
pad_mux_list, pads_attributes!=None, pads_attributes_bits) if not pad_keep_internal: pad_obj.create_pad_ring() pad_obj.create_core_v_mini_mcu_ctrl() @@ -736,6 +706,11 @@ def len_extracted_peripherals(peripherals): except KeyError: pad_active = 'high' + try: + pad_mapping = external_pads[key]['mapping'] + except KeyError: + pad_mapping = None + try: pad_mux_list_hjson = external_pads[key]['mux'] except KeyError: @@ -782,13 +757,13 @@ def len_extracted_peripherals(peripherals): except KeyError: pad_skip_declaration_mux = False - p = Pad(pad_mux, '', external_pads[key]['mux'][pad_mux]['type'], 0, pad_active_mux, pad_driven_manually_mux, pad_skip_declaration_mux, [], pads_attributes!=None, pads_attributes_bits) + p = Pad(pad_mux, '', external_pads[key]['mux'][pad_mux]['type'], pad_mapping, 0, pad_active_mux, pad_driven_manually_mux, pad_skip_declaration_mux, [], pads_attributes!=None, pads_attributes_bits) pad_mux_list.append(p) if pad_num > 1: for p in range(pad_num): pad_cell_name = "pad_" + key + "_" + str(p+pad_offset) + "_i" - pad_obj = Pad(pad_name + "_" + str(p+pad_offset), pad_cell_name, pad_type, external_pad_index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) + pad_obj = Pad(pad_name + "_" + str(p+pad_offset), pad_cell_name, pad_type, pad_mapping, external_pad_index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) pad_obj.create_pad_ring() pad_obj.create_pad_ring_bonding() pad_obj.create_internal_signals() @@ -804,7 +779,7 @@ def len_extracted_peripherals(peripherals): else: pad_cell_name = "pad_" + key + "_i" - pad_obj = Pad(pad_name, pad_cell_name, pad_type, external_pad_index, pad_active, pad_driven_manually, pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) + pad_obj = Pad(pad_name, pad_cell_name, pad_type, pad_mapping, external_pad_index, pad_active, pad_driven_manually, 
pad_skip_declaration, pad_mux_list, pads_attributes!=None, pads_attributes_bits) pad_obj.create_pad_ring() pad_obj.create_pad_ring_bonding() pad_obj.create_internal_signals() @@ -838,15 +813,9 @@ def len_extracted_peripherals(peripherals): total_pad_list.append(last_pad) kwargs = { + "xheep" : xheep, "cpu_type" : cpu_type, - "bus_type" : bus_type, - "ram_start_address" : ram_start_address, - "ram_numbanks" : ram_numbanks, - "ram_numbanks_cont" : ram_numbanks_cont, - "ram_numbanks_il" : ram_numbanks_il, - "log_ram_numbanks_il" : log_ram_numbanks_il, "external_domains" : external_domains, - "ram_size_address" : ram_size_address, "debug_start_address" : debug_start_address, "debug_size_address" : debug_size_address, "ao_peripheral_start_address" : ao_peripheral_start_address, @@ -861,17 +830,6 @@ def len_extracted_peripherals(peripherals): "ext_slave_size_address" : ext_slave_size_address, "flash_mem_start_address" : flash_mem_start_address, "flash_mem_size_address" : flash_mem_size_address, - "linker_flash_code_start_address" : linker_flash_code_start_address, - "linker_flash_data_start_address" : linker_flash_data_start_address, - "linker_flash_il_start_address" : linker_flash_il_start_address, - "linker_flash_left_start_address" : linker_flash_left_start_address, - "linker_flash_left_size_address" : linker_flash_left_size_address, - "linker_onchip_code_start_address" : linker_onchip_code_start_address, - "linker_onchip_code_size_address" : linker_onchip_code_size_address, - "linker_onchip_data_start_address" : linker_onchip_data_start_address, - "linker_onchip_data_size_address" : linker_onchip_data_size_address, - "linker_onchip_il_start_address" : linker_onchip_il_start_address, - "linker_onchip_il_size_address" : linker_onchip_il_size_address, "stack_size" : stack_size, "heap_size" : heap_size, "plic_used_n_interrupts" : plic_used_n_interrupts, diff --git a/util/structs_gen.py b/util/structs_gen.py index f815de761..3c776b42f 100644 --- a/util/structs_gen.py +++ 
b/util/structs_gen.py @@ -1,4 +1,5 @@ import hjson +from math import ceil import string import argparse import sys @@ -297,7 +298,7 @@ def add_registers(peripheral_json): n_bits += count_bits(f["bits"]) # computes the number of registers needed to pack all the bit fields needed - n_multireg = int((count * n_bits) / int(peripheral_json["regwidth"])) + n_multireg = ceil((count * n_bits) / int(peripheral_json["regwidth"])) # generate the multiregisters for r in range(n_multireg): diff --git a/util/test_all.sh b/util/test_all.sh index 60d45a286..0025cb154 100755 --- a/util/test_all.sh +++ b/util/test_all.sh @@ -338,7 +338,7 @@ sed 's/is_included: "no",/is_included: "yes",/' -i mcu_cfg.hjson if [ $DEBUG -eq 0 ]; then # The MCU is generated with several memory banks to avoid example code not fitting. - make mcu-gen MEMORY_BANKS=3 EXTERNAL_DOMAINS=1 + make mcu-gen X_HEEP_CFG=configs/testall.hjson EXTERNAL_DOMAINS=1 if [ "$SIMULATOR" != "none" ]; then # Make the simualtion model diff --git a/util/x_heep_gen/__init__.py b/util/x_heep_gen/__init__.py new file mode 100644 index 000000000..c1a6b0068 --- /dev/null +++ b/util/x_heep_gen/__init__.py @@ -0,0 +1,4 @@ +from .system import XHeep +from . import system +from . import load_config +from . import ram_bank \ No newline at end of file diff --git a/util/x_heep_gen/linker_section.py b/util/x_heep_gen/linker_section.py new file mode 100644 index 000000000..1f5fe4684 --- /dev/null +++ b/util/x_heep_gen/linker_section.py @@ -0,0 +1,80 @@ +from dataclasses import * +from typing import Optional + +@dataclass +class LinkerSection(): + """ + Object representing a section in the linker configuration. + + If the end address is set to `None` it will be infered in the building process. + """ + + name: str + """ + The name of the section + + The name can be anything that does not collide with section names used by the linker, + except code and data that are used to configure the size of the code and data part. 
+ code and data do not only contain the actual .code and .data section but other related sections. + """ + + start: int + """The start address""" + + end: Optional[int] + """The end address""" + + def __post_init__(self): + self.check() + + def check(self): + """ + Does basic type checking and sanity checking. + + - Checks the type of all members + - Checks that the name is not empty + - Checks that the start address and size are positive + + :raise TypeError: when the type of a member is not the correct ine. + :raise ValueError: when the name is empty or start or size are negative + """ + if type(self.name) is not str: + raise TypeError("name should be of type str") + if type(self.start) is not int: + raise TypeError("start should be of type int") + if type(self.end) is not int and self.end is not None: + raise TypeError("end should be of type int") + + if self.name == "": + raise ValueError("name should not be empty") + if self.start < 0: + raise ValueError("start address should be positif") + if self.end is not None and self.end <= self.start: + raise ValueError("end address should be bigger than the start address") + + @staticmethod + def by_size(name: str, start: int, size: int) -> "LinkerSection": + """ + Creates a Linker Section by it's size rather than end address. 
+ + :param str name: the name of the section + :param int start: the start address + :param int size: the size of the section + :return: the linker section + :rtype: LinkerSection + """ + if type(name) is not str: + raise TypeError("name should be of type str") + if type(start) is not int: + raise TypeError("start should be of type int") + if type(size) is not int: + raise TypeError("size should be of type int") + + return LinkerSection(name, start, start + size) + + @property + def size(self) -> Optional[int]: + """The size in Bytes""" + if self.end is not None: + return self.end - self.start + return None \ No newline at end of file diff --git a/util/x_heep_gen/load_config.py b/util/x_heep_gen/load_config.py new file mode 100644 index 000000000..1c2da738a --- /dev/null +++ b/util/x_heep_gen/load_config.py @@ -0,0 +1,293 @@ +import importlib +from pathlib import PurePath +from typing import List, Optional, Union +import hjson + +from .linker_section import LinkerSection +from .system import BusType, Override, XHeep + +def to_int(input) -> Union[int, None]: + if type(input) is int: + return input + + if type(input) is str: + base = 10 + if len(input) >= 2: + if input[0:2].upper() == "0X": + base = 16 + input = input[2:] + elif input[0:2] == "0o": + base = 8 + input = input[2:] + elif input[0:2].upper() == "0b": + base = 2 + input = input[2:] + + return int(input, base) + return None + + +def ram_list(l: "List[int]", entry): + """ + Parses the hjson ram bank configuration in continuous mode. + + :param List[int] l: the list where bank sizes in kiB should be added + :enrtry: the entry to be parsed. It should be a list an integer or an continuous dictionary + :raise RuntimeError: when an invalid configuration is processed. 
+ """ + if type(l) is not list: + raise TypeError("l should be of type list") + + if type(entry) is int: + l.append(entry) + return + + if type(entry) is list: + for i in entry: + ram_list(l, i) + return + + if type(entry) is hjson.OrderedDict: + num = 1 + if "num" in entry: + if type(entry["num"]) is not int: + raise RuntimeError("if the num field is present in ram configuration it should be an integer") + num = entry["num"] + + if "sizes" in entry: + for _ in range(num): + ram_list(l, entry["sizes"]) + return + else: + raise RuntimeError("dictionaries in continuous ram configuration sections should at least have a sizes entry") + + raise RuntimeError("entries in ram configuration should either be integer, lists, or dictionaries") + + + +def load_ram_configuration(system: XHeep, mem: hjson.OrderedDict): + """ + Reads the whole ram configuration. + + :param XHeep system: the system object where the ram should be added. + :param hjson.OrderedDict mem: The configuration part with the ram informations. + :raise TypeError: when arguments do not have the right type + :raise RuntimeError: when an invalid configuration is processed. 
+ """ + if not isinstance(system, XHeep): + raise TypeError("system should be an instance of XHeep object") + if type(mem) is not hjson.OrderedDict: + raise TypeError("mem should be of type hjson.OrderedDict") + + for key, value in mem.items(): + if type(value) is not hjson.OrderedDict: + raise RuntimeError("Ram configuration entries should be dictionaries") + + section_name = "" + if "auto_section" in value and value["auto_section"] == "auto": + section_name = key + + t = "continuous" + if "type" in value: + t = value["type"] + if type(t) is not str: + raise RuntimeError("ram type should be a string") + if t != "continuous" and t != "interleaved": + raise RuntimeError(f"ram type should be continuous or interleaved not {t}") + + if t == "interleaved": + if "num" not in value or type(value["num"]) is not int: + raise RuntimeError("The num field is required for interleaved ram section and should be an integer") + + if "size" not in value or type(value["size"]) is not int: + raise RuntimeError("The size field is required for interleaved ram section and should be an integer") + + system.add_ram_banks_il(int(value["num"]), int(value["size"]), section_name) + + elif t == "continuous": + banks: List[int] = [] + ram_list(banks, value) + system.add_ram_banks(banks, section_name) + + + +def load_linker_config(system: XHeep, config: list): + """ + Reads the whole linker section configuration. + + :param XHeep system: the system object where the sections should be added. + :param hjson.OrderedDict mem: The configuration part with the section informations. + :raise TypeError: when arguments do not have the right type + :raise RuntimeError: when an invalid configuration is processed. 
+ """ + if type(config) is not list: + raise RuntimeError("Linker Section configuraiton should be a list.") + + for l in config: + if type(l) is not hjson.OrderedDict: + raise RuntimeError("Sections should be represented as Dictionaries") + if "name" not in l: + raise RuntimeError("All sections should have names") + + if "start" not in l: + raise RuntimeError("All sections should have a start") + + name = l["name"] + start = to_int(l["start"]) + + if type(name) is not str: + raise RuntimeError("Section names should be strings") + + if name == "": + raise RuntimeError("Section names should not be empty") + + if type(start) is not int: + raise RuntimeError("The start of a section should be an integer") + + if "size" in l and "end" in l: + raise RuntimeError("Each section should only specify end or size.") + + end = 0 + if "size" in l: + size = to_int(l["size"]) + if size is None: + raise RuntimeError("Section sizes should be an integer") + if size <= 0: + raise RuntimeError("Section sizes should be strictly positive") + end = start + size + + elif "end" in l: + end = to_int(l["end"]) + if end is None: + raise RuntimeError("End address should be an integer") + if end <= start: + raise RuntimeError("Sections should end after their start") + else: + end = None + + system.add_linker_section(LinkerSection(name, start, end)) + + + + +def load_cfg_hjson(src: str, override: Optional[Override] = None) -> XHeep: + """ + Loads the configuration passed as a hjson string and creates an object representing the mcu. 
+ + :param str src: configuration content + :param Optional[Override] override: configs to be overriden + :return: the object representing the mcu configuration + :rtype: XHeep + :raise RuntimeError: when and invalid configuration is passed or when the sanity checks failed + """ + config = hjson.loads(src, parse_int=int, object_pairs_hook=hjson.OrderedDict) + mem_config = None + bus_config = None + ram_address_config = None + linker_config = None + + for key, value in config.items(): + if key == "ram_banks": + mem_config = value + elif key == "bus_type": + bus_config = value + elif key == "ram_address": + ram_address_config = value + elif key == "linker_sections": + linker_config = value + + if mem_config is None: + raise RuntimeError("No memory configuration found") + if bus_config is None: + raise RuntimeError("No bus type configuration found") + + ram_start = 0 + if ram_address_config is not None: + if type(ram_address_config) is not int: + RuntimeError("The ram_address should be an intger") + ram_start = ram_address_config + + system = XHeep(BusType(bus_config), ram_start, override=override) + load_ram_configuration(system, mem_config) + + if linker_config is not None: + load_linker_config(system, linker_config) + + system.build() + if not system.validate(): + raise RuntimeError("Could not validate system configuration") + return system + + + +def _chk_purep(f): + """ + Helper to check the type is `PurePath` + + :param f: object to check + :raise TypeError: when object is of wrong type. + """ + if not isinstance(f, PurePath): + raise TypeError("parameter should be of type PurePath") + + + +def load_cfg_hjson_file(f: PurePath, override: Optional[Override] = None) -> XHeep: + """ + Loads the configuration passed in the path as hjson and creates an object representing the mcu. 
+ + :param PurePath f: path of the configuration + :param Optional[Override] override: configs to be overriden + :return: the object representing the mcu configuration + :rtype: XHeep + :raise RuntimeError: when and invalid configuration is passed or when the sanity checks failed + """ + _chk_purep(f) + + with open(f, "r") as file: + return load_cfg_hjson(file.read(), override) + + + +def load_cfg_script_file(f: PurePath) -> XHeep: + """ + Executes the python file passed as argument to cinfigure the system. + + This file should have a function config that takes no parameters and returns an instance (or subclass) of the XHeep type. + The script can import modules from the util directory. + The script should not have side effects as it is called multiple time in the current makefile. + + :param PurePath f: path of the configuration + :return: the object representing the mcu configuration + :rtype: XHeep + :raise RuntimeError: when and invalid configuration is passed or when the sanity checks failed + """ + _chk_purep(f) + + spec = importlib.util.spec_from_file_location("configs._config", f) + mod = importlib.util.module_from_spec(spec) + spec.loader.exec_module(mod) + + return mod.config() + + + +def load_cfg_file(f: PurePath, override: Optional[Override] = None) -> XHeep: + """ + Load the Configuration by extension type. 
It currently supports .hjson and .py + + :param PurePath f: path of the configuration + :param Optional[Override] override: configs to be overriden + :return: the object representing the mcu configuration + :rtype: XHeep + :raise RuntimeError: when and invalid configuration is passed or when the sanity checks failed + """ + _chk_purep(f) + + if f.suffix == ".hjson": + return load_cfg_hjson_file(f, override) + + if f.suffix == ".py": + return load_cfg_script_file(f) + + raise RuntimeError(f"unsupported file extension {f.suffix}") \ No newline at end of file diff --git a/util/x_heep_gen/ram_bank.py b/util/x_heep_gen/ram_bank.py new file mode 100644 index 000000000..20f043ebf --- /dev/null +++ b/util/x_heep_gen/ram_bank.py @@ -0,0 +1,133 @@ +from dataclasses import * + +def is_pow2(n: int) -> bool: + """ + check if n is a power of two + + :param int n: number to be checked + :return: True if n is a power of two + :rtype: bool + """ + return n > 0 and (n & (n-1)) == 0 + + + +class Bank(): + """ + Represents a ram bank + + :param int size_k: size in kiB + :param int start_address: start address of the bank, in interleaved mode it should be the start address od the whole group + :param int map_idx: index in the global address map. Has to be unique. Interleaved mode banks should have consecutive indices. + :param int il_level: number of bits used for interleaving. + :param int il_offset: position in interleaved bank group if in any else 0. Should be consistent with map_idx. + :raise TypeError: when parameters don't have the right type. + :raise ValueError: when size_k isn't a power of two. + :raise ValueError: when start_address is not aligned on size. + :raise ValueError: when il_offset is to big for the given il_level(). 
+ """ + def __init__(self, size_k: int, start_address: int, map_idx: int, il_level: int = 0, il_offset: int = 0): + if not type(size_k) is int: + raise TypeError("Bank size should be an int") + + if not type(start_address) is int: + raise TypeError("Start address should be an int") + + if not type(map_idx) is int: + raise TypeError("map_idx size should be an int") + + if not type(il_level) is int: + raise TypeError("il_level should be an int") + + if not type(il_offset) is int: + raise TypeError("il_offset size should be an int") + + self._size_k: int = size_k + self._start_address: int = start_address + self._map_idx: int = map_idx + self._il_level: int = il_level + self._il_offset: int = il_offset + + # check if power of 2 + if not is_pow2(self._size_k): + raise ValueError(f"Bank size {self._size_k}kiB is not a positive power of two") + + if self._il_offset >= 2**self._il_level: + raise ValueError(f"il_offset is to big for an il_level of {self._il_level}") + + mask = 0b11 + if not self._start_address & mask == 0: + raise ValueError(f"start_address is not aligned on word size") + + + #TODO: Validate start address + + self._end_address = self._start_address + self._size_k*1024 * 2**self._il_level + + def size(self) -> int: + """ + :return: the bank size in Bytes + :rtype: int + """ + return self._size_k * 1024 + + def name(self) -> str: + """ + :return: the bank name + :rtype: str + """ + return str(self._map_idx-1) #TODO: do something better + + def start_address(self) -> int: + """ + :return: the start address + :rtype: int + """ + return self._start_address + + def end_address(self) -> int: + """ + :return: the end address + :rtype: int + """ + return self._end_address + + def map_idx(self) -> int: + """ + :return: the index used in global bus + :rtype: int + """ + return self._map_idx + + def il_level(self) -> int: + """ + :return: the number of bits used to choose the bank when it is in an interleaved group else 0 + :rtype: int + """ + return self._il_level + 
+ def il_offset(self) -> int: + """ + :return: the position of the bank in an interleaved group. + :rtype: int + """ + return self._il_offset + +@dataclass +class ILRamGroup(): + """ + Dataclass to represent information about interleaved memory banks group. + """ + + start: int + """start address of the group""" + + size: int + """size of the group""" + + n: int + """number of banks in the group""" + + first_name: str + """name of the first bank""" + \ No newline at end of file diff --git a/util/x_heep_gen/system.py b/util/x_heep_gen/system.py new file mode 100644 index 000000000..09e4a855e --- /dev/null +++ b/util/x_heep_gen/system.py @@ -0,0 +1,430 @@ +from copy import deepcopy +from dataclasses import dataclass +from typing import Generator, Iterable, List, Optional, Set, Union +from enum import Enum +from .ram_bank import Bank, is_pow2, ILRamGroup +from .linker_section import LinkerSection + +class BusType(Enum): + """Enumeration of all supported bus types""" + + onetoM = 'onetoM' + NtoM = 'NtoM' + + +@dataclass +class Override(): + """ + Bundles information that can be overriden in the XHeep class. + """ + bus_type: Optional[BusType] + numbanks: Optional[int] + numbanks_il: Optional[int] + + + +class XHeep(): + """ + This object represents the whole mcu. + + An instance of this object is also passed to the mako templates. + + :param BusType bus_type: The bus type chosen for this mcu. + :param int ram_start_address: The address of the first ram bank. For now only 0 is tested. Defaults to 0. + :param Optional[Override] override: configs to be overriden + :raise TypeError: when parameters are of incorrect type. 
+ """ + + IL_COMPATIBLE_BUS_TYPES: "Set[BusType]" = set([BusType.NtoM]) + """Constant set of bus types that support interleaved memory banks""" + + + + def __init__(self, bus_type: BusType, ram_start_address: int = 0, override: Optional[Override] = None): + if not type(bus_type) is BusType: + raise TypeError(f"XHeep.bus_type should be of type BusType not {type(self._bus_type)}") + if not type(ram_start_address) is int: + raise TypeError("ram_start_address should be of type int") + + if ram_start_address != 0: + raise ValueError(f"ram start address must be 0 instead of {ram_start_address}") + + self._bus_type: BusType = bus_type + if override is not None and override.bus_type is not None: + self._bus_type = override.bus_type + + + self._ram_start_address: int = ram_start_address + self._ram_banks: List[Bank] = [] + self._ram_banks_il_idx: List[int] = [] + self._ram_banks_il_groups: List[ILRamGroup] = [] + self._il_banks_present: bool = False + self._ram_next_idx: int = 1 + self._ram_next_addr: int = self._ram_start_address + self._linker_sections: List[LinkerSection] = [] + self._used_section_names: Set[str] = set() + + self._ignore_ram_continous: bool = False + self._ignore_ram_interleaved: bool = False + + if override is not None and override.numbanks is not None: + self.add_ram_banks([32]*override.numbanks) + self._ignore_ram_continous = True + if override is not None and override.numbanks_il is not None: + self._ignore_ram_interleaved = True + self._override_numbanks_il = override.numbanks_il + + + def add_ram_banks(self, bank_sizes: "List[int]", section_name: str = ""): + """ + Add ram banks in continuous address mode to the system. + The bank size should be a power of two and at least 1kiB. + + :param List[int] bank_sizes: list of bank sizes in kiB that should be added to the system + :param str section_name: If not empty adds automatically a linker section for this banks. The names must be unique and not be used by the linker for other purposes. 
+ :raise TypeError: when arguments are of wrong type + :raise ValueError: when banks have an incorrect size. + :raise ValueError: if the name was allready used for another section or the first and second are not code and data. + :raise ValueError: if bank_sizes list is empty + """ + + if self._ignore_ram_continous: + return + + if not type(bank_sizes) == list: + raise TypeError("bank_sizes should be of type list") + if not type(section_name) == str: + raise TypeError("section_name should be of type str") + if len(bank_sizes) == 0: + raise ValueError("bank_sizes is empty") + + banks: List[Bank] = [] + for b in bank_sizes: + banks.append(Bank(b, self._ram_next_addr, self._ram_next_idx, 0, 0)) + self._ram_next_addr = banks[-1]._end_address + self._ram_next_idx += 1 + + if section_name != "": + self.add_linker_section_for_banks(banks, section_name) + # Add all new banks if no error was raised + self._ram_banks += banks + + + + + def add_ram_banks_il(self, num: int, bank_size: int, section_name: str = "", ignore_ignore: bool = False): + """ + Add ram banks in interleaved mode to the system. + The bank size should be a power of two and at least 1kiB, + the number of banks should also be a power of two. + + :param int num: number of banks to add + :param int bank_size: size of the banks in kiB + :param str section_name: If not empty adds automatically a linker section for this banks. The names must be unique and not be used by the linker for other purposes. + :param bool ignore_ignore: Ignores the fact that an override was set. For internal uses to apply this override. + :raise TypeError: when arguments are of wrong type + :raise ValueError: when banks have an incorrect size or their number is not a power of two. + :raise ValueError: if the name was allready used for another section or the first and second are not code and data. 
+ """ + if self._ignore_ram_interleaved and not ignore_ignore: + return + + if not self._bus_type in self.IL_COMPATIBLE_BUS_TYPES: + raise RuntimeError(f"This system has a {self._bus_type} bus, one of {self.IL_COMPATIBLE_BUS_TYPES} is required for interleaved memory") + if not type(num) == int: + raise TypeError("num should be of type int") + if not is_pow2(num): + raise ValueError(f"A power of two is required for the number of banks, got {num}") + if not type(section_name) == str: + raise TypeError("section_name should be of type str") + + first_il = self.ram_numbanks() + + banks: List[Bank] = [] + for i in range(num): + banks.append(Bank(bank_size, self._ram_next_addr, self._ram_next_idx, num.bit_length()-1, i)) + self._ram_next_idx += 1 + + self._ram_next_addr = banks[-1]._end_address + + if section_name != "": + self.add_linker_section_for_banks(banks, section_name) + # Add all new banks if no error was raised + self._ram_banks += banks + + indices = range(first_il, first_il + num) + self._ram_banks_il_idx += indices + self._ram_banks_il_groups.append(ILRamGroup(banks[0].start_address(), bank_size*num*1024, len(banks), banks[0].name())) + self._il_banks_present = True + + + + def add_linker_section_for_banks(self, banks: "List[Bank]", name: str): + """ + Function to add linker sections coupled to some banks. + :param List[Bank] banks: list of banks that compose the section, assumed to be continuous in memory + :param str name: the name of the section. + :raise ValueError: if the name was allready used for another section or the first and second are not code and data. + """ + if name in self._used_section_names: + raise ValueError("linker section names should be unique") + + self._used_section_names.add(name) + self._linker_sections.append(LinkerSection(name, banks[0].start_address(), banks[-1].end_address())) + + def add_linker_section(self, section: LinkerSection): + """ + Function to add a linker section. + :param LinkerSection section: Linker section to add. 
+ :param str name: the name of the section. + :raise ValueError: if the name was allready used for another section or the first and second are not code and data. + """ + + if not isinstance(section, LinkerSection): + raise TypeError("section should be an instance of LinkerSection") + + section.check() + + if section.name in self._used_section_names: + raise ValueError("linker section names should be unique") + + self._used_section_names.add(section.name) + self._linker_sections.append(deepcopy(section)) + + + + def bus_type(self) -> BusType: + """ + :return: the configured bus type + :rtype: BusType + """ + return self._bus_type + + def ram_start_address(self) -> int: + """ + :return: the address of the first ram bank. + :rtype: int + """ + return self._ram_start_address + + def ram_numbanks(self) -> int: + """ + :return: the number of banks. + :rtype: int + """ + return len(self._ram_banks) + + + + def ram_numbanks_il(self) -> int: + """ + :return: the number of interleaved banks. + :rtype: int + """ + return len(self._ram_banks_il_idx) + + + + def ram_numbanks_cont(self) -> int: + """ + :return: the number of continuous banks. + :rtype: int + """ + return self.ram_numbanks() - self.ram_numbanks_il() + + + + def validate(self) -> bool: + """ + Does some basics checks on the configuration + + This should be called before using the XHeep object to generate the project. 
+ + :return: the validity of the configuration + :rtype: bool + """ + if not self.ram_numbanks() in range(2, 17): + print(f"The number of banks should be between 2 and 16 instead of {self.ram_numbanks()}") #TODO: clarify upper limit + return False + + if not ("code" in self._used_section_names and "data" in self._used_section_names): + print("The code and data sections are needed") + return False + + for l in self._linker_sections: + l.check() + + ret = True + old_sec: Union[LinkerSection,None] = None + + for i, sec in enumerate(self._linker_sections): + if i == 0 and sec.name != "code": + print("The first linker section sould be called code.") + ret = False + elif i == 1 and sec.name != "data": + print("The second linker section sould be called data.") + ret = False + + if old_sec is not None: + if sec.start < old_sec.end: + print(f"Section {sec.name} and {old_sec.name} overlap.") + + start = sec.start + found_start = False + found_end = False + for b in self._ram_banks: + if found_start: + if b.start_address() > start: + print(f"Section {sec.name} has a memory hole starting at {start:#08X}") + ret = False + found_end = True + break + else: + start = b.end_address() + + if sec.start >= b.start_address() and sec.start < b.end_address(): + found_start = True + start = b.end_address() + + if sec.end <= b.end_address() and sec.end > b.start_address(): + found_end = True + break + + if not found_start: + print(f"Section {sec.name} does not start in any ram bank.") + ret = False + + if not found_end: + ret = False + print(f"Section {sec.name} does not end in any ram bank.") + + old_sec = sec + + return ret + + + + def ram_size_address(self) -> int: + """ + :return: the size of the addressable ram memory. + :rtype: int + """ + size = 0 + for bank in self._ram_banks: + size += bank.size() + return size + + + + def ram_il_size(self) -> int: + """ + :return: the memory size of the interleaved sizes. 
+ :rtype: int + """ + size = 0 + for i in self._ram_banks_il_idx: + size += self._ram_banks[i].size() + return size + + + + def iter_ram_banks(self) -> Iterable[Bank]: + """ + :return: an iterator over all banks. + :rtype: Iterable[Bank] + """ + return iter(self._ram_banks) + + + + def iter_cont_ram_banks(self) -> Iterable[Bank]: + """ + :return: an iterator over all continuous banks. + :rtype: Iterable[Bank] + """ + m = map((lambda b: None if b[0] in self._ram_banks_il_idx else b[1]), enumerate(self._ram_banks)) + return filter(None, m) + + + + def iter_il_ram_banks(self) -> Iterable[Bank]: + """ + :return: an iterator over all interleaved banks. + :rtype: Iterable[Bank] + """ + m = map((lambda b: None if not b[0] in self._ram_banks_il_idx else b[1]), enumerate(self._ram_banks)) + return filter(None, m) + + + + def has_il_ram(self) -> bool: + """ + :return: `True` if the system has interleaved ram. + :rtype: bool + """ + return self._il_banks_present + + + + def iter_il_groups(self) -> Iterable[ILRamGroup]: + """ + :return: an iterator over the interleaved ram bank groups. + :rtype: Iterable[ILRamGroup] + """ + return iter(self._ram_banks_il_groups) + + + + def iter_linker_sections(self) -> Iterable[LinkerSection]: + """ + :return: an iterator over the linker sections + :rtype: Iterable[LinkerSection] + """ + return iter(self._linker_sections) + + + + def iter_bank_numwords(self) -> Generator[int, None, None]: + """ + Iterates over the size of the ram banks in number of words. + + :return: Generator over the sizes + :rtype: Generator[int, None, None] + """ + sizes = set() + for b in self._ram_banks: + if b.size() not in sizes: + sizes.add(b.size()) + yield b.size() // 4 + + def build(self): + """ + Makes the system ready to be used. + + - Aplies the overrides for the interleaved memory as the normal memory needs to be configured first. + - Sorts the linker sections by starting address. 
+ - Inferes the missing linker section ends with the start of the next section if present. If not it uses the end of the last memory bank. + """ + if self._ignore_ram_interleaved: + sec_name = "" + if self.ram_numbanks() > 1: + sec_name = "data_interleaved" + self.add_ram_banks_il(self._override_numbanks_il, 32, sec_name, ignore_ignore=True) #Add automatically a section for compatibility purposes. + + + self._linker_sections.sort(key=lambda l: l.start) + + old_sec: Optional[LinkerSection] = None + for sec in self._linker_sections: + if old_sec is not None: + old_sec.end = sec.start + + if sec.end is None: + old_sec = sec + else: + old_sec = None + if old_sec is not None: + if len(self._ram_banks) == 0: + raise RuntimeError("There is no ram bank to infere the end of a section") + old_sec.end = self._ram_banks[-1].end_address() + \ No newline at end of file diff --git a/x-heep-tb-utils.core b/x-heep-tb-utils.core index 82055fb6e..98252573e 100644 --- a/x-heep-tb-utils.core +++ b/x-heep-tb-utils.core @@ -71,6 +71,11 @@ filesets: - tb/tb_top.cpp file_type: cppSource + tb-sc-verilator: + files: + - tb/tb_sc_top.cpp + file_type: cppSource + targets: default: &default_target filesets: