diff --git a/CHANGELOG.md b/CHANGELOG.md index 96eb46d90..bf271fc48 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -29,6 +29,7 @@ mimpid = 0x01040312 -> Version 01.04.03.12 -> v1.4.3.12 | Date | Version | Comment | Ticket | |:----:|:-------:|:--------|:------:| +| 10.01.2025 | 1.10.9.2 | clean-up SMP dual-core configuration (HW and SW optimizations) | [#1146](https://github.com/stnolting/neorv32/pull/1146) | | 09.01.2025 | 1.10.9.1 | fix side-effects of CSR read instructions | [#1145](https://github.com/stnolting/neorv32/pull/1145) | | 08.01.2025 | [**:rocket:1.10.9**](https://github.com/stnolting/neorv32/releases/tag/v1.10.9) | **New release** | | | 07.01.2025 | 1.10.8.9 | rtl edits and cleanups; add dedicated "core complex" wrapper (CPU + L1 caches + bus switch) | [#1144](https://github.com/stnolting/neorv32/pull/1144) | diff --git a/docs/datasheet/cpu.adoc b/docs/datasheet/cpu.adoc index bc07f928c..6a93c501b 100644 --- a/docs/datasheet/cpu.adoc +++ b/docs/datasheet/cpu.adoc @@ -123,6 +123,7 @@ The generic type "suv(x:y)" represents a `std_ulogic_vector(x downto y)`. | `BOOT_ADDR` | suv(31:0) | CPU reset address. See section <<_address_space>>. | `DEBUG_PARK_ADDR` | suv(31:0) | "Park loop" entry address for the <<_on_chip_debugger_ocd>>, has to be 4-byte aligned. | `DEBUG_EXC_ADDR` | suv(31:0) | "Exception" entry address for the <<_on_chip_debugger_ocd>>, has to be 4-byte aligned. +| `ICC_EN` | boolean | Implement <<_inter_core_communication_icc>> module. Automatically enabled for the SMP <<_dual_core_configuration>>. | `RISCV_ISA_Sdext` | boolean | Implement RISC-V-compatible "debug" CPU operation mode required for the <<_on_chip_debugger_ocd>>. | `RISCV_ISA_Sdtrig` | boolean | Implement RISC-V-compatible trigger module. See section <<_on_chip_debugger_ocd>>. | `RISCV_ISA_Smpmp` | boolean | Implement RISC-V-compatible physical memory protection (PMP). See section <<_smpmp_isa_extension>>. diff --git a/docs/datasheet/cpu_csr.adoc b/docs/datasheet/cpu_csr.adoc index 48f137b4e..ffaa06188 100644 --- a/docs/datasheet/cpu_csr.adoc +++ b/docs/datasheet/cpu_csr.adoc @@ -77,9 +77,8 @@ to check if the targeted bits can actually be modified. | 0xf14 | <<_mhartid>> | `CSR_MHARTID` | MRO | Machine hardware thread ID | 0xf15 | <<_mconfigptr>> | `CSR_MCONFIGPTR` | MRO | Machine configuration pointer register 5+^| **<<_neorv32_specific_csrs>>** -| 0xbc0 | <<_mxiccrxd>> | `CSR_MXICCRXD` | MRW | ICC RX data -| 0xbc1 | <<_mxicctxd>> | `CSR_MXICCTXD` | MRW | ICC TX data -| 0xbc2 .. 0xbc3 | <<_mxiccsr, `mxiccsr0`>> .. <<_mxiccsr, `mxiccsr0`>> | `CSR_MXICCSR0` .. `CSR_MXICCSR3` | MRW | ICC control and status +| 0xbc0 | <<_mxiccsreg>> | `CSR_MXICCSREG` | MRW | Inter-core communication status register +| 0xbc1 | <<_mxiccdata>> | `CSR_MXICCDATA` | MRW | Inter-core communication data register | 0x800 .. 0x803 | <<_cfureg, `cfureg0`>> .. <<_cfureg, `cfureg3`>> | `CSR_CFUCREG0` .. `CSR_CFUCREG3` | URW | Custom CFU registers 0 to 3 | 0xfc0 | <<_mxisa>> | `CSR_MXISA` | MRO | Extended machine CPU ISA and extensions |======================= @@ -931,66 +930,49 @@ custom/implementation-specific use (assured by the RISC-V privileged specificati | Description | User-defined CSRs to be used within the <<_custom_functions_unit_cfu>>. |======================= - {empty} + [discrete] -===== **`mxiccrxd`** +===== **`mxiccsreg`** [cols="<1,<8"] [frame="topbot",grid="none"] |======================= -| Name | <<_inter_core_communication_icc>> RX data +| Name | <<_inter_core_communication_icc>> status register | Address | `0xbc0` -| Reset value | `0x00000000` +| Reset value | `0x40000000` | ISA | `Zicsr` & `X` -| Description | RX data from selected link. Buffered by a 4-entries-deep and 32-bit wide FIFO. -This CSR is hardwired to all-zero if there is just a single CPU core in the system. +| Description | Shows the status of the core's inter-core communication link (message queue / FIFO status flags). +The entire CSR is read-only. However, write accesses are ignored. +This CSR is hardwired to all-zero if the <<_dual_core_configuration>> is disabled. |======================= - -{empty} + -[discrete] -===== **`mxicctxd`** - -[cols="<1,<8"] -[frame="topbot",grid="none"] +.`mxiccsreg` CSR Bits +[cols="^1,^2,^1,<5"] +[options="header",grid="rows"] |======================= -| Name | <<_inter_core_communication_icc>> TX data -| Address | `0xbc1` -| Reset value | `0x00000000` -| ISA | `Zicsr` & `X` -| Description | TX data for selected link. Buffered by a 4-entries-deep and 32-bit wide FIFO. -This CSR is hardwired to all-zero if there is just a single CPU core in the system. +| Bit | Name [C] | R/W | Description +| 0 | `CSR_MXICCSREG_RX_AVAIL` | r/- | Set if RX data from the other core is available. +| 1 | `CSR_MXICCSREG_TX_FREE` | r/- | Set if there is free space for TX data for the other core. +| 31:2 | - | r/- | Reserved; hardwired to zero. |======================= + {empty} + [discrete] -===== **`mxiccsr`** +===== **`mxiccdata`** [cols="<1,<8"] [frame="topbot",grid="none"] |======================= -| Name | <<_inter_core_communication_icc>> control and status -| Address | `0xbc2` (`mxiccsr0`) -| | `0xbc3` (`mxiccsr1`) -| Reset value | `0x40000000` +| Name | <<_inter_core_communication_icc>> data register +| Address | `0xbc1` +| Reset value | `0x00000000` | ISA | `Zicsr` & `X` -| Description | Link selection and status. Note that `mxiccsr1` is just a mirrored copy of `mxiccsr0`. -This CSR is hardwired to all-zero if there is just a single CPU core in the system. -|======================= - -.`mxiccsr` CSR Bits -[cols="^1,^2,^1,<5"] -[options="header",grid="rows"] -|======================= -| Bit | Name [C] | R/W | Description -| 1:0 | `CSR_MXICCSR_LINK_MSB : CSR_MXICCSR_LINK_LSB` | r/w | Link select. The value in this memory corresponds -to the ID of the core to which a connection is to be established via a link. The ICC data registers <<_mxiccrxd>> -and <<_mxicctxd>> will only access the queue FIFOs of the selected link. Note that only bit 0 is writable. Bit 1 -is hardwaired to zero. -| 29:2 | - | r/- | Reserved; hardwired to zero. -| 30 | `CSR_MXICCSR_TX_FREE` | r/- | Set if there is free space for TX data for the selected link. -| 31 | `CSR_MXICCSR_RX_AVAIL` | r/- | Set if RX data from the selected link is available. +| Description | This CSR provides access to the inter-core communication message queues that are implemented +as simple FIFOs. Writing to this register will put data into the message queue so it can be read by the other +core. Reading from this register will return data received from the other core (i.e. this CSR has side effects +when reading). A read access will return all-zero of no RX data is available from the other core. +This CSR is hardwired to all-zero if the <<_dual_core_configuration>> is disabled. |======================= diff --git a/docs/datasheet/cpu_dual_core.adoc b/docs/datasheet/cpu_dual_core.adoc index a70e0ed0f..fec414fc5 100644 --- a/docs/datasheet/cpu_dual_core.adoc +++ b/docs/datasheet/cpu_dual_core.adoc @@ -1,9 +1,9 @@ :sectnums: === Dual-Core Configuration -.Dual-Core Example +.Dual-Core Example Programs [TIP] -A simple dual-core example program can be found in `sw/example/demo_dual_core`. +A set of rather simple dual-core example programs can be found in `sw/example/demo_dual_core*`. Optionally, the CPU core can be implemented as **symmetric multiprocessing (SMP) dual-core** system. This dual-core configuration is enabled by the `DUAL_CORE_EN` <<_processor_top_entity_generics, top generic>>. @@ -24,24 +24,30 @@ The following table summarizes the most important aspects when using the dual-co [cols="<2,<10"] [grid="rows"] |======================= +| **CPU configuration** | Both cores use the same cache, CPU and ISA configuration provided by the according top generics. | **Debugging** | A special SMP openOCD script (`sw/openocd/openocd_neorv32.dual_core.cfg`) is required to -debug both cores at one. SMP-debugging is fully supported by RISC-V gdb port. +debug both cores at one. SMP-debugging is fully supported by the RISC-V gdb port. | **Clock and reset** | Both cores use the same global processor clock and reset. If <<_cpu_clock_gating>> -is enabled the clock of each core can be individually halted by putting it into <<_sleep_mode>>. -| **Address space** | Both cores have access to the same <<_address_space>>. +is enabled, the clock of each core can be individually halted by putting the core into <<_sleep_mode>>. +| **Address space** | Both cores have full access to the same physical <<_address_space>>. | **Interrupts** | All <<_processor_interrupts>> are routed to both cores. Hence, each core has access to all <<_neorv32_specific_fast_interrupt_requests>> (FIRQs). Additionally, the RISC-V machine-level _external interrupt_ (via the top `mext_irq_i` port) is also send to both cores. In contrast, the RISC-V machine level -_software_ and _timer_ interrupts are exclusive for each core (provided by the <<_core_local_interruptor_clint>>). -| **RTE** | The <<_neorv32_runtime_environment>> also supports the dual-core configuration. However, it needs -to be explicitly initialized on each core individually. The RTE trap handling provides a individual handler -tables for each core. +_software_ and _timer_ interrupts are core-exclusive (provided by the <<_core_local_interruptor_clint>>). +| **RTE** | The <<_neorv32_runtime_environment>> fully supports the dual-core configuration and provides +core-individual trap handler tables. However, the RTE needs to be explicitly initialized on each core +(executing `neorv32_rte_setup()`). | **Memory** | Each core has its own stack. The top of stack of core 0 is defined by the <<_linker_script>> while the top of stack of core 1 has to be explicitly defined by core 0 (see <<_dual_core_boot>>). Both -cores share the same heap, `.data` and `.bss` sections. -| **Constructors and destructors** | Constructors and destructors are executed on core 0 only. -(see ) -| **Core communication** | See section <<_inter_core_communication_icc>>. +cores share the same heap, `.data` and `.bss` sections. Hence, only core 0 setups the `.data` and `.bss` +sections at boot-up. +| **Constructors and destructors** | Constructors and destructors are executed by core 0 only +(see section <<_c_standard_library>>). +| **Cache coherency** | Be aware that there is no cache snooping available. If any level-1 cache is enabled +(<<_processor_internal_instruction_cache_icache>> and/or <<_processor_internal_data_cache_dcache>>) care +must be taken to prevent access to outdated data - either by using cache synchronization (`fence[.]` +instructions) or by using <<_atomic_memory_access>>. +| **Inter-core communication** | See section <<_inter_core_communication_icc>>. | **Bootloader** | Only core 0 will boot and execute the bootloader while core 1 is held in standby. | **Booting** | See section <<_dual_core_boot>>. |======================= @@ -55,22 +61,18 @@ shared-memory communication. Additionally, communication using these links is gu The inter-core communication (ICC) module is implemented as dedicated hardware module within each CPU core (VHDL file `rtl/core/neorv32_cpu_icc.vhd`). This module is automatically included if the dual-core option -is enabled. Each core provides a 32-bit wide and 4 entries deep FIFO for sending data to the other core. +is enabled. Each core provides a **32-bit wide** and **4 entries deep** FIFO for sending data to the other core. Hence, there are two FIFOs: one for sending data from core 0 to core 1 and another one for sending data the opposite way. -The ICC communication links are accessed via NEORV32-specific CSRs. Hence, those FIFOs are accessible only -by the CPU core itself and cannot be accessed by the DMA or any other CPU core. In total, three CSRs are -provided to handle communications: +The ICC communication links are accessed via two NEORV32-specific CSRs. Hence, those FIFOs are accessible only +by the CPU core itself and cannot be accessed by the DMA or any other CPU core. -The <<_mxiccsr>> is used to select the core with which to communicate. In the dual-core configuration core 1 -can only select core 0 and vice versa. The core selection in this register allows access to the according -message FIFOs via the two other CSRs. Additionally, the CSR provides status flags (TX FIFO data available; -RX FIFO free space) related to the selected communication link. - -The <<_mxiccrxd>> and <<_mxicctxd>> CSRs are used for the actual data read and write operations. Writing data -to <<<<_mxicctxd>>> will send to the message queue of the core selected by <<_mxiccsr>>. Conversely, reading -data from <<_mxiccrxd>> will return data received from the core selected by <<_mxiccsr>>. +The <<_mxiccsreg>> provides read-only status information about the core's ICC links: bit 0 becomes set if +there is RX data available for _this_ core (send from the the other core). Bit 1 is set as long there is +free space in _this_ core's TX data FIFO. The <<_mxiccdata>> CSR is used for actual data send/receive operations. +Writing this register will put the according data word into the TX link FIFO of _this_ core. Reading this CSR +will return a data word from the RX FIFO of _this_ core. The ICC FIFOs do not provide any interrupt capabilities. Software is expected to use the machine-software interrupt of the receiving core (provided by the <<_core_local_interruptor_clint>>) to inform it about @@ -94,11 +96,20 @@ To boot-up core 1, the primary core has to use a special library function provid .CPU Core 1 launch function prototype (note that this function can only be executed on core 0) [source,c] ---- -int neorv32_smp_launch(int hart_id, void (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes); +int neorv32_smp_launch(int (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes); ---- -When executed, core 0 use the <<_inter_core_communication_icc>> to send launch data that includes the entry point +When executed, core 0 uses the <<_inter_core_communication_icc>> to send launch data that includes the entry point for core 1 (via `entry_point`) and the actual stack configuration (via `stack_memory` and `stack_size_bytes`). +Note that the main function for core 1 has to use a specific type (return `int`, no arguments): + +.CPU Core 1 Main Function +[source,c] +---- +int core1_main(void) { + return 0; // return to crt0 and go to sleep mode +} +---- .Core 1 Stack Memory [NOTE] diff --git a/docs/datasheet/overview.adoc b/docs/datasheet/overview.adoc index 4aefe9113..c435c248a 100644 --- a/docs/datasheet/overview.adoc +++ b/docs/datasheet/overview.adoc @@ -190,7 +190,6 @@ rtl/core ├-neorv32_clint.vhd - Core local interruptor ├-neorv32_clockgate.vhd - Generic clock gating switch ├-neorv32_cfs.vhd - Custom functions subsystem -├-neorv32_core_complex.vhd - NEORV32 CORE COMPLEX TOP ENTITY ├-neorv32_cpu.vhd - NEORV32 CPU TOP ENTITY ├-neorv32_cpu_alu.vhd - Arithmetic/logic unit ├-neorv32_cpu_control.vhd - CPU control, exception system and CSRs diff --git a/docs/datasheet/software.adoc b/docs/datasheet/software.adoc index 944084796..61e643439 100644 --- a/docs/datasheet/software.adoc +++ b/docs/datasheet/software.adoc @@ -380,7 +380,8 @@ Note that `\n` (newline) is automatically converted to `\r\n` (carriage-return a .Constructors and Destructors [NOTE] Constructors and destructors for plain C code or for C++ applications are supported by the software framework. -See `sw/example/hello_cpp` for a minimal example. +See `sw/example/hello_cpp` for a minimal example. Note that constructor and destructors are only executed +by core 0 (primary core) in the SMP <<_dual_core_configuration>>. .Newlib Test/Demo Program [TIP] diff --git a/rtl/core/neorv32_application_image.vhd b/rtl/core/neorv32_application_image.vhd index eb13dff5d..f2162b293 100644 --- a/rtl/core/neorv32_application_image.vhd +++ b/rtl/core/neorv32_application_image.vhd @@ -1,7 +1,7 @@ -- The NEORV32 RISC-V Processor - github.com/stnolting/neorv32 -- Auto-generated memory initialization image (for internal IMEM) -- Source: demo_blink_led/build/main.bin --- Built: 07.01.2025 21:36:11 +-- Built: 10.01.2025 10:25:11 library ieee; use ieee.std_logic_1164.all; @@ -11,7 +11,7 @@ use neorv32.neorv32_package.all; package neorv32_application_image is -constant application_init_size_c : natural := 1228; -- bytes +constant application_init_size_c : natural := 1216; -- bytes constant application_init_image_c : mem32_t := ( x"f14020f3", x"80002217", @@ -23,11 +23,11 @@ x"000022b7", x"80028293", x"30029073", x"00000317", -x"19430313", +x"18830313", x"30531073", x"30401073", x"00000397", -x"49838393", +x"48c38393", x"80000417", x"fc440413", x"80000497", @@ -37,7 +37,7 @@ x"fb450513", x"80000597", x"fac58593", x"00000617", -x"19c60613", +x"19060613", x"00000693", x"00000713", x"00000793", @@ -57,26 +57,23 @@ x"00000e13", x"00000e93", x"00000f13", x"00000f93", -x"04008a63", +x"04008463", x"00000797", x"01878793", x"30579073", x"30446073", x"30046073", -x"0e80006f", +x"0dc0006f", x"fff40737", -x"00209793", -x"00f70733", -x"00072023", -x"bc201073", -x"bc0026f3", +x"00072223", +x"bc1026f3", x"ffab4737", x"32170713", x"00d70463", x"30200073", +x"bc102173", +x"bc102673", x"bc171073", -x"bc002173", -x"bc002673", x"0540006f", x"00838e63", x"00945c63", diff --git a/rtl/core/neorv32_bootloader_image.vhd b/rtl/core/neorv32_bootloader_image.vhd index bd18f9856..d2858cfaa 100644 --- a/rtl/core/neorv32_bootloader_image.vhd +++ b/rtl/core/neorv32_bootloader_image.vhd @@ -1,7 +1,7 @@ -- The NEORV32 RISC-V Processor - github.com/stnolting/neorv32 -- Auto-generated memory initialization image (for internal BOOTROM) -- Source: bootloader/build/main.bin --- Built: 07.01.2025 21:35:42 +-- Built: 10.01.2025 10:24:53 library ieee; use ieee.std_logic_1164.all; @@ -11,7 +11,7 @@ use neorv32.neorv32_package.all; package neorv32_bootloader_image is -constant bootloader_init_size_c : natural := 4044; -- bytes +constant bootloader_init_size_c : natural := 4032; -- bytes constant bootloader_init_image_c : mem32_t := ( x"f14020f3", x"80200217", @@ -23,11 +23,11 @@ x"000022b7", x"80028293", x"30029073", x"00000317", -x"10430313", +x"0f830313", x"30531073", x"30401073", x"00001397", -x"f9838393", +x"f8c38393", x"80200417", x"fc440413", x"80200497", @@ -37,30 +37,27 @@ x"fb450513", x"80200597", x"fb458593", x"00000617", -x"10c60613", +x"10060613", x"00000693", x"00000713", x"00000793", -x"04008a63", +x"04008463", x"00000797", x"01878793", x"30579073", x"30446073", x"30046073", -x"0980006f", +x"08c0006f", x"fff40737", -x"00209793", -x"00f70733", -x"00072023", -x"bc201073", -x"bc0026f3", +x"00072223", +x"bc1026f3", x"ffab4737", x"32170713", x"00d70463", x"30200073", +x"bc102173", +x"bc102673", x"bc171073", -x"bc002173", -x"bc002673", x"0300006f", x"00838e63", x"00945c63", @@ -112,7 +109,7 @@ x"ffe017b7", x"00112823", x"00812623", x"00912423", -x"a4078793", +x"a3478793", x"30579073", x"fffe07b7", x"0087a783", @@ -196,54 +193,54 @@ x"30479073", x"00800793", x"3007a073", x"ffe01537", -x"dbc50513", +x"db050513", x"6b4000ef", x"f1302573", x"648000ef", x"ffe01537", -x"df450513", +x"de850513", x"6a0000ef", x"fffe0437", x"00042503", x"630000ef", x"ffe01537", -x"dfc50513", +x"df050513", x"688000ef", x"30102573", x"61c000ef", x"ffe01537", -x"e0450513", +x"df850513", x"674000ef", x"fc002573", x"608000ef", x"ffe01537", -x"e0c50513", +x"e0050513", x"660000ef", x"00842503", x"00100493", x"5f0000ef", x"ffe01537", -x"e1450513", +x"e0850513", x"648000ef", x"00444503", x"00a49533", x"ffc57513", x"5d4000ef", x"ffe01537", -x"e1c50513", +x"e1050513", x"62c000ef", x"00544783", x"00f49533", x"ffc57513", x"5b8000ef", x"ffe014b7", -x"db848513", +x"dac48513", x"610000ef", x"00842783", x"00f79713", x"06075063", x"ffe01537", -x"e2450513", +x"e1850513", x"5f8000ef", x"2e0000ef", x"00042703", @@ -263,13 +260,13 @@ x"00f69613", x"0a065463", x"ffe01537", x"00472783", -x"e5050513", +x"e4450513", x"5a8000ef", x"ffe017b7", -x"e5c78513", +x"e5078513", x"59c000ef", x"ffe01537", -x"edc50513", +x"ed050513", x"590000ef", x"fff507b7", x"0007a703", @@ -279,14 +276,14 @@ x"0047a403", x"0ff47413", x"00040513", x"4f4000ef", -x"db848513", +x"dac48513", x"568000ef", x"f9b40413", x"0ff47413", x"01300793", x"2287e863", x"ffe017b7", -x"f5878793", +x"f4c78793", x"00241413", x"00f40433", x"00042783", @@ -310,7 +307,7 @@ x"00b41463", x"f2f564e3", x"00100513", x"6f8000ef", -x"db848513", +x"dac48513", x"4ec000ef", x"00000513", x"031000ef", @@ -323,20 +320,20 @@ x"800007b7", x"0047a403", x"00041863", x"ffe01537", -x"ee450513", +x"ed850513", x"f1dff06f", x"ffe01537", -x"f0050513", +x"ef450513", x"4ac000ef", x"00040513", x"440000ef", x"ffe01537", -x"f0850513", +x"efc50513", x"498000ef", x"00400537", x"42c000ef", x"ffe01537", -x"f2050513", +x"f1450513", x"484000ef", x"fff507b7", x"0007a703", @@ -354,7 +351,7 @@ x"00050663", x"00300513", x"498000ef", x"ffe01537", -x"f2c50513", +x"f2050513", x"43c000ef", x"01045793", x"00178793", @@ -392,7 +389,7 @@ x"00850513", x"40e005b3", x"2a8000ef", x"ffe01537", -x"da050513", +x"d9450513", x"e09ff06f", x"00f12223", x"1ec000ef", @@ -418,14 +415,14 @@ x"800007b7", x"0047a783", x"e60790e3", x"ffe01537", -x"f3c50513", +x"f3050513", x"da1ff06f", x"fffe07b7", x"0087a783", x"2007f793", x"00079863", x"ffe01537", -x"f4c50513", +x"f4050513", x"d85ff06f", x"00100513", x"e35ff06f", @@ -614,7 +611,7 @@ x"01c00493", x"00945733", x"ffe017b7", x"00f77713", -x"fa878793", +x"f9c78793", x"00e787b3", x"0007c503", x"ffc48493", @@ -650,13 +647,13 @@ x"ff810113", x"00812023", x"00050413", x"ffe01537", -x"d4850513", +x"d3c50513", x"00112223", x"f99ff0ef", x"00241793", x"ffe01537", x"008787b3", -x"fb850513", +x"fac50513", x"00f50533", x"f81ff0ef", x"00800793", @@ -737,7 +734,7 @@ x"0087a783", x"00e79713", x"04075263", x"ffe01537", -x"d5050513", +x"d4450513", x"e41ff0ef", x"00048513", x"dd5ff0ef", @@ -750,7 +747,7 @@ x"da5ff0ef", x"34302573", x"db5ff0ef", x"ffe01537", -x"db850513", +x"dac50513", x"e0dff0ef", x"00440413", x"34141073", @@ -765,7 +762,7 @@ x"00a12023", x"00f4a023", x"02051863", x"ffe01537", -x"d5c50513", +x"d5050513", x"dd1ff0ef", x"00012503", x"004005b7", @@ -776,12 +773,12 @@ x"04f50863", x"00000513", x"0380006f", x"ffe01537", -x"d7c50513", +x"d7050513", x"da5ff0ef", x"00400537", x"d39ff0ef", x"ffe01537", -x"d9850513", +x"d8c50513", x"d91ff0ef", x"fffe07b7", x"0087a783", @@ -813,7 +810,7 @@ x"00d787b3", x"00200513", x"fa0792e3", x"ffe01537", -x"da050513", +x"d9450513", x"d11ff0ef", x"800007b7", x"0087a223", @@ -847,12 +844,12 @@ x"40a00533", x"e0400437", x"00a47433", x"ffe01537", -x"da450513", +x"d9850513", x"c89ff0ef", x"00040513", x"c1dff0ef", x"ffe01537", -x"db450513", +x"da850513", x"c75ff0ef", x"975ff0ef", x"00050863", @@ -901,8 +898,8 @@ x"72656461", x"0a3e3e20", x"444c420a", x"4a203a56", -x"20206e61", -x"30322037", +x"31206e61", +x"30322030", x"480a3532", x"203a5657", x"00000020", @@ -995,26 +992,26 @@ x"00002e65", x"61766e49", x"2064696c", x"00444d43", -x"ffe00644", -x"ffe0066c", -x"ffe0066c", -x"ffe003f0", -x"ffe0066c", -x"ffe0066c", -x"ffe0066c", -x"ffe0063c", -x"ffe0066c", -x"ffe0066c", -x"ffe0066c", -x"ffe0066c", -x"ffe0066c", -x"ffe004b4", -x"ffe004c8", -x"ffe0066c", +x"ffe00638", +x"ffe00660", +x"ffe00660", +x"ffe003e4", +x"ffe00660", +x"ffe00660", +x"ffe00660", +x"ffe00630", +x"ffe00660", +x"ffe00660", +x"ffe00660", +x"ffe00660", +x"ffe00660", +x"ffe004a8", x"ffe004bc", -x"ffe0066c", -x"ffe0066c", -x"ffe0065c", +x"ffe00660", +x"ffe004b0", +x"ffe00660", +x"ffe00660", +x"ffe00650", x"33323130", x"37363534", x"62613938", diff --git a/rtl/core/neorv32_core_complex.vhd b/rtl/core/neorv32_core_complex.vhd deleted file mode 100644 index aa56ac146..000000000 --- a/rtl/core/neorv32_core_complex.vhd +++ /dev/null @@ -1,257 +0,0 @@ --- ================================================================================ -- --- NEORV32 SoC - Core Complex Top -- --- -------------------------------------------------------------------------------- -- --- CPU core + optional L1 I-cache + optional L1 D-cache + bus switch -- --- -------------------------------------------------------------------------------- -- --- The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 -- --- Copyright (c) NEORV32 contributors. -- --- Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. -- --- Licensed under the BSD-3-Clause license, see LICENSE for details. -- --- SPDX-License-Identifier: BSD-3-Clause -- --- ================================================================================ -- - -library ieee; -use ieee.std_logic_1164.all; - -library neorv32; -use neorv32.neorv32_package.all; - -entity neorv32_core_complex is - generic ( - -- General -- - HART_ID : natural range 0 to 3; - NUM_HARTS : natural range 1 to 4; - VENDOR_ID : std_ulogic_vector(31 downto 0); - BOOT_ADDR : std_ulogic_vector(31 downto 0); - DEBUG_PARK_ADDR : std_ulogic_vector(31 downto 0); - DEBUG_EXC_ADDR : std_ulogic_vector(31 downto 0); - -- RISC-V ISA Extensions -- - RISCV_ISA_C : boolean; - RISCV_ISA_E : boolean; - RISCV_ISA_M : boolean; - RISCV_ISA_U : boolean; - RISCV_ISA_Zaamo : boolean; - RISCV_ISA_Zba : boolean; - RISCV_ISA_Zbb : boolean; - RISCV_ISA_Zbkb : boolean; - RISCV_ISA_Zbkc : boolean; - RISCV_ISA_Zbkx : boolean; - RISCV_ISA_Zbs : boolean; - RISCV_ISA_Zfinx : boolean; - RISCV_ISA_Zicntr : boolean; - RISCV_ISA_Zicond : boolean; - RISCV_ISA_Zihpm : boolean; - RISCV_ISA_Zknd : boolean; - RISCV_ISA_Zkne : boolean; - RISCV_ISA_Zknh : boolean; - RISCV_ISA_Zksed : boolean; - RISCV_ISA_Zksh : boolean; - RISCV_ISA_Zmmul : boolean; - RISCV_ISA_Zxcfu : boolean; - RISCV_ISA_Sdext : boolean; - RISCV_ISA_Sdtrig : boolean; - RISCV_ISA_Smpmp : boolean; - -- Tuning Options -- - CPU_CLOCK_GATING_EN : boolean; - CPU_FAST_MUL_EN : boolean; - CPU_FAST_SHIFT_EN : boolean; - CPU_RF_HW_RST_EN : boolean; - -- Physical Memory Protection (PMP) -- - PMP_NUM_REGIONS : natural range 0 to 16; - PMP_MIN_GRANULARITY : natural; - PMP_TOR_MODE_EN : boolean; - PMP_NAP_MODE_EN : boolean; - -- Hardware Performance Monitors (HPM) -- - HPM_NUM_CNTS : natural range 0 to 13; - HPM_CNT_WIDTH : natural range 0 to 64; - -- Instruction Cache (iCACHE) -- - ICACHE_EN : boolean; - ICACHE_NUM_BLOCKS : natural range 1 to 256; - ICACHE_BLOCK_SIZE : natural range 4 to 2**16; - ICACHE_UC_BEGIN : std_ulogic_vector(31 downto 0); - -- Data Cache (dCACHE) -- - DCACHE_EN : boolean; - DCACHE_NUM_BLOCKS : natural range 1 to 256; - DCACHE_BLOCK_SIZE : natural range 4 to 2**16; - DCACHE_UC_BEGIN : std_ulogic_vector(31 downto 0) - ); - port ( - -- global control -- - clk_i : in std_ulogic; - rstn_i : in std_ulogic; - -- interrupts -- - msi_i : in std_ulogic; - mei_i : in std_ulogic; - mti_i : in std_ulogic; - firq_i : in std_ulogic_vector(15 downto 0); - dbi_i : in std_ulogic; - -- inter-core communication links -- - icc_tx_o : out icc_t; -- TX links - icc_rx_i : in icc_t; -- RX links - -- system bus interface -- - bus_req_o : out bus_req_t; - bus_rsp_i : in bus_rsp_t - ); -end neorv32_core_complex; - -architecture neorv32_core_complex_rtl of neorv32_core_complex is - - -- bus system -- - signal cpu_i_req, cpu_d_req, icache_req, dcache_req : bus_req_t; - signal cpu_i_rsp, cpu_d_rsp, icache_rsp, dcache_rsp : bus_rsp_t; - -begin - - -- CPU Core ------------------------------------------------------------------------------- - -- ------------------------------------------------------------------------------------------- - neorv32_cpu_inst: entity neorv32.neorv32_cpu - generic map ( - -- General -- - HART_ID => HART_ID, - NUM_HARTS => NUM_HARTS, - VENDOR_ID => VENDOR_ID, - BOOT_ADDR => BOOT_ADDR, - DEBUG_PARK_ADDR => DEBUG_PARK_ADDR, - DEBUG_EXC_ADDR => DEBUG_EXC_ADDR, - -- RISC-V ISA Extensions -- - RISCV_ISA_C => RISCV_ISA_C, - RISCV_ISA_E => RISCV_ISA_E, - RISCV_ISA_M => RISCV_ISA_M, - RISCV_ISA_U => RISCV_ISA_U, - RISCV_ISA_Zaamo => RISCV_ISA_Zaamo, - RISCV_ISA_Zba => RISCV_ISA_Zba, - RISCV_ISA_Zbb => RISCV_ISA_Zbb, - RISCV_ISA_Zbkb => RISCV_ISA_Zbkb, - RISCV_ISA_Zbkc => RISCV_ISA_Zbkc, - RISCV_ISA_Zbkx => RISCV_ISA_Zbkx, - RISCV_ISA_Zbs => RISCV_ISA_Zbs, - RISCV_ISA_Zfinx => RISCV_ISA_Zfinx, - RISCV_ISA_Zicntr => RISCV_ISA_Zicntr, - RISCV_ISA_Zicond => RISCV_ISA_Zicond, - RISCV_ISA_Zihpm => RISCV_ISA_Zihpm, - RISCV_ISA_Zknd => RISCV_ISA_Zknd, - RISCV_ISA_Zkne => RISCV_ISA_Zkne, - RISCV_ISA_Zknh => RISCV_ISA_Zknh, - RISCV_ISA_Zksed => RISCV_ISA_Zksed, - RISCV_ISA_Zksh => RISCV_ISA_Zksh, - RISCV_ISA_Zmmul => RISCV_ISA_Zmmul, - RISCV_ISA_Zxcfu => RISCV_ISA_Zxcfu, - RISCV_ISA_Sdext => RISCV_ISA_Sdext, - RISCV_ISA_Sdtrig => RISCV_ISA_Sdtrig, - RISCV_ISA_Smpmp => RISCV_ISA_Smpmp, - -- Tuning Options -- - CPU_CLOCK_GATING_EN => CPU_CLOCK_GATING_EN, - CPU_FAST_MUL_EN => CPU_FAST_MUL_EN, - CPU_FAST_SHIFT_EN => CPU_FAST_SHIFT_EN, - CPU_RF_HW_RST_EN => CPU_RF_HW_RST_EN, - -- Physical Memory Protection (PMP) -- - PMP_NUM_REGIONS => PMP_NUM_REGIONS, - PMP_MIN_GRANULARITY => PMP_MIN_GRANULARITY, - PMP_TOR_MODE_EN => PMP_TOR_MODE_EN, - PMP_NAP_MODE_EN => PMP_NAP_MODE_EN, - -- Hardware Performance Monitors (HPM) -- - HPM_NUM_CNTS => HPM_NUM_CNTS, - HPM_CNT_WIDTH => HPM_CNT_WIDTH - ) - port map ( - -- global control -- - clk_i => clk_i, - rstn_i => rstn_i, - -- interrupts -- - msi_i => msi_i, - mei_i => mei_i, - mti_i => mti_i, - firq_i => firq_i, - dbi_i => dbi_i, - -- inter-core communication links -- - icc_tx_o => icc_tx_o, - icc_rx_i => icc_rx_i, - -- instruction bus interface -- - ibus_req_o => cpu_i_req, - ibus_rsp_i => cpu_i_rsp, - -- data bus interface -- - dbus_req_o => cpu_d_req, - dbus_rsp_i => cpu_d_rsp - ); - - - -- CPU L1 Instruction Cache (I-Cache) ----------------------------------------------------- - -- ------------------------------------------------------------------------------------------- - neorv32_icache_enabled: - if ICACHE_EN generate - neorv32_icache_inst: entity neorv32.neorv32_cache - generic map ( - NUM_BLOCKS => ICACHE_NUM_BLOCKS, - BLOCK_SIZE => ICACHE_BLOCK_SIZE, - UC_BEGIN => ICACHE_UC_BEGIN(31 downto 28), - UC_ENABLE => true, - READ_ONLY => true - ) - port map ( - clk_i => clk_i, - rstn_i => rstn_i, - host_req_i => cpu_i_req, - host_rsp_o => cpu_i_rsp, - bus_req_o => icache_req, - bus_rsp_i => icache_rsp - ); - end generate; - - neorv32_icache_disabled: - if not ICACHE_EN generate - icache_req <= cpu_i_req; - cpu_i_rsp <= icache_rsp; - end generate; - - - -- CPU L1 Data Cache (D-Cache) ------------------------------------------------------------ - -- ------------------------------------------------------------------------------------------- - neorv32_dcache_enabled: - if DCACHE_EN generate - neorv32_dcache_inst: entity neorv32.neorv32_cache - generic map ( - NUM_BLOCKS => DCACHE_NUM_BLOCKS, - BLOCK_SIZE => DCACHE_BLOCK_SIZE, - UC_BEGIN => DCACHE_UC_BEGIN(31 downto 28), - UC_ENABLE => true, - READ_ONLY => false - ) - port map ( - clk_i => clk_i, - rstn_i => rstn_i, - host_req_i => cpu_d_req, - host_rsp_o => cpu_d_rsp, - bus_req_o => dcache_req, - bus_rsp_i => dcache_rsp - ); - end generate; - - neorv32_dcache_disabled: - if not DCACHE_EN generate - dcache_req <= cpu_d_req; - cpu_d_rsp <= dcache_rsp; - end generate; - - - -- Core Instruction/Data Bus Switch ------------------------------------------------------- - -- ------------------------------------------------------------------------------------------- - neorv32_core_bus_switch_inst: entity neorv32.neorv32_bus_switch - generic map ( - ROUND_ROBIN_EN => false, -- use prioritizing arbitration - PORT_A_READ_ONLY => false, - PORT_B_READ_ONLY => true -- instruction fetch is read-only - ) - port map ( - clk_i => clk_i, - rstn_i => rstn_i, - a_lock_i => '0', -- no exclusive accesses - a_req_i => dcache_req, -- data accesses are prioritized - a_rsp_o => dcache_rsp, - b_req_i => icache_req, - b_rsp_o => icache_rsp, - x_req_o => bus_req_o, - x_rsp_i => bus_rsp_i - ); - - -end neorv32_core_complex_rtl; diff --git a/rtl/core/neorv32_cpu.vhd b/rtl/core/neorv32_cpu.vhd index 4922f587c..8391fc482 100644 --- a/rtl/core/neorv32_cpu.vhd +++ b/rtl/core/neorv32_cpu.vhd @@ -22,12 +22,12 @@ use neorv32.neorv32_package.all; entity neorv32_cpu is generic ( -- General -- - HART_ID : natural range 0 to 3; -- hardware thread ID - NUM_HARTS : natural range 1 to 4; -- total number of harts in the system, has to be a power of 2 + HART_ID : natural range 0 to 1023; -- hardware thread ID VENDOR_ID : std_ulogic_vector(31 downto 0); -- vendor's JEDEC ID BOOT_ADDR : std_ulogic_vector(31 downto 0); -- cpu boot address DEBUG_PARK_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug mode parking loop entry address DEBUG_EXC_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug mode exception entry address + ICC_EN : boolean; -- implement inter-core communication (ICC) links -- RISC-V ISA Extensions -- RISCV_ISA_C : boolean; -- implement compressed extension RISCV_ISA_E : boolean; -- implement embedded RF extension @@ -183,10 +183,6 @@ begin -- simulation notifier -- assert not is_simulation_c report "[NEORV32] Assuming this is a simulation." severity warning; - -- ID checks -- - assert is_power_of_two_f(NUM_HARTS) report "[NEORV32] NUM_HARTS has to be a power of two." severity error; - assert (HART_ID < NUM_HARTS) report "[NEORV32] HART_ID out of range." severity error; - -- Clock Gating --------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- @@ -438,12 +434,8 @@ begin -- Inter-Core Communication (ICC) --------------------------------------------------------- -- ------------------------------------------------------------------------------------------- icc_enabled: - if NUM_HARTS > 1 generate + if ICC_EN generate neorv32_cpu_icc_inst: entity neorv32.neorv32_cpu_icc - generic map ( - HART_ID => HART_ID, -- ID of this core - NUM_HARTS => NUM_HARTS -- number of cores, has to be a power of two - ) port map ( -- global control -- clk_i => clk_i, -- global clock, rising edge @@ -461,7 +453,7 @@ begin end generate; icc_disabled: - if NUM_HARTS = 1 generate + if not ICC_EN generate xcsr_rdata_icc <= (others => '0'); icc_tx_o <= icc_terminate_c; end generate; diff --git a/rtl/core/neorv32_cpu_control.vhd b/rtl/core/neorv32_cpu_control.vhd index d1b46d23f..586da6d80 100644 --- a/rtl/core/neorv32_cpu_control.vhd +++ b/rtl/core/neorv32_cpu_control.vhd @@ -29,7 +29,7 @@ use neorv32.neorv32_package.all; entity neorv32_cpu_control is generic ( -- General -- - HART_ID : natural range 0 to 3; -- hardware thread ID + HART_ID : natural range 0 to 1023; -- hardware thread ID VENDOR_ID : std_ulogic_vector(31 downto 0); -- vendor's JEDEC ID BOOT_ADDR : std_ulogic_vector(31 downto 0); -- cpu boot address DEBUG_PARK_ADDR : std_ulogic_vector(31 downto 0); -- cpu debug-mode parking loop entry address, 4-byte aligned @@ -930,10 +930,10 @@ begin csr_valid(2) <= bool_to_ulogic_f(RISCV_ISA_Zfinx); -- available if FPU implemented -- machine trap setup/handling, environment/information registers, etc. -- - when csr_mstatus_c | csr_mstatush_c | csr_misa_c | csr_mie_c | csr_mtvec_c | csr_mscratch_c | - csr_mepc_c | csr_mcause_c | csr_mip_c | csr_mtval_c | csr_mtinst_c | csr_mcountinhibit_c | - csr_mvendorid_c | csr_marchid_c | csr_mimpid_c | csr_mhartid_c | csr_mconfigptr_c | csr_mxisa_c | - csr_mxiccrxd_c | csr_mxicctxd_c | csr_mxiccsr0_c | csr_mxiccsr1_c => + when csr_mstatus_c | csr_mstatush_c | csr_misa_c | csr_mie_c | csr_mtvec_c | + csr_mscratch_c | csr_mepc_c | csr_mcause_c | csr_mip_c | csr_mtval_c | + csr_mtinst_c | csr_mcountinhibit_c | csr_mvendorid_c | csr_marchid_c | csr_mimpid_c | + csr_mhartid_c | csr_mconfigptr_c | csr_mxisa_c | csr_mxiccsreg_c | csr_mxiccdata_c => csr_valid(2) <= '1'; -- always implemented -- machine-controlled user-mode CSRs -- @@ -1671,7 +1671,7 @@ begin -- -------------------------------------------------------------------- -- inter-core communication -- -------------------------------------------------------------------- - when csr_mxiccrxd_c | csr_mxicctxd_c | csr_mxiccsr0_c | csr_mxiccsr1_c => + when csr_mxiccsreg_c | csr_mxiccdata_c => csr.rdata <= xcsr_rdata_i; -- implemented externally -- -------------------------------------------------------------------- diff --git a/rtl/core/neorv32_cpu_icc.vhd b/rtl/core/neorv32_cpu_icc.vhd index f68ac64d8..881c410d8 100644 --- a/rtl/core/neorv32_cpu_icc.vhd +++ b/rtl/core/neorv32_cpu_icc.vhd @@ -16,10 +16,6 @@ library neorv32; use neorv32.neorv32_package.all; entity neorv32_cpu_icc is - generic ( - HART_ID : natural range 0 to 3; -- ID of this core - NUM_HARTS : natural range 1 to 4 -- number of cores, has to be a power of two - ); port ( -- global control -- clk_i : in std_ulogic; -- global clock, rising edge @@ -38,109 +34,57 @@ end neorv32_cpu_icc; architecture neorv32_cpu_icc_rtl of neorv32_cpu_icc is - -- link select -- - constant id_width_c : natural := index_size_f(NUM_HARTS); - signal link_id : std_ulogic_vector(id_width_c-1 downto 0); - - -- link control -- - signal link_sel, tx_fifo_we, tx_fifo_free : std_ulogic_vector(NUM_HARTS-1 downto 0); - - -- incoming data as array -- - type rx_data_t is array (0 to NUM_HARTS-1) of std_ulogic_vector(XLEN-1 downto 0); - signal rx_data : rx_data_t; + signal tx_fifo_we, tx_fifo_free : std_ulogic; begin -- CSR Access ----------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - csr_write: process(rstn_i, clk_i) - begin - if (rstn_i = '0') then - link_id <= (others => '0'); - elsif rising_edge(clk_i) then - if (csr_we_i = '1') and (csr_addr_i(11 downto 1) = csr_mxiccsr0_c(11 downto 1)) then - link_id <= csr_wdata_i(id_width_c-1 downto 0); - end if; - end if; - end process csr_write; - - csr_read: process(csr_addr_i, link_id, icc_rx_i, tx_fifo_free, rx_data) + csr_read: process(csr_addr_i, icc_rx_i, tx_fifo_free) begin csr_rdata_o <= (others => '0'); -- default - if (csr_addr_i(11 downto 2) = csr_mxiccrxd_c(11 downto 2)) then -- ICC CSRs base address - if (csr_addr_i(1) = '0') then -- data register(s) - csr_rdata_o <= rx_data(to_integer(unsigned(link_id))); - else -- control and status register(s) - csr_rdata_o(XLEN-1) <= icc_rx_i.rdy(to_integer(unsigned(link_id))); - csr_rdata_o(XLEN-2) <= tx_fifo_free(to_integer(unsigned(link_id))); - csr_rdata_o(id_width_c-1 downto 0) <= link_id; + if (csr_addr_i(11 downto 1) = csr_mxiccsreg_c(11 downto 1)) then -- ICC CSR base address + if (csr_addr_i(0) = '0') then -- csr_mxiccsreg_c - control and status register + csr_rdata_o(0) <= icc_rx_i.rdy; + csr_rdata_o(1) <= tx_fifo_free; + else -- csr_mxiccdata_c - data register + if (icc_rx_i.rdy = '1') then -- "output gate": read zero if no RX data is available + csr_rdata_o <= icc_rx_i.dat; + end if; end if; end if; end process csr_read; + -- link read/write -- + icc_tx_o.ack <= '1' when (csr_re_i = '1') and (csr_addr_i = csr_mxiccdata_c) else '0'; + tx_fifo_we <= '1' when (csr_we_i = '1') and (csr_addr_i = csr_mxiccdata_c) else '0'; - -- Communication Links -------------------------------------------------------------------- - -- ------------------------------------------------------------------------------------------- - link_gen: - for i in 0 to NUM_HARTS-1 generate - - -- TX FIFOs for outgoing links -- - queue_gen: - if i /= HART_ID generate - queue_inst: entity neorv32.neorv32_fifo - generic map ( - FIFO_DEPTH => 4, -- yes, this is fixed - FIFO_WIDTH => XLEN, - FIFO_RSYNC => true, - FIFO_SAFE => true, - FULL_RESET => true - ) - port map ( - -- control -- - clk_i => clk_i, - rstn_i => rstn_i, - clear_i => '0', - half_o => open, - -- write port -- - wdata_i => csr_wdata_i, - we_i => tx_fifo_we(i), - free_o => tx_fifo_free(i), - -- read port -- - re_i => icc_rx_i.ack(i), - rdata_o => icc_tx_o.dat(i*XLEN+(XLEN-1) downto i*XLEN), - avail_o => icc_tx_o.rdy(i) - ); - end generate; - - -- no FIFO/link for *this* core -- - queue_terminate: - if i = HART_ID generate - tx_fifo_free(i) <= '0'; - icc_tx_o.dat(i*XLEN+(XLEN-1) downto i*XLEN) <= (others => '0'); - icc_tx_o.rdy(i) <= '0'; - end generate; - - -- reorganize incoming links as 2d-array -- - rx_data(i) <= icc_rx_i.dat(i*XLEN+(XLEN-1) downto i*XLEN); - - -- link control -- - link_sel(i) <= '1' when (unsigned(link_id) = to_unsigned(i, id_width_c)) else '0'; - icc_tx_o.ack(i) <= '1' when (csr_re_i = '1') and (csr_addr_i = csr_mxiccrxd_c) and (link_sel(i) = '1') else '0'; - tx_fifo_we(i) <= '1' when (csr_we_i = '1') and (csr_addr_i = csr_mxicctxd_c) and (link_sel(i) = '1') else '0'; - end generate; - - - -- terminate unused links -- - link_terminate: - if NUM_HARTS < 4 generate - link_terminate_gen: - for i in NUM_HARTS to 3 generate - icc_tx_o.rdy(i) <= '0'; - icc_tx_o.ack(i) <= '0'; - icc_tx_o.dat(i*XLEN+(XLEN-1) downto i*XLEN) <= (others => '0'); - end generate; - end generate; + -- Outgoing/TX Message Queue (FIFO) ------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + tx_queue_inst: entity neorv32.neorv32_fifo + generic map ( + FIFO_DEPTH => 4, -- yes, this is fixed + FIFO_WIDTH => XLEN, + FIFO_RSYNC => true, + FIFO_SAFE => true, + FULL_RESET => false -- no need for a full HW reset as we have an "output gate" + ) + port map ( + -- control -- + clk_i => clk_i, + rstn_i => rstn_i, + clear_i => '0', + half_o => open, + -- write port -- + wdata_i => csr_wdata_i, + we_i => tx_fifo_we, + free_o => tx_fifo_free, + -- read port -- + re_i => icc_rx_i.ack, + rdata_o => icc_tx_o.dat, + avail_o => icc_tx_o.rdy + ); end neorv32_cpu_icc_rtl; diff --git a/rtl/core/neorv32_package.vhd b/rtl/core/neorv32_package.vhd index b1b7150a2..4c29eb752 100644 --- a/rtl/core/neorv32_package.vhd +++ b/rtl/core/neorv32_package.vhd @@ -29,7 +29,7 @@ package neorv32_package is -- Architecture Constants ----------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100901"; -- hardware version + constant hw_version_c : std_ulogic_vector(31 downto 0) := x"01100902"; -- hardware version constant archid_c : natural := 19; -- official RISC-V architecture ID constant XLEN : natural := 32; -- native data path width @@ -224,19 +224,18 @@ package neorv32_package is err => '0' ); - -- Inter-Core Communication (ICC) Links --------------------------------------------------- + -- Inter-Core Communication (ICC) Link ---------------------------------------------------- -- ------------------------------------------------------------------------------------------- - -- icc link (for up to 4 cores) -- type icc_t is record - rdy : std_ulogic_vector(4-1 downto 0); -- data available - ack : std_ulogic_vector(4-1 downto 0); -- read-enable - dat : std_ulogic_vector(4*XLEN-1 downto 0); -- data word + rdy : std_ulogic; -- data available + ack : std_ulogic; -- read-enable + dat : std_ulogic_vector(XLEN-1 downto 0); -- data word end record; -- endpoint termination -- constant icc_terminate_c : icc_t := ( - rdy => (others => '0'), - ack => (others => '0'), + rdy => '0', + ack => '0', dat => (others => '0') ); @@ -482,10 +481,8 @@ package neorv32_package is constant csr_mhpmcounter14h_c : std_ulogic_vector(11 downto 0) := x"b8e"; constant csr_mhpmcounter15h_c : std_ulogic_vector(11 downto 0) := x"b8f"; -- NEORV32-specific read/write machine registers -- - constant csr_mxiccrxd_c : std_ulogic_vector(11 downto 0) := x"bc0"; - constant csr_mxicctxd_c : std_ulogic_vector(11 downto 0) := x"bc1"; - constant csr_mxiccsr0_c : std_ulogic_vector(11 downto 0) := x"bc2"; - constant csr_mxiccsr1_c : std_ulogic_vector(11 downto 0) := x"bc3"; + constant csr_mxiccsreg_c : std_ulogic_vector(11 downto 0) := x"bc0"; + constant csr_mxiccdata_c : std_ulogic_vector(11 downto 0) := x"bc1"; -- user counters/timers -- constant csr_cycle_c : std_ulogic_vector(11 downto 0) := x"c00"; --constant csr_time_c : std_ulogic_vector(11 downto 0) := x"c01"; diff --git a/rtl/core/neorv32_top.vhd b/rtl/core/neorv32_top.vhd index f079b3d72..46304b4c8 100644 --- a/rtl/core/neorv32_top.vhd +++ b/rtl/core/neorv32_top.vhd @@ -310,15 +310,15 @@ architecture neorv32_top_rtl of neorv32_top is signal dci_ndmrstn : std_ulogic; signal dci_haltreq : std_ulogic_vector(num_cores_c-1 downto 0); - -- CPU ICC links (up to 4 instances) -- - type multicore_icc_t is array (0 to 3) of icc_t; - signal icc_tx, icc_rx : multicore_icc_t; + -- CPU ICC links -- + type core_complex_icc_t is array (0 to num_cores_c-1) of icc_t; + signal icc_tx, icc_rx : core_complex_icc_t; - -- bus: CPU core complex (up to 4 instances) -- - type multicore_req_t is array (0 to 3) of bus_req_t; - type multicore_rsp_t is array (0 to 3) of bus_rsp_t; - signal core_req : multicore_req_t; - signal core_rsp : multicore_rsp_t; + -- bus: CPU core complex -- + type core_complex_req_t is array (0 to num_cores_c-1) of bus_req_t; + type core_complex_rsp_t is array (0 to num_cores_c-1) of bus_rsp_t; + signal cpu_i_req, cpu_d_req, icache_req, dcache_req, core_req : core_complex_req_t; + signal cpu_i_rsp, cpu_d_rsp, icache_rsp, dcache_rsp, core_rsp : core_complex_rsp_t; -- bus: system bus (including DMA complex) -- signal sys1_req, sys2_req, dma_req, sys3_req : bus_req_t; @@ -494,21 +494,21 @@ begin cpu_firq(14) <= firq(FIRQ_SLINK_RX); cpu_firq(15) <= firq(FIRQ_SLINK_TX); - -- CPU core(s) + optional L1 caches -- + -- CPU core(s) + optional L1 caches + bus switch -- core_complex_gen: for i in 0 to num_cores_c-1 generate - -- Core Complex --------------------------------------------------------------------------- + -- CPU Core ------------------------------------------------------------------------------- -- ------------------------------------------------------------------------------------------- - neorv32_cpu_neorv32_core_complex: entity neorv32.neorv32_core_complex + neorv32_cpu_inst: entity neorv32.neorv32_cpu generic map ( -- General -- HART_ID => i, - NUM_HARTS => num_cores_c, VENDOR_ID => vendorid_c, BOOT_ADDR => cpu_boot_addr_c, DEBUG_PARK_ADDR => dm_park_entry_c, DEBUG_EXC_ADDR => dm_exc_entry_c, + ICC_EN => DUAL_CORE_EN, -- RISC-V ISA Extensions -- RISCV_ISA_C => RISCV_ISA_C, RISCV_ISA_E => RISCV_ISA_E, @@ -547,60 +547,115 @@ begin PMP_NAP_MODE_EN => PMP_NAP_MODE_EN, -- Hardware Performance Monitors (HPM) -- HPM_NUM_CNTS => HPM_NUM_CNTS, - HPM_CNT_WIDTH => HPM_CNT_WIDTH, - -- Instruction Cache (iCACHE) -- - ICACHE_EN => ICACHE_EN, - ICACHE_NUM_BLOCKS => ICACHE_NUM_BLOCKS, - ICACHE_BLOCK_SIZE => ICACHE_BLOCK_SIZE, - ICACHE_UC_BEGIN => mem_uncached_begin_c, - -- Data Cache (dCACHE) -- - DCACHE_EN => DCACHE_EN, - DCACHE_NUM_BLOCKS => DCACHE_NUM_BLOCKS, - DCACHE_BLOCK_SIZE => DCACHE_BLOCK_SIZE, - DCACHE_UC_BEGIN => mem_uncached_begin_c + HPM_CNT_WIDTH => HPM_CNT_WIDTH ) port map ( -- global control -- - clk_i => clk_i, - rstn_i => rstn_sys, + clk_i => clk_i, + rstn_i => rstn_sys, -- interrupts -- - msi_i => msw_irq(i), - mei_i => mext_irq_i, - mti_i => mtime_irq(i), - firq_i => cpu_firq, - dbi_i => dci_haltreq(i), + msi_i => msw_irq(i), + mei_i => mext_irq_i, + mti_i => mtime_irq(i), + firq_i => cpu_firq, + dbi_i => dci_haltreq(i), -- inter-core communication links -- - icc_tx_o => icc_tx(i), - icc_rx_i => icc_rx(i), - -- system bus interface -- - bus_req_o => core_req(i), - bus_rsp_i => core_rsp(i) + icc_tx_o => icc_tx(i), + icc_rx_i => icc_rx(i), + -- instruction bus interface -- + ibus_req_o => cpu_i_req(i), + ibus_rsp_i => cpu_i_rsp(i), + -- data bus interface -- + dbus_req_o => cpu_d_req(i), + dbus_rsp_i => cpu_d_rsp(i) ); - -- inter-core communication (ICC) links -- - icc_connect: process(icc_tx) - begin - icc_rx(i) <= icc_terminate_c; - for j in 0 to num_cores_c-1 loop -- connect this core with every other core - icc_rx(i).rdy(j) <= icc_tx(j).rdy(i); - icc_rx(i).ack(j) <= icc_tx(j).ack(i); - icc_rx(i).dat(j*32+31 downto j*32) <= icc_tx(j).dat(i*32+31 downto i*32); - end loop; - end process icc_connect; - end generate; -- /core_complex + -- CPU L1 Instruction Cache (I-Cache) ----------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + neorv32_icache_enabled: + if ICACHE_EN generate + neorv32_icache_inst: entity neorv32.neorv32_cache + generic map ( + NUM_BLOCKS => ICACHE_NUM_BLOCKS, + BLOCK_SIZE => ICACHE_BLOCK_SIZE, + UC_BEGIN => mem_uncached_begin_c(31 downto 28), + UC_ENABLE => true, + READ_ONLY => true + ) + port map ( + clk_i => clk_i, + rstn_i => rstn_sys, + host_req_i => cpu_i_req(i), + host_rsp_o => cpu_i_rsp(i), + bus_req_o => icache_req(i), + bus_rsp_i => icache_rsp(i) + ); + end generate; - -- terminate unused interfaces -- - core_complex_terminate: - if num_cores_c < 4 generate - core_complex_terminate_gen: - for i in num_cores_c to 3 generate - core_req(i) <= req_terminate_c; - core_rsp(i) <= rsp_terminate_c; - icc_rx(i) <= icc_terminate_c; - icc_tx(i) <= icc_terminate_c; + neorv32_icache_disabled: + if not ICACHE_EN generate + icache_req(i) <= cpu_i_req(i); + cpu_i_rsp(i) <= icache_rsp(i); end generate; - end generate; + + + -- CPU L1 Data Cache (D-Cache) ------------------------------------------------------------ + -- ------------------------------------------------------------------------------------------- + neorv32_dcache_enabled: + if DCACHE_EN generate + neorv32_dcache_inst: entity neorv32.neorv32_cache + generic map ( + NUM_BLOCKS => DCACHE_NUM_BLOCKS, + BLOCK_SIZE => DCACHE_BLOCK_SIZE, + UC_BEGIN => mem_uncached_begin_c(31 downto 28), + UC_ENABLE => true, + READ_ONLY => false + ) + port map ( + clk_i => clk_i, + rstn_i => rstn_sys, + host_req_i => cpu_d_req(i), + host_rsp_o => cpu_d_rsp(i), + bus_req_o => dcache_req(i), + bus_rsp_i => dcache_rsp(i) + ); + end generate; + + neorv32_dcache_disabled: + if not DCACHE_EN generate + dcache_req(i) <= cpu_d_req(i); + cpu_d_rsp(i) <= dcache_rsp(i); + end generate; + + + -- Core Instruction/Data Bus Switch ------------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + neorv32_core_bus_switch_inst: entity neorv32.neorv32_bus_switch + generic map ( + ROUND_ROBIN_EN => false, -- use prioritizing arbitration + PORT_A_READ_ONLY => false, + PORT_B_READ_ONLY => true -- instruction fetch is read-only + ) + port map ( + clk_i => clk_i, + rstn_i => rstn_sys, + a_lock_i => '0', -- no exclusive accesses + a_req_i => dcache_req(i), -- data accesses are prioritized + a_rsp_o => dcache_rsp(i), + b_req_i => icache_req(i), + b_rsp_o => icache_rsp(i), + x_req_o => core_req(i), + x_rsp_i => core_rsp(i) + ); + + end generate; -- /core_complex + + + -- Inter-Core Communication (ICC) Links --------------------------------------------------- + -- ------------------------------------------------------------------------------------------- + icc_rx(icc_rx'left) <= icc_tx(icc_tx'right); + icc_rx(icc_rx'right) <= icc_tx(icc_tx'left); -- Core Complex Bus Arbiter --------------------------------------------------------------- @@ -617,10 +672,10 @@ begin clk_i => clk_i, rstn_i => rstn_sys, a_lock_i => '0', - a_req_i => core_req(0), - a_rsp_o => core_rsp(0), - b_req_i => core_req(1), - b_rsp_o => core_rsp(1), + a_req_i => core_req(core_req'left), + a_rsp_o => core_rsp(core_rsp'left), + b_req_i => core_req(core_req'right), + b_rsp_o => core_rsp(core_rsp'right), x_req_o => sys1_req, x_rsp_i => sys1_rsp ); diff --git a/rtl/file_list_soc.f b/rtl/file_list_soc.f index c1eeace41..76da9d04d 100644 --- a/rtl/file_list_soc.f +++ b/rtl/file_list_soc.f @@ -19,7 +19,6 @@ NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cpu.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_bus.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_cache.vhd -NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_core_complex.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_dma.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_application_image.vhd NEORV32_RTL_PATH_PLACEHOLDER/core/neorv32_imem.vhd diff --git a/sw/common/crt0.S b/sw/common/crt0.S index c653bc55a..ed7747c57 100644 --- a/sw/common/crt0.S +++ b/sw/common/crt0.S @@ -72,41 +72,38 @@ __crt0_entry: // ************************************************************************************************ -// SMP multi-core setup - wait for configuration if we are not core 0. +// SMP dual-core setup - wait for configuration if we are not core 0. // ************************************************************************************************ -#ifndef DISABLE_MULTICORE -__crt0_multicore_check: - beqz x1, __crt0_multicore_primary // proceed with normal boot-up if we are core 0 +#ifndef DISABLE_DUALCORE +__crt0_dualcore_check: + beqz x1, __crt0_dualcore_primary // proceed with normal boot-up if we are core 0 // setup machine software interrupt - la x15, __crt0_multicore_wakeup + la x15, __crt0_dualcore_wakeup csrw mtvec, x15 // install interrupt handler csrsi mie, 1 << 3 // enable software interrupt source csrsi mstatus, 1 << 3 // enable machine-level interrupts j __crt0_sleep // wait for interrupt in sleep mode // machine software interrupt handler -__crt0_multicore_wakeup: +__crt0_dualcore_wakeup: li x14, 0xfff40000 // CLINT.MSWI base address - slli x15, x1, 2 // offset = hart_id * 4 - add x14, x14, x15 - sw zero, 0(x14) // CLINT.MSWI[hart_id] + sw zero, 4(x14) // clear CLINT.MSWI[1] // check launch configuration from core 0 - csrw 0xbc2, zero // ICC.SR: link select = 0 - csrr x13, 0xbc0 // ICC.RX: signature + csrr x13, 0xbc1 // ICC_DATA: signature li x14, 0xffab4321 // expected signature - beq x14, x13, __crt0_multicore_launch + beq x14, x13, __crt0_dualcore_launch mret // go back to sleep if incorrect signature // get launch configuration from core 0 -__crt0_multicore_launch: - csrw 0xbc1, x14 // ICC.TX: acknowledge start - csrr x2, 0xbc0 // ICC.RX: stack top -> sp - csrr x12, 0xbc0 // ICC.RX: entry point +__crt0_dualcore_launch: + csrr x2, 0xbc1 // ICC_DATA: stack top -> sp + csrr x12, 0xbc1 // ICC_DATA: entry point + csrw 0xbc1, x14 // ICC_DATA: acknowledge start j __crt0_main_entry // start at entry point -__crt0_multicore_primary: +__crt0_dualcore_primary: #endif diff --git a/sw/example/demo_dual_core/main.c b/sw/example/demo_dual_core/main.c index ae080c3c5..57dd880e6 100644 --- a/sw/example/demo_dual_core/main.c +++ b/sw/example/demo_dual_core/main.c @@ -8,29 +8,42 @@ /**********************************************************************//** * @file demo_dual_core/main.c - * @author Stephan Nolting * @brief Simple dual-core SMP demo program. **************************************************************************/ #include #include "spinlock.h" /** User configuration */ -#define BAUD_RATE 19200 // UART0 Baud rate - -/** Function prototypes */ -void main_core1(void); -void clint_mtime_handler_core0(void); // core0 MTIMER interrupt handler -void clint_mtime_handler_core1(void); // core1 MTIMER interrupt handler +#define BAUD_RATE 19200 /** Global variables */ volatile uint8_t __attribute__ ((aligned (16))) core1_stack[2048]; // stack memory for core1 +/**********************************************************************//** + * Main function for core 1 (secondary core). + * + * @return Irrelevant (but can be inspected by the debugger). + **************************************************************************/ +int main_core1(void) { + + // setup NEORV32 runtime-environment (RTE) for _this_ core (core1) + neorv32_rte_setup(); + + // print message from core 0 + spin_lock(); + neorv32_uart0_printf("Hello world! This is core 1 running!\n"); + spin_unlock(); + + return 0; // return to crt0 and halt +} + + /**********************************************************************//** * Main function for core 0 (primary core). * * @attention This program requires the dual-core configuration, the CLINT, UART0 - * and the Zaamo ISA extension. + * and the A/Zaamo ISA extension. * * @return Irrelevant (but can be inspected by the debugger). **************************************************************************/ @@ -45,11 +58,11 @@ int main(void) { return -1; } neorv32_uart0_setup(BAUD_RATE, 0); - neorv32_uart0_printf("\n<< NEORV32 Dual-Core SMP Demo >>\n\n"); + neorv32_uart0_printf("\n<< NEORV32 Simple SMP Dual-Core Demo >>\n\n"); // check hardware/software configuration - if (NEORV32_SYSINFO->MISC[SYSINFO_MISC_HART] != 2) { // two cores available? + if (neorv32_sysinfo_get_numcores() < 2) { // two cores available? neorv32_uart0_printf("[ERROR] dual-core option not enabled!\n"); return -1; } @@ -58,40 +71,29 @@ int main(void) { return -1; } if ((neorv32_cpu_csr_read(CSR_MXISA) & (1< /**********************************************************************//** - * Private spinlock locked variable. We can only use a single spinlock - * as the processor only features a single reservation set. + * Private spinlock locked variable. **************************************************************************/ static volatile uint32_t __spin_locked = 0; @@ -14,7 +13,7 @@ static volatile uint32_t __spin_locked = 0; /**********************************************************************//** * Spinlock: set lock. * - * @warning This function is blocking until the lock is acquired. + * @warning This function is blocking until the lock is acquired and set. **************************************************************************/ void spin_lock(void) { diff --git a/sw/example/demo_dual_core/spinlock.h b/sw/example/demo_dual_core/spinlock.h index 534d98f2d..fb07b1841 100644 --- a/sw/example/demo_dual_core/spinlock.h +++ b/sw/example/demo_dual_core/spinlock.h @@ -1,6 +1,6 @@ /** * @file spinlock.h - * @brief Single simple spin-lock based on atomic lr/sc operations. + * @brief Single simple spin-lock based on atomic memory operations. */ #ifndef spinlock_h diff --git a/sw/example/demo_dual_core_icc/Makefile b/sw/example/demo_dual_core_icc/Makefile new file mode 100644 index 000000000..7715e365b --- /dev/null +++ b/sw/example/demo_dual_core_icc/Makefile @@ -0,0 +1,33 @@ +# Application makefile. +# Use this makefile to configure all relevant CPU / compiler options. + +# Override the default CPU ISA +MARCH = rv32ia_zicsr_zifencei + +# Override the default RISC-V GCC prefix +#RISCV_PREFIX ?= riscv-none-elf- + +# Override default optimization goal +EFFORT = -Os + +# Add extended debug symbols +USER_FLAGS += -ggdb -gdwarf-3 + +# Adjust processor IMEM size +USER_FLAGS += -Wl,--defsym,__neorv32_rom_size=16k + +# Adjust processor DMEM size +USER_FLAGS += -Wl,--defsym,__neorv32_ram_size=8k + +# Adjust maximum heap size +#USER_FLAGS += -Wl,--defsym,__neorv32_heap_size=3k + +# Additional sources +#APP_SRC += $(wildcard ./*.c) +#APP_INC += -I . + +# Set path to NEORV32 root directory +NEORV32_HOME ?= ../../.. + +# Include the main NEORV32 makefile +include $(NEORV32_HOME)/sw/common/common.mk diff --git a/sw/example/demo_dual_core_icc/main.c b/sw/example/demo_dual_core_icc/main.c new file mode 100644 index 000000000..69fadf673 --- /dev/null +++ b/sw/example/demo_dual_core_icc/main.c @@ -0,0 +1,158 @@ +// ================================================================================ // +// The NEORV32 RISC-V Processor - https://github.com/stnolting/neorv32 // +// Copyright (c) NEORV32 contributors. // +// Copyright (c) 2020 - 2025 Stephan Nolting. All rights reserved. // +// Licensed under the BSD-3-Clause license, see LICENSE for details. // +// SPDX-License-Identifier: BSD-3-Clause // +// ================================================================================ // + +/**********************************************************************//** + * @file demo_dual_core_icc/main.c + * @brief Set up the second core to accept, and run, any function pointer + * pushed into its ICC FIFO. Push in a few pieces of code and get answers back. + * Shamelessly copied from (BSD-3-Clause license): + * https://github.com/raspberrypi/pico-examples/tree/master/multicore/multicore_runner + **************************************************************************/ +#include + +/** User configuration */ +#define BAUD_RATE 19200 // UART0 Baud rate +#define TEST_NUM 11 // test number + +/** Global variables */ +volatile uint8_t __attribute__ ((aligned (16))) core1_stack[2048]; // stack memory for core1 + + +/**********************************************************************//** + * Main function for core 1 (secondary core). + * + * @return Irrelevant (but can be inspected by the debugger). + **************************************************************************/ +int core1_entry(void ) { + + // setup NEORV32 runtime-environment (RTE) for _this_ core (core1) + neorv32_rte_setup(); + + // Function pointer is passed via the ICC RX FIFO of _this_ core. + // We have one incoming int32_t as a parameter, and will provide an + // int32_t return value by simply pushing it back on the FIFO + // which also indicates the result is ready. + while (1) { + int32_t (*func)() = (int32_t(*)())neorv32_smp_icc_pop(); + int32_t p = neorv32_smp_icc_pop(); + int32_t result = (*func)(p); + neorv32_smp_icc_push(result); + } + + return 0; +} + + +/**********************************************************************//** + * Compute factorial. + * + * @param[in] n Compute factorial of n. + * @return Factorial of n. + **************************************************************************/ +int32_t factorial(int32_t n) { + + int32_t i = 0, f = 1; + for (i = 2; i <= n; i++) { + f *= i; + } + return f; +} + + +/**********************************************************************//** + * Compute n-th Fibonacci number. + * + * @param[in] n Compute factorial of n. + * @return n-th Fibonacci number. + **************************************************************************/ +int32_t fibonacci(int32_t n) { + + if (n == 0) { + return 0; + } + if (n == 1) { + return 1; + } + + int32_t i = 0, n1 = 0, n2 = 1, n3 = 0; + for (i = 2; i <= n; i++) { + n3 = n1 + n2; + n1 = n2; + n2 = n3; + } + return n3; +} + + +/**********************************************************************//** + * Set up the second core to accept, and run, any function pointer + * pushed into its ICC FIFO. Push in a few pieces of code and get answers back. + * + * @warning This program requires the dual-core configuration, the CLINT and UART0. + * + * @note This program was "inspired" by https://github.com/raspberrypi/pico-examples/tree/master. + * BSD-3-Clause license. + * + * @return Irrelevant (but can be inspected by the debugger). + **************************************************************************/ +int main(void) { + + // setup NEORV32 runtime-environment (RTE) for _this_ core (core0) + neorv32_rte_setup(); + + + // setup UART0 at default baud rate, no interrupts + if (neorv32_uart0_available() == 0) { // UART0 available? + return -1; + } + neorv32_uart0_setup(BAUD_RATE, 0); + neorv32_uart0_printf("\n<< NEORV32 SMP Dual-Core Inter-Core Communication Demo >>\n\n"); + + + // check hardware/software configuration + if (neorv32_sysinfo_get_numcores() < 2) { // two cores available? + neorv32_uart0_printf("[ERROR] dual-core option not enabled!\n"); + return -1; + } + if (neorv32_clint_available() == 0) { // CLINT available? + neorv32_uart0_printf("[ERROR] CLINT module not available!\n"); + return -1; + } + + + // Core one is halted in crt0 right after reset and wait for its machine-level software + // interrupt before resuming. Before the interrupt is triggered, a launch configuration + // for core 1 has to be provided. This launch configuration defines the entry point for + // core 1 as well as the stack setup. All this is handle by "neorv32_smp_launch()". + + neorv32_uart0_printf("Launching core 1...\n"); + int smp_launch_rc = neorv32_smp_launch(core1_entry, (uint8_t*)core1_stack, sizeof(core1_stack)); + + // check if launching was successful + if (smp_launch_rc) { + neorv32_uart0_printf("[ERROR] Launching core1 failed (%d)!\n", smp_launch_rc); + return -1; + } + + + // This example dispatches arbitrary functions to run on the second core. To do this we + // run a dispatcher on the second core that accepts a function pointer and runs it. + + neorv32_smp_icc_push((uintptr_t) &factorial); + neorv32_smp_icc_push(TEST_NUM); + // We could now do a load of stuff on core 0 and get our result later + neorv32_uart0_printf("Factorial(%d) is %d\n", TEST_NUM, neorv32_smp_icc_pop()); + + // Now try a different function + neorv32_smp_icc_push((uintptr_t) &fibonacci); + neorv32_smp_icc_push(TEST_NUM); + neorv32_uart0_printf("Fibonacci(%d) is %d\n", TEST_NUM, neorv32_smp_icc_pop()); + + + return 0; +} diff --git a/sw/example/processor_check/main.c b/sw/example/processor_check/main.c index c8e6430cf..68f2f03e4 100644 --- a/sw/example/processor_check/main.c +++ b/sw/example/processor_check/main.c @@ -68,7 +68,7 @@ void xirq_trap_handler0(void); void xirq_trap_handler1(void); void test_ok(void); void test_fail(void); -void core1_main(void); +int core1_main(void); // MCAUSE value that will be NEVER set by the hardware const uint32_t mcause_never_c = 0x80000000UL; // = reserved @@ -2154,7 +2154,7 @@ int main() { neorv32_cpu_csr_write(CSR_MIE, 1 << CSR_MIE_MSIE); // launch core 1 - tmp_a = (uint32_t)neorv32_smp_launch(1, core1_main, (uint8_t*)core1_stack, sizeof(core1_stack)); + tmp_a = (uint32_t)neorv32_smp_launch(core1_main, (uint8_t*)core1_stack, sizeof(core1_stack)); // wait for software interrupt (issued by core 1) in sleep mode neorv32_cpu_sleep(); @@ -2405,7 +2405,7 @@ void test_fail(void) { /**********************************************************************//** * Test code to be run on second CPU core **************************************************************************/ -void core1_main(void) { +int core1_main(void) { // trigger software interrupt of core0 neorv32_clint_msi_set(0); diff --git a/sw/lib/include/neorv32_cpu_csr.h b/sw/lib/include/neorv32_cpu_csr.h index d6432c31e..8db482a26 100644 --- a/sw/lib/include/neorv32_cpu_csr.h +++ b/sw/lib/include/neorv32_cpu_csr.h @@ -138,10 +138,8 @@ enum NEORV32_CSR_enum { CSR_MHPMCOUNTER15H = 0xb8f, /**< 0xb8f - mhpmcounter15h: Machine hardware performance monitor 15 counter high word */ /* inter-core communication */ - CSR_MXICCRXD = 0xbc0, /**< 0xbc0 - mxiccrxd: Machine ICC link RX data */ - CSR_MXICCTXD = 0xbc1, /**< 0xbc1 - mxicctxd: Machine ICC link TX data */ - CSR_MXICCSR0 = 0xbc2, /**< 0xbc1 - mxiccsr0: Machine ICC link status register 0 (#NEORV32_CSR_MXICCSR_enum) */ - CSR_MXICCSR1 = 0xbc3, /**< 0xbc1 - mxiccsr1: Machine ICC link status register 1 (#NEORV32_CSR_MXICCSR_enum) */ + CSR_MXICCSREG = 0xbc0, /**< 0xbc0 - mxiccsreg: Machine ICC status register (#)*/ + CSR_MXICCDATA = 0xbc1, /**< 0xbc1 - mxiccdata: Machine ICC RX/TX data register */ /* user counters and timers */ CSR_CYCLE = 0xc00, /**< 0xc00 - cycle: User cycle counter low word */ @@ -347,14 +345,11 @@ enum NEORV32_CSR_MXISA_enum { /**********************************************************************//** - * CPU mxiccsr CSR (r/w): Inter-core communication control and status (NEORV32-specific) + * CPU mxiccsreg CSR (r/w): Inter-core communication status register (NEORV32-specific) **************************************************************************/ -enum NEORV32_CSR_MXICCSR_enum { - CSR_MXICCSR_LINK_LSB = 0, /**< CPU mxiccsr CSR (0): link/hart select LSB (r/w)*/ - CSR_MXICCSR_LINK_MSB = 1, /**< CPU mxiccsr CSR (1): link/hart select MSB (r/w)*/ - - CSR_MXICCSR_TX_FREE = 30, /**< CPU mxiccsr CSR (30): Free space in selected link's TX FIFO (r/-)*/ - CSR_MXICCSR_RX_AVAIL = 31 /**< CPU mxiccsr CSR (31): Data available in selected link's RX FIFO (r/-)*/ +enum NEORV32_CSR_MXICCSREG_enum { + CSR_MXICCSREG_RX_AVAIL = 0, /**< CPU mxiccsreg CSR (0): Data available in link's RX FIFO (r/-)*/ + CSR_MXICCSREG_TX_FREE = 1 /**< CPU mxiccsreg CSR (1): Free space in link's TX FIFO (r/-)*/ }; diff --git a/sw/lib/include/neorv32_smp.h b/sw/lib/include/neorv32_smp.h index 4c44ea67a..b5753eb7a 100644 --- a/sw/lib/include/neorv32_smp.h +++ b/sw/lib/include/neorv32_smp.h @@ -8,7 +8,7 @@ /** * @file neorv32_smp.h - * @brief SMP HW driver header file. + * @brief Symmetric multiprocessing (SMP) library header file. */ #ifndef neorv32_smp_h @@ -19,7 +19,9 @@ * @name Prototypes **************************************************************************/ /**@{*/ -int neorv32_smp_launch(int hart_id, void (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes); +int neorv32_smp_launch(int (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes); +void neorv32_smp_icc_push(uint32_t data); +uint32_t neorv32_smp_icc_pop(void); /**@}*/ @@ -35,56 +37,48 @@ inline uint32_t __attribute__ ((always_inline)) neorv32_smp_whoami(void) { /**********************************************************************//** - * Get data from core via ICC link. + * Get data from other core via ICC link. * Check link status before #neorv32_smp_icc_avail(). * - * @param[in] hart_sel Source core. - * @return Data word (32-bit) received from selected core. + * @return Data word (32-bit) received from other core. **************************************************************************/ -inline uint32_t __attribute__ ((always_inline)) neorv32_smp_icc_get(int hart_sel) { +inline uint32_t __attribute__ ((always_inline)) neorv32_smp_icc_get(void) { - neorv32_cpu_csr_write(CSR_MXICCSR0, (uint32_t)hart_sel); - return neorv32_cpu_csr_read(CSR_MXICCRXD); + return neorv32_cpu_csr_read(CSR_MXICCDATA); } /**********************************************************************//** - * Send data to core via ICC link. + * Send data to other core via ICC link. * Check link status before #neorv32_smp_icc_free(). * - * @param[in] hart_sel Destination core. - * @param[in] data Data word (32-bit) to be send to selected core. + * @param[in] data Data word (32-bit) to be send to other core. **************************************************************************/ -inline void __attribute__ ((always_inline)) neorv32_smp_icc_put(int hart_sel, uint32_t data) { +inline void __attribute__ ((always_inline)) neorv32_smp_icc_put(uint32_t data) { - neorv32_cpu_csr_write(CSR_MXICCSR0, (uint32_t)hart_sel); - neorv32_cpu_csr_write(CSR_MXICCTXD, data); + neorv32_cpu_csr_write(CSR_MXICCDATA, data); } /**********************************************************************//** * Check if ICC link data is available. * - * @param[in] hart_sel Source core. * @return 0 = no data available, nonzero = data available. **************************************************************************/ -inline int __attribute__ ((always_inline)) neorv32_smp_icc_avail(int hart_sel) { +inline int __attribute__ ((always_inline)) neorv32_smp_icc_avail(void) { - neorv32_cpu_csr_write(CSR_MXICCSR0, (uint32_t)hart_sel); - return neorv32_cpu_csr_read(CSR_MXICCSR0) & (1 << CSR_MXICCSR_RX_AVAIL); + return neorv32_cpu_csr_read(CSR_MXICCSREG) & (1 << CSR_MXICCSREG_RX_AVAIL); } /**********************************************************************//** * Check if free space in ICC link. * - * @param[in] hart_sel Destination core. * @return 0 = no free space available, nonzero = free space available. **************************************************************************/ -inline int __attribute__ ((always_inline)) neorv32_smp_icc_free(int hart_sel) { +inline int __attribute__ ((always_inline)) neorv32_smp_icc_free(void) { - neorv32_cpu_csr_write(CSR_MXICCSR0, (uint32_t)hart_sel); - return neorv32_cpu_csr_read(CSR_MXICCSR0) & (1 << CSR_MXICCSR_TX_FREE); + return neorv32_cpu_csr_read(CSR_MXICCSREG) & (1 << CSR_MXICCSREG_TX_FREE); } #endif // neorv32_smp_h diff --git a/sw/lib/source/neorv32_smp.c b/sw/lib/source/neorv32_smp.c index 1eb0e1d39..ec790fb0f 100644 --- a/sw/lib/source/neorv32_smp.c +++ b/sw/lib/source/neorv32_smp.c @@ -8,36 +8,40 @@ /** * @file neorv32_smp.c - * @brief SMP HW driver source file. + * @brief Symmetric multiprocessing (SMP) library source file. */ #include /**********************************************************************//** - * Configure and start SMP core. + * Configure and start SMP core 1. * * @warning This function can be executed on core 0 only. * - * @param[in] hart_id Hart/core select. - * @param[in] entry_point Core's main function (must be of type "void entry_point(void)"). - * @param[in] stack_memory Pointer to beginning of core's stack memory array. Should be at least 512 bytes. - * @param[in] stack_size_bytes Core's stack size in bytes. - * @return 0 if launching succeeded. -1 if invalid hart ID or CLINT not available. -2 if core is not responding. + * @param[in] entry_point Core1's main function; + * must be of type "int entry_point(void)". + * + * @param[in] stack_memory Pointer to beginning of core1's stack memory array. + * Should be at least 512 bytes. + * + * @param[in] stack_size_bytes Core1's stack size in bytes. + * + * @return 0 if launching succeeded. -1 if invalid hart ID or CLINT not available. + * -2 if core1 is not responding. **************************************************************************/ -int neorv32_smp_launch(int hart_id, void (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes) { +int neorv32_smp_launch(int (*entry_point)(void), uint8_t* stack_memory, size_t stack_size_bytes) { // sanity checks - if ((neorv32_cpu_csr_read(CSR_MHARTID) != 0) || // this can be executed on core 0 only - (hart_id == 0) || // we cannot launch core 0 - (hart_id > (neorv32_sysinfo_get_numcores()-1)) || // selected core not available + if ((neorv32_cpu_csr_read(CSR_MHARTID) != 0) || // this can be executed on core0 only + (neorv32_sysinfo_get_numcores() < 2) || // core1 not available (neorv32_clint_available() == 0)) { // we need the CLINT return -1; } // drain input queue from selected core - while (neorv32_smp_icc_avail(hart_id)) { - neorv32_smp_icc_get(hart_id); + while (neorv32_smp_icc_avail()) { + neorv32_smp_icc_get(); } // align end of stack to 16-bytes according to the RISC-V ABI (#1021) @@ -45,24 +49,52 @@ int neorv32_smp_launch(int hart_id, void (*entry_point)(void), uint8_t* stack_me // send launch configuration const uint32_t magic_number = 0xffab4321u; - neorv32_smp_icc_put(hart_id, magic_number); // identifies valid configuration - neorv32_smp_icc_put(hart_id, stack_top); // top of core's stack - neorv32_smp_icc_put(hart_id, (uint32_t)entry_point); // entry point + neorv32_smp_icc_put(magic_number); // identifies valid configuration + neorv32_smp_icc_put(stack_top); // top of core1's stack + neorv32_smp_icc_put((uint32_t)entry_point); // entry point - // start core by triggering its software interrupt - neorv32_clint_msi_set(hart_id); + // start core1 by triggering its software interrupt + neorv32_clint_msi_set(1); // wait for start acknowledge int cnt = 0; while (1) { - if (neorv32_smp_icc_avail(hart_id)) { - if (neorv32_smp_icc_get(hart_id) == magic_number) { + if (neorv32_smp_icc_avail()) { + if (neorv32_smp_icc_get() == magic_number) { return 0; } } if (cnt > 1000) { - return -2; // timeout; core did not respond + return -2; // timeout; core1 did not respond } cnt++; } } + + +/**********************************************************************//** + * Send data to other core via ICC link (blocking). + * + * @warning This functions is blocking until data has been send. + * + * @param[in] data Data word (32-bit) to be send to other core. + **************************************************************************/ +void neorv32_smp_icc_push(uint32_t data) { + + while (neorv32_smp_icc_free() == 0); // wait for free FIFO space + neorv32_smp_icc_put(data); +} + + +/**********************************************************************//** + * Get data from other core via ICC link (blocking). + * + * @warning This functions is blocking until data has been received. + * + * @return Data word (32-bit) received from other core. + **************************************************************************/ +uint32_t neorv32_smp_icc_pop(void) { + + while (neorv32_smp_icc_avail() == 0); // wait until FIFO data is available + return neorv32_smp_icc_get(); +}