diff --git a/pythondata_cpu_mor1kx/verilog/.gitignore b/pythondata_cpu_mor1kx/verilog/.gitignore new file mode 100644 index 0000000..397b4a7 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/.gitignore @@ -0,0 +1 @@ +*.log diff --git a/pythondata_cpu_mor1kx/verilog/.travis.yml b/pythondata_cpu_mor1kx/verilog/.travis.yml new file mode 100755 index 0000000..e13c409 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/.travis.yml @@ -0,0 +1,42 @@ +language: c +cache: ccache +sudo: true +dist: trusty + +services: + - docker + +before_install: + - docker pull librecores/librecores-ci-openrisc + - docker images + +script: + - docker run --rm -v $(pwd):/src -e "JOB=$JOB" -e "SIM=$SIM" -e "PIPELINE=$PIPELINE" -e "EXPECTED_FAILURES=$EXPECTED_FAILURES" -e "EXTRA_CORE_ARGS=$EXTRA_CORE_ARGS" librecores/librecores-ci-openrisc /src/.travis/test.sh + +matrix: + fast_finish: true + +jobs: + allow_failures: + - env: JOB=or1k-tests SIM=icarus PIPELINE=ESPRESSO + include: + - stage: verilator + env: JOB=verilator + - stage: testing + env: JOB=or1k-tests SIM=icarus PIPELINE=CAPPUCCINO EXPECTED_FAILURES="or1k-cy" + - stage: testing + env: JOB=or1k-tests SIM=icarus PIPELINE=CAPPUCCINO EXPECTED_FAILURES="or1k-cy" EXTRA_CORE_ARGS="--feature_dmmu NONE" + - stage: testing + env: JOB=or1k-tests SIM=icarus PIPELINE=CAPPUCCINO EXPECTED_FAILURES="or1k-cy or1k-dsxinsn" EXTRA_CORE_ARGS="--feature_immu NONE" + - stage: testing + env: JOB=or1k-tests SIM=icarus PIPELINE=CAPPUCCINO EXPECTED_FAILURES="or1k-cy" EXTRA_CORE_ARGS="--feature_datacache NONE" + - stage: testing + env: JOB=or1k-tests SIM=icarus PIPELINE=CAPPUCCINO EXPECTED_FAILURES="or1k-cy" EXTRA_CORE_ARGS="--feature_instructioncache NONE" + - stage: testing + env: JOB=or1k-tests SIM=icarus PIPELINE=CAPPUCCINO EXPECTED_FAILURES="or1k-cy" EXTRA_CORE_ARGS="--feature_debugunit NONE" + - stage: testing + env: JOB=or1k-tests SIM=icarus PIPELINE=CAPPUCCINO EXPECTED_FAILURES="or1k-cy or1k-cmov" EXTRA_CORE_ARGS="--feature_cmov NONE" + - stage: testing 
+ env: JOB=or1k-tests SIM=icarus PIPELINE=CAPPUCCINO EXPECTED_FAILURES="or1k-cy or1k-ext" EXTRA_CORE_ARGS="--feature_ext NONE" + - stage: testing + env: JOB=or1k-tests SIM=icarus PIPELINE=ESPRESSO diff --git a/pythondata_cpu_mor1kx/verilog/.travis/run-or1k-tests.sh b/pythondata_cpu_mor1kx/verilog/.travis/run-or1k-tests.sh new file mode 100755 index 0000000..c86ad76 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/.travis/run-or1k-tests.sh @@ -0,0 +1,24 @@ +#!/bin/sh + +set -x + +PATH="$HOME/tools/or1k-elf/bin:${PATH}" +PATH="$HOME/tools/bin:${PATH}" +export PATH + +# allow overriding root dir if we aren't running in travis +if [ -z $OR1K_TESTS_ROOT ] ; then + OR1K_TESTS_ROOT=$HOME/src/tools/or1k-tests +fi + +cd $OR1K_TESTS_ROOT/native +export CORE_ARGS="--pipeline=$PIPELINE $EXTRA_CORE_ARGS" +export TARGET=mor1kx_tb +export TARGET_ARGS="--tool=$SIM" +./runtests.sh $@ +result=$? + +if [ $result != 0 ] ; then + cat runtests.log +fi +exit $result diff --git a/pythondata_cpu_mor1kx/verilog/.travis/run-verilator.sh b/pythondata_cpu_mor1kx/verilog/.travis/run-verilator.sh new file mode 100755 index 0000000..2a8d310 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/.travis/run-verilator.sh @@ -0,0 +1,5 @@ +#!/bin/sh + +export PATH=$HOME/tools/bin:$PATH + +verilator --lint-only rtl/verilog/*.v +incdir+rtl/verilog diff --git a/pythondata_cpu_mor1kx/verilog/.travis/test.sh b/pythondata_cpu_mor1kx/verilog/.travis/test.sh new file mode 100755 index 0000000..93079ef --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/.travis/test.sh @@ -0,0 +1,19 @@ +#!/bin/bash + +export HOME=/tmp + +# Setup fusesoc and add the cores required by or1k-tests +fusesoc init -y +fusesoc library add mor1kx-generic https://github.com/stffrdhrn/mor1kx-generic.git +fusesoc library add intgen https://github.com/stffrdhrn/intgen.git +fusesoc library add mor1kx /src + +cd $HOME/src/tools + +cd /src + +echo "Running Job $JOB $SIM $PIPELINE" +echo "Expected failures: $EXPECTED_FAILURES" +echo "Extra core 
args: $EXTRA_CORE_ARGS" + +./.travis/run-${JOB}.sh diff --git a/pythondata_cpu_mor1kx/verilog/Jenkinsfile b/pythondata_cpu_mor1kx/verilog/Jenkinsfile new file mode 100644 index 0000000..bdfb9af --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/Jenkinsfile @@ -0,0 +1,83 @@ +openriscPipeline { + + yosysReport { + core 'mor1kx' + target 'synth' + logPath 'build/mor1kx_*/synth-icestorm/yosys.log' + } + + job('verilator') { + job 'verilator' + } + + job('icarus-cappuccino') { + job 'or1k-tests' + sim 'icarus' + pipeline 'CAPPUCCINO' + expectedFailures 'or1k-cy' + } + + job('icarus-cappuccino-dmmu-none') { + job 'or1k-tests' + sim 'icarus' + pipeline 'CAPPUCCINO' + expectedFailures 'or1k-cy' + extraCoreArgs '--feature_dmmu NONE' + } + + job('icarus-cappuccino-immu-none') { + job 'or1k-tests' + sim 'icarus' + pipeline 'CAPPUCCINO' + expectedFailures 'or1k-cy or1k-dsxinsn' + extraCoreArgs '--feature_immu NONE' + } + + job('icarus-cappuccino-datacache-none') { + job 'or1k-tests' + sim 'icarus' + pipeline 'CAPPUCCINO' + expectedFailures 'or1k-cy' + extraCoreArgs '--feature_datacache NONE' + } + + job('icarus-cappuccino-instructioncache-none') { + job 'or1k-tests' + sim 'icarus' + pipeline 'CAPPUCCINO' + expectedFailures 'or1k-cy' + extraCoreArgs '--feature_instructioncache NONE' + } + + job('icarus-cappuccino-debugunit-none') { + job 'or1k-tests' + sim 'icarus' + pipeline 'CAPPUCCINO' + expectedFailures 'or1k-cy' + extraCoreArgs '--feature_debugunit NONE' + } + + job('icarus-cappuccino-cmov-none') { + job 'or1k-tests' + sim 'icarus' + pipeline 'CAPPUCCINO' + expectedFailures 'or1k-cy or1k-cmov' + extraCoreArgs '--feature_cmov NONE' + } + + job('icarus-cappuccino-ext-none') { + job 'or1k-tests' + sim 'icarus' + pipeline 'CAPPUCCINO' + expectedFailures 'or1k-cy or1k-ext' + extraCoreArgs '--feature_ext NONE' + } + +// TODO: Fix failing Job +// +// job('icarus-espresso') { +// job 'or1k-tests' +// sim 'icarus' +// pipeline 'ESPRESSO' +// } +} diff --git 
a/pythondata_cpu_mor1kx/verilog/LICENSE b/pythondata_cpu_mor1kx/verilog/LICENSE new file mode 100644 index 0000000..da50884 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/LICENSE @@ -0,0 +1,370 @@ +Open Hardware Description License Version 1.0 +(Based on the MPL 2.0 RC2) +======================================================== + +1. Definitions +-------------- + +1.1. "Contributor" + means each individual or legal entity that creates, contributes to + the creation of, or owns a Covered Hardware Description. + +1.2. "Contributor Version" + means the combination of the Contributions of others (if any) used + by a Contributor and that particular Contributor's Contribution. + +1.3. "Contribution" + means Covered Hardware Description of a particular Contributor. + +1.4. "Covered Hardware Description" + means Source Code Form to which the initial Contributor has attached + the notice in Exhibit A, the Processed Form of such Source Code + Form, and Modifications of such Source Code Form, in each case + including portions thereof. + +1.5. "Incompatible With Secondary Licenses" + means that the initial Contributor has attached the notice described in + Exhibit B to the Covered Hardware Description + +1.6. "Processed Form" + means any form of the work other than Source Code Form. + +1.7. "Larger Work" + means a work that combines a Covered Hardware Description with code in a + separate file or files not governed by the terms of this License. + +1.8. "License" + means this document. + +1.9. "Licensable" + means having the right to grant, to the maximum extent possible, + whether at the time of the initial grant or subsequently, any and + all of the rights conveyed by this License. + +1.10. 
"Modifications" + means any of the following: + + (a) any file in Source Code Form that results from an addition to, + deletion from, or modification of the contents of a Covered + Hardware Description; or + + (b) any new file in Source Code Form that contains any Covered + Hardware Description Source. + +1.11. "Patent Claims" of a Contributor + means any patent claim(s), including without limitation, method, + process, and apparatus claims, in any patent Licensable by such + Contributor that would be infringed, but for the grant of the + License, by the making, using, selling, offering for sale, having + made, import, or transfer of either its Contributions or its + Contributor Version. + +1.12. "Secondary License" + means either the GNU General Public License, Version 2.0 or later, + the GNU Lesser General Public License, Version 2.1 or later, or the + GNU Affero General Public License, Version 3.0 or later, or the + TAPR Open Hardware License, Version 1.0 or later, or the CERN OHL, + Verstion 1.1 or later. + +1.13. "Source Code Form" + means the form of the work preferred for making modifications. + +1.14. "You" (or "Your") + means an individual or a legal entity exercising rights under this + License. For legal entities, "You" includes any entity that + controls, is controlled by, or is under common control with You. For + purposes of this definition, "control" means (a) the power, direct + or indirect, to cause the direction or management of such entity, + whether by contract or otherwise, or (b) ownership of more than + fifty percent (50%) of the outstanding shares or beneficial + ownership of such entity. + +2. License Grants and Conditions +-------------------------------- + +2.1. 
Grants + +Each Contributor hereby grants You a world-wide, royalty-free, +non-exclusive license: + +(a) under intellectual property rights (other than patent or trademark) + Licensable by such Contributor to use, reproduce, make available, + modify, display, perform, distribute, and otherwise exploit its + Contributions, either on an unmodified basis, with Modifications, or + as part of a Larger Work; and + +(b) under Patent Claims of such Contributor to make, use, sell, offer + for sale, have made, import, and otherwise transfer either its + Contributions or its Contributor Version. + +2.2. Effective Date + +The licenses granted in Section 2.1 with respect to any Contribution +become effective for each Contribution on the date the Contributor first +distributes such Contribution. + +2.3. Limitations on Grant Scope + +The licenses granted in this Section 2 are the only rights granted under +this License. No additional rights or licenses will be implied from the +distribution or licensing of Covered Hardware Description under this License. +Notwithstanding Section 2.1(b) above, no patent license is granted by a +Contributor: + +(a) for any code that a Contributor has removed from Covered Hardware + Description; or + +(b) for infringements caused by: (i) Your and any other third party's + modifications of a Covered Hardware Description, or (ii) the combination + of its Contributions with other Source (except as part of its Contributor + Version); or + +(c) under Patent Claims infringed by a Covered Hardware Description in the + absence of its Contributions. + +This License does not grant any rights in the trademarks, service marks, +or logos of any Contributor (except as may be necessary to comply with +the notice requirements in Section 3.4). + +2.4. 
Subsequent Licenses + +No Contributor makes additional grants as a result of Your choice to +distribute the Covered Hardware Description under a subsequent version of this +License (see Section 10.2) or under the terms of a Secondary License (if +permitted under the terms of Section 3.3). + +2.5. Representation + +Each Contributor represents that the Contributor believes its +Contributions are its original creation(s) or it has sufficient rights +to grant the rights to its Contributions conveyed by this License. + +2.6. Fair Use + +This License is not intended to limit any rights You have under +applicable copyright doctrines of fair use, fair dealing, or other +equivalents. + +2.7. Conditions + +Sections 3.1, 3.2, 3.3, and 3.4 are conditions of the licenses granted +in Section 2.1. + +3. Responsibilities +------------------- + +3.1. Distribution of Source Form + +All distribution of Covered Hardware Description in Source Code Form, +including any Modifications that You create or to which You contribute, must be +under the terms of this License. You must inform recipients that the Source +Code Form of the Covered Hardware Description is governed by the terms of this +License, and how they can obtain a copy of this License. You may not +attempt to alter or restrict the recipients' rights in the Source Code +Form. + +3.2. 
Distribution of Processed Form + +If You distribute Covered Hardware Description in Processed Form then: + +(a) such Covered Hardware Description must also be made available in Source + Code Form, as described in Section 3.1, and You must inform recipients of + the Processed Form how they can obtain a copy of such Source Code + Form by reasonable means in a timely manner, at a charge no more + than the cost of distribution to the recipient; and + +(b) You may distribute such Processed Form under the terms of this + License, or sublicense it under different terms, provided that the + license for the Processed Form does not attempt to limit or alter + the recipients' rights in the Source Code Form under this License. + +3.3. Distribution of a Larger Work + +You may create and distribute a Larger Work under terms of Your choice, +provided that You also comply with the requirements of this License for +the Covered Hardware Description. If the Larger Work is a combination of a +Covered Hardware Description with a work governed by a Secondary License, and +the Covered Hardware Description is not Incompatible With Secondary Licenses, +this License permits You to additionally distribute such Covered Hardware +Description under the terms of that Secondary License, so that the recipient of +the Larger Work may, at their option, further distribute the Covered Hardware +Description under the terms of either this License or that Secondary License. + +3.4. Notices + +You may not remove or alter the substance of any license notices +(including copyright notices, patent notices, disclaimers of warranty, +or limitations of liability) contained within the Source Code Form of +the Covered Hardware Description, except that You may alter any license notices +to the extent required to remedy known factual inaccuracies. + +3.5. 
Application of Additional Terms + +You may choose to offer, and to charge a fee for, warranty, support, +indemnity or liability obligations to one or more recipients of a Covered +Hardware Description. However, You may do so only on Your own behalf, and not +on behalf of any Contributor. You must make it absolutely clear that any +such warranty, support, indemnity, or liability obligation is offered by +You alone, and You hereby agree to indemnify every Contributor for any +liability incurred by such Contributor as a result of warranty, support, +indemnity or liability terms You offer. You may include additional +disclaimers of warranty and limitations of liability specific to any +jurisdiction. + +4. Inability to Comply Due to Statute or Regulation +--------------------------------------------------- + +If it is impossible for You to comply with any of the terms of this +License with respect to some or all of the Covered Hardware Description due to +statute, judicial order, or regulation then You must: (a) comply with +the terms of this License to the maximum extent possible; and (b) +describe the limitations and the code they affect. Such description must +be placed in a text file included with all distributions of the Covered +Hardware Description under this License. Except to the extent prohibited by +statute or regulation, such description must be sufficiently detailed for a +recipient of ordinary skill to be able to understand it. + +5. Termination +-------------- + +5.1. The rights granted under this License will terminate automatically +if You fail to comply with any of its terms. 
However, if You become +compliant, then the rights granted under this License from a particular +Contributor are reinstated (a) provisionally, unless and until such +Contributor explicitly and finally terminates Your grants, and (b) on an +ongoing basis, if such Contributor fails to notify You of the +non-compliance by some reasonable means prior to 60 days after You have +come back into compliance. Moreover, Your grants from a particular +Contributor are reinstated on an ongoing basis if such Contributor +notifies You of the non-compliance by some reasonable means, this is the +first time You have received notice of non-compliance with this License +from such Contributor, and You become compliant prior to 30 days after +Your receipt of the notice. + +5.2. If You initiate litigation against any entity by asserting a patent +infringement claim (excluding declaratory judgment actions, +counter-claims, and cross-claims) alleging that a Contributor Version +directly or indirectly infringes any patent, then the rights granted to +You by any and all Contributors for the Covered Hardware Description under +Section 2.1 of this License shall terminate. + +5.3. In the event of termination under Sections 5.1 or 5.2 above, all +end user license agreements (excluding distributors and resellers) which +have been validly granted by You or Your distributors under this License +prior to termination shall survive termination. + +************************************************************************ +* * +* 6. Disclaimer of Warranty * +* ------------------------- * +* * +* The Covered Hardware Description is provided under this License on * +* an "as is" basis, without warranty of any kind, either expressed, * +* implied, or statutory, including, without limitation, warranties * +* that the Covered Hardware Description is free of defects, * +* merchantable, fit for a particular purpose or non-infringing. 
The * +* entire risk as to the quality and performance of the Covered * +* Hardware Description is with You. Should any Covered Hardware * +* Description prove defective in any respect, You (not any * +* Contributor) assume the cost of any necessary servicing, repair, or * +* correction. This disclaimer of warranty constitutes an essential * +* part of this License. No use of any Covered Hardware Description is * +* authorized under this License except under this disclaimer. * +* * +************************************************************************ + +************************************************************************ +* * +* 7. Limitation of Liability * +* -------------------------- * +* * +* Under no circumstances and under no legal theory, whether tort * +* (including negligence), contract, or otherwise, shall any * +* Contributor, or anyone who distributes Covered Hardware Description * +* as permitted above, be liable to You for any direct, indirect, * +* special, incidental, or consequential damages of any character * +* including, without limitation, damages for lost profits, loss of * +* goodwill, work stoppage, computer failure or malfunction, or any * +* and all other commercial damages or losses, even if such party * +* shall have been informed of the possibility of such damages. This * +* limitation of liability shall not apply to liability for death or * +* personal injury resulting from such party's negligence to the * +* extent applicable law prohibits such limitation. Some * +* jurisdictions do not allow the exclusion or limitation of * +* incidental or consequential damages, so this exclusion and * +* limitation may not apply to You. * +* * +************************************************************************ + +8. 
Litigation +------------- + +Any litigation relating to this License may be brought only in the +courts of a jurisdiction where the defendant maintains its principal +place of business and such litigation shall be governed by laws of that +jurisdiction, without reference to its conflict-of-law provisions. +Nothing in this Section shall prevent a party's ability to bring +cross-claims or counter-claims. + +9. Miscellaneous +---------------- + +This License represents the complete agreement concerning the subject +matter hereof. If any provision of this License is held to be +unenforceable, such provision shall be reformed only to the extent +necessary to make it enforceable. Any law or regulation which provides +that the language of a contract shall be construed against the drafter +shall not be used to construe this License against a Contributor. + +10. Versions of the License +--------------------------- + +10.1. New Versions + +Julius Baxter is the license steward. Except as provided in Section +10.3, no one other than the license steward has the right to modify or +publish new versions of this License. Each version will be given a +distinguishing version number. + +10.2. Effect of New Versions + +You may distribute the Covered Hardware Description under the terms of the +version of the License under which You originally received the Covered Hardware +Description, or under the terms of any subsequent version published by the +license steward. + +10.3. Modified Versions + +If you create designs not governed by this License, and you want to +create a new license for such designs, you may create and use a +modified version of this License if you rename the license and remove +any references to the name of the license steward (except to note that +such modified license differs from this License). + +10.4. 
Distributing Source Code Form that is Incompatible With Secondary +Licenses + +If You choose to distribute Source Code Form that is Incompatible With +Secondary Licenses under the terms of this version of the License, the +notice described in Exhibit B of this License must be attached. + +Exhibit A - Source Code Form License Notice +------------------------------------------- + + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + +If it is not possible or desirable to put the notice in a particular +file, then You may include the notice in a location (such as a LICENSE +file in a relevant directory) where a recipient would be likely to look +for such a notice. + +You may add additional accurate notices of copyright ownership. + +Exhibit B - "Incompatible With Secondary Licenses" Notice +--------------------------------------------------------- + + This Source Code Form is "Incompatible With Secondary Licenses", as + defined by the Open Hardware Description License, v. 1.0. diff --git a/pythondata_cpu_mor1kx/verilog/README.md b/pythondata_cpu_mor1kx/verilog/README.md new file mode 100644 index 0000000..efb75f1 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/README.md @@ -0,0 +1,198 @@ +# *mor1kx* - an OpenRISC processor IP core + +## The Basics + +This repository contains an OpenRISC 1000 compliant processor IP core. + +It is written in Verilog HDL. + +This repository only contains the IP source code and some documentation. For +a verification environment, please see other projects. + +## Documentation + +The documentation is located in the [doc/](doc/) directory. + +It is in asciidoc format, and there's a makefile to build HTML or PDF documentation. 
To +build the HTML documentation, run the following in the [doc/](doc/) directory: + +``` + $ make html +``` + +## License + +This project is licensed under the Open Hardware Description License (OHDL). For +details please see the [LICENSE](./LICENSE) file or http://juliusbaxter.net/ohdl/ + +## Configuration + +The mor1kx CPU is very configurable to allow you to customize the core to your +exact needs. The following tables explain how each parameter can be configured, +what the configuration does and why you might want to use it. + +**Note:** *The **Usage?** field below indicates if a certain application (such +as running Linux) requires a setting different than the default value.* + +### Basic parameters + +|Parameter|Description|Default|Values|Usage?| +|---------|-----------|-------|------|------| +|OPTION_OPERAND_WIDTH|Specify the CPU data and address widths|32|32, 64, etc| | +|OPTION_CPU0|Specify the CPU pipeline core|`CAPPUCCINO`|`CAPPUCCINO` `ESPRESSO` `PRONTO_ESPRESSO`|`CAPPUCCINO` for Linux| +|OPTION_RESET_PC|Specify the program counter upon reset|`0x100`|n| | + +### Caching parameters + +|Parameter|Description|Default|Values|Usage?| +|---------|-----------|-------|------|------| +|FEATURE_DATACACHE|Enable memory access data caching|`NONE`|`ENABLED` `NONE`| | +|OPTION_DCACHE_BLOCK_WIDTH|Specify the address width of a cache block|5|`n`| | +|OPTION_DCACHE_SET_WIDTH|Specify the set address width|9|`n`| | +|OPTION_DCACHE_WAYS|Specify the number of blocks per set|2|`n`| | +|OPTION_DCACHE_LIMIT_WIDTH|Specify the maximum address width|32|`n`|`31` for Linux to allow uncached device access| +|OPTION_DCACHE_SNOOP|Enable bus snooping for cache coherency|`NONE`|`ENABLED` `NONE`|Linux SMP| +|FEATURE_INSTRUCTIONCACHE|Enable memory access instruction caching|`NONE`|`ENABLED` `NONE`| | +|OPTION_ICACHE_BLOCK_WIDTH|Specify the address width of a cache block|5|`n`| | +|OPTION_ICACHE_SET_WIDTH|Specify the set address width|9|`n`| | +|OPTION_ICACHE_WAYS|Specify the number of 
blocks per set|2|`n`| | +|OPTION_ICACHE_LIMIT_WIDTH|Specify the maximum address width|32|`n`| | + +### Memory Management Unit (MMU) parameters + +|Parameter|Description|Default|Values|Usage?| +|---------|-----------|-------|------|------| +|FEATURE_DMMU|Enable the data bus MMU|`NONE`|`ENABLED` `NONE`|Linux expects `ENABLED`| +|FEATURE_DMMU_HW_TLB_RELOAD|Enable hardware TLB reload|`NONE`|`ENABLED` `NONE`|Linux expects `NONE`| +|OPTION_DMMU_SET_WIDTH|Specify the set address width|6|`n`| | +|OPTION_DMMU_WAYS|Specify the number of ways per set|1|`n`| | +|FEATURE_IMMU|Enable the instruction bus MMU|`NONE`|`ENABLED` `NONE`|Linux expects `ENABLED`| +|FEATURE_IMMU_HW_TLB_RELOAD|Enable hardware TLB reload|`NONE`|`ENABLED` `NONE`|Linux expects `NONE`| +|OPTION_IMMU_SET_WIDTH|Specify the set address width|6|`n`| | +|OPTION_IMMU_WAYS|Specify the number of ways per set|1|`n`| | + +### System bus parameters + +|Parameter|Description|Default|Values|Usage?| +|---------|-----------|-------|------|------| +|FEATURE_STORE_BUFFER|Enable the load store unit store buffer|`ENABLED`|`ENABLED` `NONE`|Large footprint| +|OPTION_STORE_BUFFER_DEPTH_WIDTH|Specify the load store unit store buffer depth|8|1-n| | +|BUS_IF_TYPE|Specify the bus interface type|`WISHBONE32`|`WISHBONE32`| | +|IBUS_WB_TYPE|Specify the Instruction bus interface type option|`B3_READ_BURSTING`|`B3_READ_BURSTING` `B3_REGISTERED_FEEDBACK` `CLASSIC`| | +|DBUS_WB_TYPE|Specify the Data bus interface type option|`CLASSIC`|`B3_READ_BURSTING` `B3_REGISTERED_FEEDBACK` `CLASSIC`| | + +### Hardware unit configuration parameters + +|Parameter|Description|Default|Values|Usage?| +|---------|-----------|-------|------|------| +|FEATURE_TRACEPORT_EXEC|Enable the traceport hardware interface|`NONE`|`ENABLED` `NONE`|Verilator| +|FEATURE_DEBUGUNIT|Enable hardware breakpoints and advanced debug unit interface|`NONE`|`ENABLED` `NONE`|OpenOCD| +|FEATURE_PERFCOUNTERS|Enable the performance counters unit|`NONE`|`ENABLED` `NONE`| | 
+|OPTION_PERFCOUNTERS_NUM|Specify the number of performance counters to generate|0|n| | +|FEATURE_TIMER|Enable the internal OpenRISC timer|`ENABLED`|`ENABLED` `NONE`| | +|FEATURE_PIC|Enable the internal OpenRISC PIC|`ENABLED`|`ENABLED` `NONE`| | +|OPTION_PIC_TRIGGER|Specify the PIC trigger mode|`LEVEL`|`LEVEL` `EDGE` `LATCHED_LEVEL`| | +|OPTION_PIC_NMI_WIDTH|Specify non maskable interrupts width, starting at 0, these interrupts will not be reset or maskable|0|0-32| | +|OPTION_RF_CLEAR_ON_INIT|Enable clearing all registers on initialization|0|0, 1| | +|OPTION_RF_NUM_SHADOW_GPR|Specify the number of shadow register files|0|0-16|Set `>=1` for Linux SMP| +|OPTION_RF_ADDR_WIDTH|Specify the address width of the register file|5|5| | +|OPTION_RF_WORDS|Specify the number of registers in the register file|32|32| | +|FEATURE_FASTCONTEXTS|Enable fast context switching of register sets|`NONE`|`ENABLED` `NONE`| | +|FEATURE_MULTICORE|Enable the `coreid` and `numcores` SPR registers|`NONE`|`ENABLED` `NONE`|Linux SMP| +|FEATURE_FPU|Enable the FPU, for cappuccino pipeline only|`NONE`|`ENABLED` `NONE`| | +|OPTION_FTOI_ROUNDING|Select rounding behavior for `lf.ftoi.s` instruction|`CPP`|`CPP` `IEEE`|GCC9| +|FEATURE_BRANCH_PREDICTOR|Select the branch predictor implementation|`SIMPLE`|`SIMPLE` `GSHARE` `SAT_COUNTER`| | + +**Note:** *C/C++ double to integer conversion assumes truncation (rounding `toward zero`). +The default (`CPP`) value of OPTION_FTOI_ROUNDING forces `toward zero` rounding mode exclusively for +`lf.ftoi.s` instruction regardless of `rounding mode` bits of FPCSR. While with `IEEE` value +`lf.ftoi.s` performs conversion in according with `rounding mode` bits of FPCSR. And these bits are +`nearest-even` rounding mode by default. 
All other floating point instructions always perform rounding +in according with `rounding mode` bits of FPCSR.* + +### Exception handling options + +|Parameter|Description|Default|Values|Usage?| +|---------|-----------|-------|------|------| +|FEATURE_DSX|Enable setting the `SR[DSX]` flag when raising exceptions in a delay slot|`ENABLED`|`ENABLED` `NONE`| | +|FEATURE_RANGE|Enable checking and raising range exceptions|`ENABLED`|`ENABLED` `NONE`| | +|FEATURE_OVERFLOW|Enable checking and raising overflow exceptions|`ENABLED`|`ENABLED` `NONE`| | + +### ALU configuration options + +|Parameter|Description|Default|Values|Usage?| +|---------|-----------|-------|------|------| +|FEATURE_MULTIPLIER|Specify the multiplier implementation|`THREESTAGE`|`THREESTAGE` `PIPELINED` `SERIAL` `SIMULATION` `NONE`| | +|FEATURE_DIVIDER|Specify the divider implementation|`SERIAL`|`SERIAL` `SIMULATION` `NONE`| | +|OPTION_SHIFTER|Specify the shifter implementation|`BARREL`|`BARREL` `SERIAL`| | +|FEATURE_CARRY_FLAG|Enable checking and setting the carry flag|`ENABLED`|`ENABLED` `NONE`| | + +### Instruction enabling options + +|Parameter|Description|Default|Values|Usage?| +|---------|-----------|-------|------|------| +|FEATURE_MAC|Enable the `l.mac*` multiply accumulate instructions|`NONE`|`ENABLED` `NONE`| | +|FEATURE_SYSCALL|Enable the `l.sys` OS syscall instruction|`ENABLED`|`ENABLED` `NONE`| | +|FEATURE_TRAP|Enable the `l.trap` instruction|`ENABLED`|`ENABLED` `NONE`|GDB| +|FEATURE_ADDC|Enable the `l.addc` add with `carry` flag instruction|`ENABLED`|`ENABLED` `NONE`| | +|FEATURE_SRA|Enable the `l.sra` shift right arithmetic instruction|`ENABLED`|`ENABLED` `NONE`| | +|FEATURE_ROR|Enable the `l.ror*` rotate right instructions|`NONE`|`ENABLED` `NONE`| | +|FEATURE_EXT|Enable the `l.ext*` sign extend instructions|`NONE`|`ENABLED` `NONE`| | +|FEATURE_CMOV|Enable the `l.cmov` conditional move instruction|`ENABLED`|`ENABLED` `NONE`| | +|FEATURE_FFL1|Enable the `l.f[fl]1` find first/last set bit 
instructions|`ENABLED`|`ENABLED` `NONE`|Linux| +|FEATURE_ATOMIC|Enable the `l.lwa` and `l.swa` atomic instructions|`ENABLED`|`ENABLED` `NONE`|Linux SMP| +|FEATURE_CUST1|Enable the `l.cust*` custom instruction|`NONE`|`ENABLED` `NONE`| | +|FEATURE_CUST2|Enable the `l.cust*` custom instruction|`NONE`|`ENABLED` `NONE`| | +|FEATURE_CUST3|Enable the `l.cust*` custom instruction|`NONE`|`ENABLED` `NONE`| | +|FEATURE_CUST4|Enable the `l.cust*` custom instruction|`NONE`|`ENABLED` `NONE`| | +|FEATURE_CUST5|Enable the `l.cust*` custom instruction|`NONE`|`ENABLED` `NONE`| | +|FEATURE_CUST6|Enable the `l.cust*` custom instruction|`NONE`|`ENABLED` `NONE`| | +|FEATURE_CUST7|Enable the `l.cust*` custom instruction|`NONE`|`ENABLED` `NONE`| | +|FEATURE_CUST8|Enable the `l.cust*` custom instruction|`NONE`|`ENABLED` `NONE`| | + +## Testing and Continuous Integration + +A CPU core cannot be trusted without a full set of verification testing. The `mor1kx` +pipelines are constantly verified for correctness with the or1k Continuous +Integration (CI) suite running in [Travis CI](https://travis-ci.org). This currently covers: + + - source linting - a `verilator --lint-only` check is run on each commit to + ensure there are no code quality issues. + - [or1k-tests](https://github.com/openrisc/or1k-tests) - the `or1k-tests` test suite + is run against each pipeline to check most major instructions, exception handling, + caching, timers, interrupts and other features. + + Status: [![Build Status](https://travis-ci.org/openrisc/mor1kx.svg?branch=master)](https://travis-ci.org/openrisc/mor1kx) + + +The or1k Continuous Integration (CI) suite is running in a librecores-ci-openrisc docker container in Travis CI. Parallel execution of tests runs in librecores-ci-openrisc docker environment. 
+ - The [librecores-ci-openrisc](https://github.com/librecores/docker-images/tree/master/librecores-ci-openrisc) docker image is based on the standard [librecores/librecores-ci](https://github.com/librecores/docker-images/tree/master/librecores-ci) docker image and it largely targets the [FuseSoC](https://github.com/olofk/fusesoc) use cases. + - The base image includes installation of common EDA tools such as Icarus Verilog, Verilator and Yosys that are required by the CI suite for testing. The librecores/librecores-ci-openrisc docker image gets the required toolchain, then downloads and compiles the or1k-tests. + + +The Continuous Integration suite also runs in [Jenkins](https://ci.librecores.org/) supported by [Librecores-CI](https://github.com/librecores/librecores-ci-jenkins-server). As with Travis, the `mor1kx` pipelines are constantly verified there. In addition to that, it also supports: + + - Yosys synthesis for monitoring resource usage. [Fusesoc](https://github.com/olofk/fusesoc/blob/master/doc/icestorm.adoc) provides the icestorm backend. + - The [LibreCores CI](https://github.com/librecores/docker-images/tree/master/librecores-ci) Docker image provides a Yosys synthesis metrics parser which outputs 'Printing Statistics'. 
Results are parsed into graphs with the Performance Plugin, which can be seen at [ci.librecores.org](https://ci.librecores.org/job/Projects/job/OpenRISC/job/mor1kx/) + + +Status : [![Build Status](https://ci.librecores.org/job/Projects/job/OpenRISC/job/mor1kx/job/master/badge/icon)](https://ci.librecores.org/job/Projects/job/OpenRISC/job/mor1kx/job/master/) + +We are working on adding more tests in the future, including: + + - softfloat, fpu verification (may not be feasible in CI due to long run times) + - CPU pipeline debugging verification via GDB/OpenOCD + - Resource utilization regression with yosys synth_intel synth_xilinx + - Formal verification with yosys + - Verification that each revision can boot different OSes: **Linux**, **RTEMS** + - Golden reference `or1ksim` trace comparisons vs verilog model using constrained + random inputs. + +Verification status of mor1kx pipelines: + +|Pipeline|Testing Support|Comments| +|--------|---------------|--------| +|`CAPPUCCINO`|`linting` `or1k-tests`|All supported tests passing| +|`ESPRESSO`|`linting` `or1k-tests` |Still many pipeline failures, see issue #71| +|`PRONTO_ESPRESSO`|`linting`|No toolchain support for no-delayslot c code| +|`MAROCCHINO`|`linting` `or1k-tests`|See [marocchino](https://github.com/openrisc/or1k_marocchino) project.| diff --git a/pythondata_cpu_mor1kx/verilog/bench/verilog/mor1kx_monitor.v b/pythondata_cpu_mor1kx/verilog/bench/verilog/mor1kx_monitor.v new file mode 100644 index 0000000..405ee4c --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/bench/verilog/mor1kx_monitor.v @@ -0,0 +1,955 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx monitor module + + Attaches to hooks provided in the mor1kx pipeline wrapper and provides + execution trace, disassembly, and l.nop instruction functions. + + Copyright (C) 2012, 2013 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +/* Configure these defines to point to the mor1kx instantiation */ +`ifndef MOR1KX_INST + `define MOR1KX_INST dut.mor1kx0 +`endif + +/* The rest of these shouldn't need changing if the wrapper hooks have been + set up correctly in mor1kx_cpu. */ +`ifndef CPU_WRAPPER + `define CPU_WRAPPER `MOR1KX_INST.mor1kx_cpu +`endif +`define EXECUTE_STAGE_INSN `CPU_WRAPPER.monitor_execute_insn +`define EXECUTE_STAGE_ADV `CPU_WRAPPER.monitor_execute_advance +`define CPU_clk `CPU_WRAPPER.monitor_clk +`define CPU_FLAG `CPU_WRAPPER.monitor_flag +`define CPU_SR `CPU_WRAPPER.monitor_spr_sr +`define EXECUTE_PC `CPU_WRAPPER.monitor_execute_pc +`define GPR_GET(x) `CPU_WRAPPER.monitor.get_gpr(x) +`define GPR_SET(x, y) `CPU_WRAPPER.monitor.set_gpr(x, y) + +`include "mor1kx-defines.v" + +// Pull in an ORPSoC-specific file +`include "test-defines.v" // indicate if we should trace or not + +// OR1K ISA defines used in this file + +`define OR1K_OPCODE_POS 31:26 +`define OR1K_J_BR_IMM_POS 25:0 +`define OR1K_RD_POS 25:21 +`define OR1K_RA_POS 20:16 +`define OR1K_RB_POS 15:11 +`define OR1K_ALU_OP_POS 3:0 +`define OR1K_SF_OP 25:21 +`define OR1K_XSYNC_OP_POS 25:21 + +module mor1kx_monitor #(parameter LOG_DIR= "../out") (); + + // General output file descriptor + integer fgeneral = 0; + integer ftrace = 0; + integer insns = 0; + + wire clk; + + parameter OPTION_OPERAND_WIDTH = 32; + + reg TRACE_ENABLE; + initial TRACE_ENABLE = $test$plusargs("trace_enable"); + + reg TRACE_TO_SCREEN; + initial TRACE_TO_SCREEN = $test$plusargs("trace_to_screen"); + + assign 
clk = `CPU_clk; + + reg [63:0] cycle_counter = 0 ; + + /* Log file management code */ + initial + begin + $timeformat (-9, 2, " ns", 12); + fgeneral = $fopen({LOG_DIR,"/",`TEST_NAME_STRING,"-general.log"}); + ftrace = $fopen({LOG_DIR,"/",`TEST_NAME_STRING,"-trace.log"}); + end + + /* Simulation support code */ + + reg [1:80*8] decode_insn_disas; + reg [1:80*8] execute_insn_disas; + reg [OPTION_OPERAND_WIDTH-1:0] decode_pc; + reg [OPTION_OPERAND_WIDTH-1:0] execute_pc; + + reg [`OR1K_INSN_WIDTH-1:0] execute_insn; + reg flag_4stage; + + always @(`EXECUTE_STAGE_INSN) + mor1k_insn_to_string(`EXECUTE_STAGE_INSN, execute_insn_disas); + //$write("%tns: decode insn PC %08h %08h %s\n",$time, pc_decode_i, + // decode_insn_i, insn_disassembled_string); + + always @(negedge `CPU_clk) begin + + cycle_counter = cycle_counter + 1; + + if (`EXECUTE_STAGE_ADV) + begin + insns = insns + 1; + execute_insn = `EXECUTE_STAGE_INSN; + + if(TRACE_ENABLE) + mor1k_trace_print(execute_insn, `CPU_SR, `EXECUTE_PC, `CPU_FLAG); + + // Check instructions for simulation controls + if (execute_insn == 32'h15_00_00_01) + begin + $fdisplay(fgeneral,"%0t:exit(0x%08h);",$time,`GPR_GET(3)); + $fdisplay(ftrace,"exit(0x%08h);",`GPR_GET(3)); + $display("exit(0x%08h);",`GPR_GET(3)); + $finish; + end + if (execute_insn == 32'h15_00_00_02) + begin + $fdisplay(fgeneral,"%0t:report(0x%08h);",$time,`GPR_GET(3)); + $fdisplay(ftrace,"report(0x%08h);",`GPR_GET(3)); + $display("report(0x%08h);",`GPR_GET(3)); + end + if (execute_insn == 32'h15_00_00_04) + begin + $write("%c",`GPR_GET(3)); + $fdisplay(fgeneral, "%0t: l.nop putc (%c)", $time,`GPR_GET(3)); + end + if (execute_insn == 32'h15_00_00_05) + begin + cycle_counter = 0; + $fdisplay(fgeneral, "%0t: l.nop reset counter", $time); + end + if (execute_insn == 32'h15_00_00_06) + begin + $fdisplay(fgeneral, "%0t: l.nop report cycle counter: %d", $time, cycle_counter); + `GPR_SET(11,cycle_counter[31:0]); + `GPR_SET(12,cycle_counter[63:32]); + end + + if (execute_insn == 
32'h15_00_00_0c) + begin + // Silent exit + $finish; + + end + + end // if (`EXECUTE_STAGE_ADV) + end + + task mor1k_trace_print; + input [31:0] insn; + input [31:0] sr; + input [31:0] pc; + input flag; + + + reg rD_used; + reg [4:0] rD_num, rA_num, rB_num; + reg [15:0] imm_16bit; + reg [25:0] imm_26bit; + reg [31:0] signext_imm_16bit; + + reg [1:80*8] insn_disas; + // Actual things happening + reg [15:0] regimm_chars; + reg [31:0] addr_result; + begin + + // Get instruction info + mor1kx_insn_info(insn,rA_num,rB_num,rD_num,rD_used,imm_16bit, + imm_26bit,regimm_chars); + + /* Sign-extend the 16-bit immediate to 32-bit so we can add it to other + 32-bit numbers and it should subtract if necessary */ + signext_imm_16bit = {{16{imm_16bit[15]}},imm_16bit}; + + // Display useful line of stuff, like or1ksim trace + if (sr[`OR1K_SPR_SR_SM] === 1'b0) + begin + $fwrite(ftrace,"U "); + if(TRACE_TO_SCREEN) + $write("U "); + end + else + begin + $fwrite(ftrace,"S "); + if(TRACE_TO_SCREEN) + $write("S "); + end + + // PC next + $fwrite(ftrace,"%08h: ", pc); + if(TRACE_TO_SCREEN) + $write("%08h: ", pc); + + // Instruction raw + $fwrite(ftrace,"%08h ",insn); + if(TRACE_TO_SCREEN) + $write("%08h ",insn); + + mor1k_insn_to_string(insn, insn_disas); + + // Instruction, disassembled + $fwrite(ftrace,"%0s", insn_disas); + if(TRACE_TO_SCREEN) + $write("%0s", insn_disas); + + for (regimm_chars=regimm_chars; + regimm_chars < 16; regimm_chars = regimm_chars + 1) + begin + $fwrite(ftrace," "); + if(TRACE_TO_SCREEN) + $write(" "); + end + + if (rD_used) + begin + if (insn[`OR1K_OPCODE_SELECT]===`OR1K_OPCODE_MFSPR) + begin + // Wait 1 cycle for MFSPR result + @(posedge `CPU_clk); + $fwrite(ftrace,"r%0d",rD_num); + if(TRACE_TO_SCREEN) + $write("r%0d",rD_num); + end + else + begin + $fwrite(ftrace,"r%0d",rD_num); + if(TRACE_TO_SCREEN) + $write("r%0d",rD_num); + end // else: !if(insn[`OR1K_OPCODE_SELECT]===`OR1K_OPCODE_MFSPR) + + // Tab 1 more if we're a single-number register + if (rD_num < 
10) begin + $fwrite(ftrace,"\t\t"); + if(TRACE_TO_SCREEN) + $write("\t\t"); + end + else begin + $fwrite(ftrace,"\t"); + if(TRACE_TO_SCREEN) + $write("\t"); + end + + // Finally write what ended up in the in rD + $fwrite(ftrace,"= %08h ",`GPR_GET(rD_num)); + if(TRACE_TO_SCREEN) + $write("= %08h ",`GPR_GET(rD_num)); + end + else if (insn[`OR1K_OPCODE_SELECT]===`OR1K_OPCODE_MTSPR) + begin + // Clobber imm_16bit here to calculate MTSPR + imm_16bit = imm_16bit | `GPR_GET(rA_num); + $fwrite(ftrace,"SPR[%04x] = %08h ", imm_16bit, `GPR_GET(rB_num)); + if(TRACE_TO_SCREEN) + $write("SPR[%04x] = %08h ", imm_16bit, `GPR_GET(rB_num)); + + end // if (insn[`OR1K_OPCODE_SELECT]===`OR1K_OPCODE_MTSPR) + else if (insn[`OR1K_OPCODE_SELECT]>=`OR1K_OPCODE_SD && + insn[`OR1K_OPCODE_SELECT]<=`OR1K_OPCODE_SH) + begin + addr_result = signext_imm_16bit + `GPR_GET(rA_num); + $fwrite(ftrace,"[%08h] = %08h ",addr_result[31:0], + `GPR_GET(rB_num)); + if(TRACE_TO_SCREEN) + $write("[%08h] = %08h ",addr_result[31:0], + `GPR_GET(rB_num)); + end + else + begin + // Skip destination field + $fwrite(ftrace,"\t\t\t "); + if(TRACE_TO_SCREEN) + $write("\t\t\t "); + end + + /* Write flag */ + $fwrite(ftrace,"flag: %0d", flag); + if(TRACE_TO_SCREEN) + $write("flag: %0d", flag); + + /* End of line */ + $fwrite(ftrace,"\n"); + if(TRACE_TO_SCREEN) + $write("\n"); + + end + endtask // mor1k_trace_print + + task mor1kx_insn_info; + input [31:0] insn; + output [4:0] rA_num; + output [4:0] rB_num; + output [4:0] rD_num; + output rD_used; + output [15:0] imm_16bit; + output [25:0] imm_26bit; + + output [7:0] num_chars; + + // To count how long disassembled immediates/regs + // are - what a pain! 
+ reg rA_used, rB_used, imm_16bit_used, + imm_26bit_used; + + reg [5:0] opcode; + + reg opc_store; + + begin + + // Register numbers (D, A and B) + rD_num = insn[`OR1K_RD_POS]; + rA_num = insn[`OR1K_RA_POS]; + rB_num = insn[`OR1K_RB_POS]; + + opcode = insn[`OR1K_OPCODE_POS]; + + + opc_store = (opcode==`OR1K_OPCODE_SD) || + (opcode==`OR1K_OPCODE_SW) || + (opcode==`OR1K_OPCODE_SB) || + (opcode==`OR1K_OPCODE_SH); + + case (opcode) + `OR1K_OPCODE_LWZ, + `OR1K_OPCODE_LBZ, + `OR1K_OPCODE_LBS, + `OR1K_OPCODE_LHZ, + `OR1K_OPCODE_LHS, + `OR1K_OPCODE_MFSPR, + `OR1K_OPCODE_MOVHI, + `OR1K_OPCODE_ADDI, + `OR1K_OPCODE_ADDIC, + `OR1K_OPCODE_ANDI, + `OR1K_OPCODE_ORI, + `OR1K_OPCODE_XORI, + `OR1K_OPCODE_MULI, + `OR1K_OPCODE_ALU, + `OR1K_OPCODE_SHRTI: + rD_used = 1; + default: + rD_used=0; + endcase // case (opcode) + + case (opcode) + `OR1K_OPCODE_J , + `OR1K_OPCODE_JAL , + `OR1K_OPCODE_BNF , + `OR1K_OPCODE_BF , + `OR1K_OPCODE_NOP , + `OR1K_OPCODE_MOVHI, + `OR1K_OPCODE_MACRC, + `OR1K_OPCODE_SYSTRAPSYNC, + `OR1K_OPCODE_RFE, + `OR1K_OPCODE_JR, + `OR1K_OPCODE_JALR: + /* + rD of store insns, is in rA field + `OR1K_OPCODE_SD, + `OR1K_OPCODE_SW, + `OR1K_OPCODE_SB, + `OR1K_OPCODE_SH + */ + rA_used = 0; + default: + rA_used=1; + endcase // case (opcode) + + case (opcode) + `OR1K_OPCODE_JR, + `OR1K_OPCODE_JALR, + `OR1K_OPCODE_MTSPR, + `OR1K_OPCODE_MAC, + `OR1K_OPCODE_MSB, + `OR1K_OPCODE_SD, + `OR1K_OPCODE_SW, + `OR1K_OPCODE_SB, + `OR1K_OPCODE_SH, + `OR1K_OPCODE_SF: + rB_used = 1; + `OR1K_OPCODE_ALU: + case(insn[`OR1K_ALU_OPC_SELECT]) + `OR1K_ALU_OPC_EXTBH, + `OR1K_ALU_OPC_EXTW, + `OR1K_ALU_OPC_FFL1: + rB_used = 0; + default: + rB_used = 1; + endcase // case (insn[`OR1K_ALU_OPC_SELECT]) + default: + rB_used = 0; + endcase // case (opcode) + + case (opcode) + `OR1K_OPCODE_MOVHI, + `OR1K_OPCODE_NOP, + `OR1K_OPCODE_SD, + `OR1K_OPCODE_SW, + `OR1K_OPCODE_SB, + `OR1K_OPCODE_SH, + `OR1K_OPCODE_LD , + `OR1K_OPCODE_LWZ , + `OR1K_OPCODE_LWS , + `OR1K_OPCODE_LBZ , + `OR1K_OPCODE_LBS , + 
`OR1K_OPCODE_LHZ , + `OR1K_OPCODE_LHS , + `OR1K_OPCODE_ADDI , + `OR1K_OPCODE_ADDIC, + `OR1K_OPCODE_ANDI , + `OR1K_OPCODE_ORI , + `OR1K_OPCODE_XORI , + `OR1K_OPCODE_MULI , + `OR1K_OPCODE_MACI , + `OR1K_OPCODE_SFIMM, + `OR1K_OPCODE_MTSPR, + `OR1K_OPCODE_MFSPR: + imm_16bit_used = 1; + default: + imm_16bit_used = 0; + endcase // case (opcode) + + case (opcode) + `OR1K_OPCODE_J , + `OR1K_OPCODE_JAL, + `OR1K_OPCODE_BNF, + `OR1K_OPCODE_BF: + imm_26bit_used = 1; + default: + imm_26bit_used = 0; + endcase + + // Extract immediate + case (opcode) + `OR1K_OPCODE_SW, + `OR1K_OPCODE_SB, + `OR1K_OPCODE_SH, + `OR1K_OPCODE_SD, + `OR1K_OPCODE_MTSPR: + imm_16bit = {insn[25:21],insn[10:0]}; + default: + imm_16bit = insn[15:0]; + endcase // case (opcode) + + imm_26bit = insn[25:0]; + + // Extra chars (commas, brackets) + case (opcode) +/* + `OR1K_OPCODE_J : + num_chars = 0; + `OR1K_OPCODE_JAL : + num_chars = 0; + `OR1K_OPCODE_BNF : + num_chars = 0; + `OR1K_OPCODE_BF : + num_chars = 0; + `OR1K_OPCODE_MACRC: + num_chars = 0; + `OR1K_OPCODE_SYSTRAPSYNC: + num_chars = 0; + `OR1K_OPCODE_RFE: + num_chars = 0; + `OR1K_OPCODE_JR : + num_chars = 0; + `OR1K_OPCODE_JALR : + num_chars = 0; + `OR1K_OPCODE_CUST1: + num_chars = 0; + `OR1K_OPCODE_CUST2: + num_chars = 0; + `OR1K_OPCODE_CUST3: + num_chars = 0; + `OR1K_OPCODE_CUST4: + num_chars = 0; + `OR1K_OPCODE_NOP : + num_chars = 0; + */ + `OR1K_OPCODE_MOVHI: + num_chars = 1; + `OR1K_OPCODE_MACI : + num_chars = 1; + `OR1K_OPCODE_LD : + num_chars = 3; + `OR1K_OPCODE_LWZ : + num_chars = 3; + `OR1K_OPCODE_LWS : + num_chars = 3; + `OR1K_OPCODE_LBZ : + num_chars = 3; + `OR1K_OPCODE_LBS : + num_chars = 3; + `OR1K_OPCODE_LHZ : + num_chars = 3; + `OR1K_OPCODE_LHS : + num_chars = 3; + `OR1K_OPCODE_ADDI : + num_chars = 2; + `OR1K_OPCODE_ADDIC: + num_chars = 2; + `OR1K_OPCODE_ANDI : + num_chars = 2; + `OR1K_OPCODE_ORI : + num_chars = 2; + `OR1K_OPCODE_XORI : + num_chars = 2; + `OR1K_OPCODE_MULI : + num_chars = 2; + `OR1K_OPCODE_MFSPR: + num_chars = 2; + 
`OR1K_OPCODE_SFIMM: + num_chars = 1; + `OR1K_OPCODE_MTSPR : + num_chars = 2; + `OR1K_OPCODE_MAC : + num_chars = 1; + `OR1K_OPCODE_MSB : + num_chars = 1; + `OR1K_OPCODE_SD : + num_chars = 3; + `OR1K_OPCODE_SW : + num_chars = 3; + `OR1K_OPCODE_SB : + num_chars = 3; + `OR1K_OPCODE_SH: + num_chars = 3; + `OR1K_OPCODE_ALU: + case(insn[`OR1K_ALU_OPC_SELECT]) + `OR1K_ALU_OPC_EXTBH, + `OR1K_ALU_OPC_EXTW, + `OR1K_ALU_OPC_FFL1: + num_chars = 1; + default: + num_chars = 2; + endcase // case (insn[`OR1K_ALU_OPC_SELECT]) + `OR1K_OPCODE_SF: + num_chars =1; + `OR1K_OPCODE_SHRTI: + /* + if (insn[5:0] < 6'h10) + num_chars = 5; + else + */ + num_chars = 6; + + default: + num_chars = 0; + + endcase // case (opcode) + + + // Determine length of register/immediate + // disassembly in characters + if (rA_used) + num_chars = (rA_num > 9) ? num_chars + 3 : + num_chars + 2; + + if (rB_used) + num_chars = (rB_num > 9) ? num_chars + 3 : + num_chars + 2; + + if (rD_used) + num_chars = (rD_num > 9) ? num_chars + 3 : + num_chars + 2; + + if (imm_16bit_used) + num_chars = num_chars + 6; + + if (imm_26bit_used) + num_chars = num_chars + 9; + + /* + $write("%b %b %b %b %b\n",rA_used, rB_used, rD_used, imm_16bit_used, + imm_26bit_used); + */ + //$write("%0d\n",num_chars); + + end + endtask // mor1k_insn_info + + + + + + task mor1k_insn_to_string; + input [31:0] insn; + output [80*8:1] insnstring; + + reg [5:0] opcode; + + reg [25:0] j_imm; + + reg [25:0] br_imm; + + reg [31:0] rA_val, rB_val; + + reg [3:0] alu_op; + + reg [5:0] sf_op; + + reg [5:0] xsync_op; + + reg [4:0] rD_num, rA_num, rB_num; + + reg [15:0] imm_16bit; + reg [15:0] imm_split16bit; + + + begin + + // Instruction opcode + opcode = insn[`OR1K_OPCODE_POS]; + // Immediates for jump or branch instructions + j_imm = insn[`OR1K_J_BR_IMM_POS]; + br_imm = insn[`OR1K_J_BR_IMM_POS]; + // Register numbers (D, A and B) + rD_num = insn[`OR1K_RD_POS]; + rA_num = insn[`OR1K_RA_POS]; + rB_num = insn[`OR1K_RB_POS]; + // Bottom 16 bits when used as 
immediates in various instructions + imm_16bit = insn[15:0]; + // Bottom 11 bits used as immediates for l.sX instructions + + // Split 16-bit immediate for l.mtspr/l.sX instructions + imm_split16bit = {insn[25:21],insn[10:0]}; + // ALU op for ALU instructions + alu_op = insn[`OR1K_ALU_OP_POS]; + + + // Set flag op + sf_op = insn[`OR1K_SF_OP]; + + // Xsync/syscall/trap opcode + xsync_op = insn[`OR1K_XSYNC_OP_POS]; + + case (opcode) + `OR1K_OPCODE_J: + begin + $sformat(insnstring, "l.j 0x%07h", j_imm); + end + + `OR1K_OPCODE_JAL: + begin + $sformat(insnstring, "l.jal 0x%07h", j_imm); + end + + `OR1K_OPCODE_BNF: + begin + $sformat(insnstring, "l.bnf 0x%07h", br_imm); + end + + `OR1K_OPCODE_BF: + begin + $sformat(insnstring, "l.bf 0x%07h", br_imm); + end + + `OR1K_OPCODE_RFE: + begin + $sformat(insnstring, "l.rfe "); + end + + `OR1K_OPCODE_JR: + begin + $sformat(insnstring, "l.jr r%0d",rB_num); + end + + `OR1K_OPCODE_JALR: + begin + $sformat(insnstring, "l.jalr r%0d",rB_num); + end + + `OR1K_OPCODE_LWZ: + begin + $sformat(insnstring, "l.lwz r%0d,0x%04h(r%0d)",rD_num,imm_16bit,rA_num); + end + + `OR1K_OPCODE_LBZ: + begin + $sformat(insnstring, "l.lbz r%0d,0x%04h(r%0d)",rD_num,imm_16bit,rA_num); + end + + `OR1K_OPCODE_LBS: + begin + $sformat(insnstring, "l.lbs r%0d,0x%04h(r%0d)",rD_num,imm_16bit,rA_num); + end + + `OR1K_OPCODE_LHZ: + begin + $sformat(insnstring, "l.lhz r%0d,0x%04h(r%0d)",rD_num,imm_16bit,rA_num); + end + + `OR1K_OPCODE_LHS: + begin + $sformat(insnstring, "l.lhs r%0d,0x%04h(r%0d)",rD_num,imm_16bit,rA_num); + end + + `OR1K_OPCODE_SW: + begin + $sformat(insnstring, "l.sw 0x%04h(r%0d),r%0d",imm_split16bit,rA_num,rB_num); + end + + `OR1K_OPCODE_SB: + begin + $sformat(insnstring, "l.sb 0x%04h(r%0d),r%0d",imm_split16bit,rA_num,rB_num); + end + + `OR1K_OPCODE_SH: + begin + $sformat(insnstring, "l.sh 0x%04h(r%0d),r%0d",imm_split16bit,rA_num,rB_num); + end + + `OR1K_OPCODE_MFSPR: + begin + $sformat(insnstring, "l.mfspr r%0d,r%0d,0x%04h",rD_num,rA_num,imm_16bit); + 
end + + `OR1K_OPCODE_MTSPR: + begin + $sformat(insnstring, "l.mtspr r%0d,r%0d,0x%04h",rA_num,rB_num,imm_split16bit); + end + + `OR1K_OPCODE_MOVHI: + begin + if (!insn[16])begin + $sformat(insnstring, "l.movhi r%0d,0x%04h",rD_num,imm_16bit); + end + else + $sformat(insnstring, "l.macrc r%0d",rD_num); + end + + `OR1K_OPCODE_ADDI: + begin + $sformat(insnstring, "l.addi r%0d,r%0d,0x%04h",rD_num,rA_num,imm_16bit); + end + + `OR1K_OPCODE_ADDIC: + begin + $sformat(insnstring, "l.addic r%0d,r%0d,0x%04h",rD_num,rA_num,imm_16bit); + end + + `OR1K_OPCODE_ANDI: + begin + $sformat(insnstring, "l.andi r%0d,r%0d,0x%04h",rD_num,rA_num,imm_16bit); + end + + `OR1K_OPCODE_ORI: + begin + $sformat(insnstring, "l.ori r%0d,r%0d,0x%04h",rD_num,rA_num,imm_16bit); + end + + `OR1K_OPCODE_XORI: + begin + $sformat(insnstring, "l.xori r%0d,r%0d,0x%04h",rD_num,rA_num,imm_16bit); + end + + `OR1K_OPCODE_MULI: + begin + $sformat(insnstring, "l.muli r%0d,r%0d,0x%04h",rD_num,rA_num,imm_16bit); + end + + `OR1K_OPCODE_ALU: + begin + case(insn[`OR1K_ALU_OPC_SELECT]) + `OR1K_ALU_OPC_ADD: + $sformat(insnstring, "l.add r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_ADDC: + $sformat(insnstring, "l.addc r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_SUB: + $sformat(insnstring, "l.sub r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_AND: + $sformat(insnstring, "l.and r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_OR: + $sformat(insnstring, "l.or r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_XOR: + $sformat(insnstring, "l.xor r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_MUL: + $sformat(insnstring, "l.mul r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_SHRT: + begin + case(insn[`OR1K_ALU_OPC_SECONDARY_SELECT]) + `OR1K_ALU_OPC_SECONDARY_SHRT_SLL: + $sformat(insnstring, "l.sll r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_SECONDARY_SHRT_SRL: + $sformat(insnstring, "l.srl r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_SECONDARY_SHRT_SRA: + 
$sformat(insnstring, "l.sra r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_SECONDARY_SHRT_ROR: + $sformat(insnstring, "l.ror r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + endcase // case (insn[`OR1K_ALU_OPC_SECONDARY_SELECT]) + end + `OR1K_ALU_OPC_DIV: + $sformat(insnstring, "l.div r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_DIVU: + $sformat(insnstring, "l.divu r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_MULU: + $sformat(insnstring, "l.mulu r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_CMOV: + $sformat(insnstring, "l.cmov r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_ALU_OPC_FFL1: + begin + case(insn[8]) + 0: + $sformat(insnstring, "l.ff1 r%0d,r%0d",rD_num,rA_num); + 1: + $sformat(insnstring, "l.fl1 r%0d,r%0d",rD_num,rA_num); + endcase // case (insn[8]) + end + + endcase // case (alu_op) + //$sformat(insnstring, "r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + end + + `OR1K_OPCODE_SHRTI: + begin + case(insn[`OR1K_ALU_OPC_SECONDARY_SELECT]) + `OR1K_ALU_OPC_SECONDARY_SHRT_SLL: + $sformat(insnstring, "l.slli r%0d,r%0d,0x%01h",rD_num,rA_num,insn[5:0]); + `OR1K_ALU_OPC_SECONDARY_SHRT_SRL: + $sformat(insnstring, "l.srli r%0d,r%0d,0x%01h",rD_num,rA_num,insn[5:0]); + `OR1K_ALU_OPC_SECONDARY_SHRT_SRA: + $sformat(insnstring, "l.srai r%0d,r%0d,0x%01h",rD_num,rA_num,insn[5:0]); + `OR1K_ALU_OPC_SECONDARY_SHRT_ROR: + $sformat(insnstring, "l.rori r%0d,r%0d,0x%01h",rD_num,rA_num,insn[5:0]); + endcase // case (insn[`OR1K_ALU_OPC_SECONDARY_SELECT]) + //$sformat(insnstring, "r%0d,r%0d,0x%0h",rD_num,rA_num,insn[5:0]); + end // case: `OR1K_OPCODE_SHRTI + + `OR1K_OPCODE_SFIMM: + begin + case(insn[`OR1K_COMP_OPC_SELECT]) + `OR1K_COMP_OPC_EQ: + $sformat(insnstring, "l.sfeqi r%0d,0x%04h",rA_num, imm_16bit); + `OR1K_COMP_OPC_NE: + $sformat(insnstring, "l.sfnei r%0d,0x%04h",rA_num, imm_16bit); + `OR1K_COMP_OPC_GTU: + $sformat(insnstring, "l.sfgtuir%0d,0x%04h",rA_num, imm_16bit); + `OR1K_COMP_OPC_GEU: + $sformat(insnstring, "l.sfgeuir%0d,0x%04h",rA_num, imm_16bit); 
+ `OR1K_COMP_OPC_LTU: + $sformat(insnstring, "l.sfltuir%0d,0x%04h",rA_num, imm_16bit); + `OR1K_COMP_OPC_LEU: + $sformat(insnstring, "l.sfleuir%0d,0x%04h",rA_num, imm_16bit); + `OR1K_COMP_OPC_GTS: + $sformat(insnstring, "l.sfgtsir%0d,0x%04h",rA_num, imm_16bit); + `OR1K_COMP_OPC_GES: + $sformat(insnstring, "l.sfgesir%0d,0x%04h",rA_num, imm_16bit); + `OR1K_COMP_OPC_LTS: + $sformat(insnstring, "l.sfltsir%0d,0x%04h",rA_num, imm_16bit); + `OR1K_COMP_OPC_LES: + $sformat(insnstring, "l.sflesir%0d,0x%04h",rA_num, imm_16bit); + endcase // case (sf_op[2:0]) + + //$sformat(insnstring, "r%0d,0x%0h",rA_num, imm_16bit); + + end // case: `OR1K_OPCODE_SFXXI + + `OR1K_OPCODE_SF: + begin + case(insn[`OR1K_COMP_OPC_SELECT]) + `OR1K_COMP_OPC_EQ: + $sformat(insnstring, "l.sfeq r%0d,r%0d",rA_num, rB_num); + `OR1K_COMP_OPC_NE: + $sformat(insnstring, "l.sfne r%0d,r%0d",rA_num, rB_num); + `OR1K_COMP_OPC_GTU: + $sformat(insnstring, "l.sfgtu r%0d,r%0d",rA_num, rB_num); + `OR1K_COMP_OPC_GEU: + $sformat(insnstring, "l.sfgeu r%0d,r%0d",rA_num, rB_num); + `OR1K_COMP_OPC_LTU: + $sformat(insnstring, "l.sfltu r%0d,r%0d",rA_num, rB_num); + `OR1K_COMP_OPC_LEU: + $sformat(insnstring, "l.sfleu r%0d,r%0d",rA_num, rB_num); + `OR1K_COMP_OPC_GTS: + $sformat(insnstring, "l.sfgts r%0d,r%0d",rA_num, rB_num); + `OR1K_COMP_OPC_GES: + $sformat(insnstring, "l.sfges r%0d,r%0d",rA_num, rB_num); + `OR1K_COMP_OPC_LTS: + $sformat(insnstring, "l.sflts r%0d,r%0d",rA_num, rB_num); + `OR1K_COMP_OPC_LES: + $sformat(insnstring, "l.sfles r%0d,r%0d",rA_num, rB_num); + endcase // case (sf_op[2:0]) + //$sformat(insnstring, "r%0d,r%0d",rA_num, rB_num); + + end + + `OR1K_OPCODE_MACI: + begin + $sformat(insnstring, "l.maci r%0d,0x%04h",rA_num,imm_16bit); + end + + `OR1K_OPCODE_NOP: + begin + $sformat(insnstring, "l.nop 0x%04h",imm_16bit); + end + + `OR1K_OPCODE_SYSTRAPSYNC: + begin + case (insn[`OR1K_SYSTRAPSYNC_OPC_SELECT]) + `OR1K_SYSTRAPSYNC_OPC_SYSCALL: + $sformat(insnstring, "l.sys 0x%04h",imm_16bit); + 
`OR1K_SYSTRAPSYNC_OPC_TRAP: + $sformat(insnstring, "l.trap 0x%04h",imm_16bit); + `OR1K_SYSTRAPSYNC_OPC_MSYNC: + $sformat(insnstring, "l.msync"); + `OR1K_SYSTRAPSYNC_OPC_PSYNC: + $sformat(insnstring, "l.psync"); + `OR1K_SYSTRAPSYNC_OPC_CSYNC: + $sformat(insnstring, "l.csync"); + endcase // case (insn[`OR1K_SYSTRAPSYNC_OPC_SELECT]) + end + `OR1K_OPCODE_FPU: + begin + case (insn[`OR1K_FPUOP_SELECT]) + `OR1K_FPUOP_ADD: + $sformat(insnstring, "lf.add.s r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_FPUOP_SUB: + $sformat(insnstring, "lf.sub.s r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_FPUOP_MUL: + $sformat(insnstring, "lf.mul.s r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_FPUOP_DIV: + $sformat(insnstring, "lf.div.s r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_FPUOP_ITOF: + $sformat(insnstring, "lf.itof.s r%0d,r%0d",rD_num,rA_num); + `OR1K_FPUOP_FTOI: + $sformat(insnstring, "lf.ftoi.s r%0d,r%0d",rD_num,rA_num); + `OR1K_FPUOP_REM: + $sformat(insnstring, "lf.rem.s r%0d,r%0d,r%0d",rD_num,rA_num,rB_num); + `OR1K_FPCOP_SFEQ: + $sformat(insnstring, "lf.sfeq.s r%0d,r%0d",rA_num,rB_num); + `OR1K_FPCOP_SFNE: + $sformat(insnstring, "lf.sfne.s r%0d,r%0d",rA_num,rB_num); + `OR1K_FPCOP_SFGT: + $sformat(insnstring, "lf.sfgt.s r%0d,r%0d",rA_num,rB_num); + `OR1K_FPCOP_SFGE: + $sformat(insnstring, "lf.sfge.s r%0d,r%0d",rA_num,rB_num); + `OR1K_FPCOP_SFLT: + $sformat(insnstring, "lf.sflt.s r%0d,r%0d",rA_num,rB_num); + `OR1K_FPCOP_SFLE: + $sformat(insnstring, "lf.sfle.s r%0d,r%0d",rA_num,rB_num); + default: + $sformat(insnstring, "%t: FPU opcode 0x%0h, r%0d,r%0d,r%0d", $time, opcode, rD_num,rA_num,rB_num); + endcase // case(insn[`OR1K_FPUOP_SELECT]) + end + default: + begin + $sformat(insnstring, "%t: Unknown opcode 0x%0h",$time,opcode); + $sformat(insnstring, "%t: Unknown opcode 0x%0h",$time,opcode); // NOTE(review): exact duplicate of the previous statement; appears redundant + end + + endcase // case (opcode) + + end + endtask // mor1k_insn_to_string + +endmodule // mor1kx_monitor diff --git 
a/pythondata_cpu_mor1kx/verilog/bench/verilog/mor1kx_traceport_monitor.v b/pythondata_cpu_mor1kx/verilog/bench/verilog/mor1kx_traceport_monitor.v new file mode 100644 index 0000000..9bd6d3a --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/bench/verilog/mor1kx_traceport_monitor.v @@ -0,0 +1,128 @@ +`include "mor1kx-defines.v" + +`define OR1K_OPCODE_POS 31:26 +`define OR1K_J_BR_IMM_POS 25:0 +`define OR1K_RD_POS 25:21 +`define OR1K_RA_POS 20:16 +`define OR1K_RB_POS 15:11 +`define OR1K_ALU_OP_POS 3:0 +`define OR1K_SF_OP 25:21 +`define OR1K_XSYNC_OP_POS 25:21 + +module mor1kx_traceport_monitor(/*AUTOARG*/ + // Outputs + finish, + // Inputs + clk, rst, traceport_exec_valid, traceport_exec_pc, + traceport_exec_insn, traceport_exec_wbdata, traceport_exec_wbreg, + traceport_exec_wben, finish_cross + ); + + parameter OPTION_OPERAND_WIDTH = 32; + parameter OPTION_RF_ADDR_WIDTH = 5; + + parameter LOG_DIR = "../out"; + + parameter COREID = 0; + parameter NUMCORES = 1; + + integer fgeneral = 0; + integer ftrace = 0; + integer insns = 0; + + input clk; + input rst; + + input traceport_exec_valid; + input [31:0] traceport_exec_pc; + input [`OR1K_INSN_WIDTH-1:0] traceport_exec_insn; + input [OPTION_OPERAND_WIDTH-1:0] traceport_exec_wbdata; + input [OPTION_RF_ADDR_WIDTH-1:0] traceport_exec_wbreg; + input traceport_exec_wben; + + input [NUMCORES-1:0] finish_cross; + output reg finish; + + reg TRACE_ENABLE; + initial TRACE_ENABLE = $test$plusargs("trace_enable"); + + reg TRACE_TO_SCREEN; + initial TRACE_TO_SCREEN = $test$plusargs("trace_to_screen"); + + reg [63:0] cycle_counter = 0 ; + + reg [OPTION_OPERAND_WIDTH-1:0] r3; + reg [7:0] printstring [0:255]; + integer printstringpos; + + /* Log file management code */ + initial + begin + $timeformat (-9, 2, " ns", 12); +// fgeneral = $fopen({LOG_DIR,"/",`TEST_NAME_STRING,"-general.log"}); +// ftrace = $fopen({LOG_DIR,"/",`TEST_NAME_STRING,"-trace.log"}); + finish = 0; + printstringpos = 0; + end + + reg [`OR1K_INSN_WIDTH-1:0] 
execute_insn; + integer i; + + always @(negedge clk) begin + if ((COREID == 0) && &finish_cross) begin + $finish; + end + + cycle_counter = cycle_counter + 1; + + if (traceport_exec_valid) + begin + insns = insns + 1; + execute_insn = traceport_exec_insn; + + if (traceport_exec_wben && (traceport_exec_wbreg == 3)) begin + r3 = traceport_exec_wbdata; + end + +/* TODO: Re-enable + if(TRACE_ENABLE) + mor1k_trace_print(execute_insn, `CPU_SR, `EXECUTE_PC, `CPU_FLAG);*/ + + // Check instructions for simulation controls + if (execute_insn == 32'h15_00_00_01) + begin +// $fdisplay(fgeneral,"%0t:exit(0x%08h);",$time,r3); +// $fdisplay(ftrace,"exit(0x%08h);",r3); + $display("[%0d] exit(0x%08h);",COREID,r3); + $finish; + end + if (execute_insn == 32'h15_00_00_02) + begin +// $fdisplay(fgeneral,"%0t:report(0x%08h);",$time,r3); +// $fdisplay(ftrace,"report(0x%08h);",r3); + $display("[%0d, %0t] report(0x%08h);",COREID,$time,r3); + end + if (execute_insn == 32'h15_00_00_04) + begin + printstring[printstringpos] = r3[7:0]; + printstringpos = printstringpos + 1; + if ((r3 == 32'h0a) || (printstringpos == 256)) begin // flush on newline, or when the 256-entry printstring buffer is full + $write("[%0d, %0t] ",COREID,$time); + for (i = 0; i < printstringpos; i = i + 1) begin + $write("%s",printstring[i]); + end + printstringpos = 0; + end +// $fdisplay(fgeneral, "%0t: l.nop putc (%c)", $time,r3); + end + + if (execute_insn == 32'h15_00_00_0c) + begin + // Silent exit + finish = 1; + end + + end // if (traceport_exec_valid) + end + +endmodule // mor1kx_traceport_monitor diff --git a/pythondata_cpu_mor1kx/verilog/doc/Makefile b/pythondata_cpu_mor1kx/verilog/doc/Makefile new file mode 100644 index 0000000..f29aac4 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/doc/Makefile @@ -0,0 +1,70 @@ +ASCIIDOC=asciidoc +XSLTPROC=xsltproc +DBLATEX=dblatex + +RM ?= rm -f +CP ?= cp +MV ?= mv +CAT ?= cat +GEN-DOCINFO=gen-docinfo.pl + +ifndef PERL_PATH + PERL_PATH = /usr/bin/perl +endif + +ASCIIDOC_EXTRA += -a docinfo + +SPEC_TXT=mor1kx.asciidoc +SPEC_HTML=$(patsubst %.asciidoc,%.html,$(SPEC_TXT)) 
+SPEC_XML=$(patsubst %.asciidoc,%.xml,$(SPEC_TXT)) +SPEC_PDF=$(patsubst %.asciidoc,%.pdf,$(SPEC_TXT)) + +SPEC_DOCINFO=$(patsubst %.asciidoc,%-docinfo.xml,$(SPEC_TXT)) + +ifneq ($(findstring $(MAKEFLAGS),s),s) +ifndef V + QUIET_ASCIIDOC = @echo ' ' ASCIIDOC $@; + QUIET_XSLTPROC = @echo ' ' XSLTPROC $@; + QUIET_DBLATEX = @echo ' ' DBLATEX $@; + QUIET_GEN-DOCINFO = @echo ' ' GEN-DOCINFO $@; + export V +endif +endif + +all: html + +html: $(SPEC_HTML) +pdf: $(SPEC_PDF) + +$(SPEC_DOCINFO): $(SPEC_TXT) $(GEN-DOCINFO) + $(QUIET_GEN-DOCINFO)$(CAT) $< | \ + $(PERL_PATH) -ne 'if (/__vstart__/../__vend__/) {print unless /__v(start|end)__/}' | \ + $(PERL_PATH) $(GEN-DOCINFO) > $@ + +$(SPEC_XML): $(SPEC_TXT) $(SPEC_DOCINFO) + $(QUIET_ASCIIDOC)$(ASCIIDOC) $(ASCIIDOC_EXTRA) -b docbook $< + +XSLT = docbook.xsl +XSLTOPTS = --xinclude --stringparam html.stylesheet docbook-xsl.css \ + --stringparam section.autolabel 1 \ + --stringparam section.label.includes.component.label 1 + +$(SPEC_HTML): $(SPEC_XML) + $(QUIET_XSLTPROC)$(RM) $@+ $@ && \ + $(XSLTPROC) $(XSLTOPTS) -o $@+ $(XSLT) $< && \ + $(MV) $@+ $@ + +DBLATEXOPTS = --param=doc.publisher.show=0 + +$(SPEC_PDF): $(SPEC_XML) + $(QUIET_DBLATEX)$(RM) $@+ $@ && \ + $(DBLATEX) $(DBLATEXOPTS) -o $@+ \ + -p /etc/asciidoc/dblatex/asciidoc-dblatex.xsl \ + -s /etc/asciidoc/dblatex/asciidoc-dblatex.sty $< && \ + $(MV) $@+ $@ + +clean: # remove every generated XML/HTML/PDF output and its temporary "+" file + $(RM) *.xml *.xml+ *.html *.html+ + $(RM) $(SPEC_PDF) $(SPEC_PDF)+ + +.PHONY: all clean html pdf diff --git a/pythondata_cpu_mor1kx/verilog/doc/docbook-xsl.css b/pythondata_cpu_mor1kx/verilog/doc/docbook-xsl.css new file mode 100644 index 0000000..e11c8f0 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/doc/docbook-xsl.css @@ -0,0 +1,296 @@ +/* + CSS stylesheet for XHTML produced by DocBook XSL stylesheets. 
+ Tested with XSL stylesheets 1.61.2, 1.67.2 +*/ + +span.strong { + font-weight: bold; +} + +body blockquote { + margin-top: .75em; + line-height: 1.5; + margin-bottom: .75em; +} + +html body { + margin: 1em 5% 1em 5%; + line-height: 1.2; + font-family: sans-serif; +} + +body div { + margin: 0; +} + +h1, h2, h3, h4, h5, h6, +div.toc p b, +div.list-of-figures p b, +div.list-of-tables p b, +div.abstract p.title +{ + color: #527bbd; + font-family: tahoma, verdana, sans-serif; +} + +div.toc p:first-child, +div.list-of-figures p:first-child, +div.list-of-tables p:first-child, +div.example p.title +{ + margin-bottom: 0.2em; +} + +body h1 { + margin: .0em 0 0 -4%; + line-height: 1.3; + border-bottom: 2px solid silver; +} + +body h2 { + margin: 0.5em 0 0 -4%; + line-height: 1.3; + border-bottom: 2px solid silver; +} + +body h3 { + margin: .8em 0 0 -3%; + line-height: 1.3; +} + +body h4 { + margin: .8em 0 0 -3%; + line-height: 1.3; +} + +body h5 { + margin: .8em 0 0 -2%; + line-height: 1.3; +} + +body h6 { + margin: .8em 0 0 -1%; + line-height: 1.3; +} + +body hr { + border: none; /* Broken on IE6 */ +} +div.footnotes hr { + border: 1px solid silver; +} + +div.navheader th, div.navheader td, div.navfooter td { + font-family: sans-serif; + font-size: 0.9em; + font-weight: bold; + color: #527bbd; +} +div.navheader img, div.navfooter img { + border-style: none; +} +div.navheader a, div.navfooter a { + font-weight: normal; +} +div.navfooter hr { + border: 1px solid silver; +} + +body td { + line-height: 1.2 +} + +body th { + line-height: 1.2; +} + +ol { + line-height: 1.2; +} + +ul, body dir, body menu { + line-height: 1.2; +} + +html { + margin: 0; + padding: 0; +} + +body h1, body h2, body h3, body h4, body h5, body h6 { + margin-left: 0 +} + +body pre { + margin: 0.5em 10% 0.5em 1em; + line-height: 1.0; + color: navy; +} + +tt.literal, code.literal { + color: navy; + font-family: sans-serif; +} + +code.literal:before { content: "'"; } +code.literal:after { content: "'"; } + 
+em { + font-style: italic; + color: #064; +} + +div.literallayout p { + padding: 0em; + margin: 0em; +} + +div.literallayout { + font-family: monospace; + margin: 0em; + color: navy; + border: 1px solid silver; + background: #f4f4f4; + padding: 0.5em; +} + +.programlisting, .screen { + border: 1px solid silver; + background: #f4f4f4; + margin: 0.5em 10% 0.5em 0; + padding: 0.5em 1em; +} + +div.sidebar { + background: #ffffee; + margin: 1.0em 10% 0.5em 0; + padding: 0.5em 1em; + border: 1px solid silver; +} +div.sidebar * { padding: 0; } +div.sidebar div { margin: 0; } +div.sidebar p.title { + font-family: sans-serif; + margin-top: 0.5em; + margin-bottom: 0.2em; +} + +div.bibliomixed { + margin: 0.5em 5% 0.5em 1em; +} + +div.glossary dt { + font-weight: bold; +} +div.glossary dd p { + margin-top: 0.2em; +} + +dl { + margin: .8em 0; + line-height: 1.2; +} + +dt { + margin-top: 0.5em; +} + +dt span.term { + font-style: normal; + color: navy; +} + +div.variablelist dd p { + margin-top: 0; +} + +div.itemizedlist li, div.orderedlist li { + margin-left: -0.8em; + margin-top: 0.5em; +} + +ul, ol { + list-style-position: outside; +} + +div.sidebar ul, div.sidebar ol { + margin-left: 2.8em; +} + +div.itemizedlist p.title, +div.orderedlist p.title, +div.variablelist p.title +{ + margin-bottom: -0.8em; +} + +div.revhistory table { + border-collapse: collapse; + border: none; +} +div.revhistory th { + border: none; + color: #527bbd; + font-family: tahoma, verdana, sans-serif; +} +div.revhistory td { + border: 1px solid silver; +} + +/* Keep TOC and index lines close together. */ +div.toc dl, div.toc dt, +div.list-of-figures dl, div.list-of-figures dt, +div.list-of-tables dl, div.list-of-tables dt, +div.indexdiv dl, div.indexdiv dt +{ + line-height: normal; + margin-top: 0; + margin-bottom: 0; +} + +/* + Table styling does not work because of overriding attributes in + generated HTML. 
+*/ +div.table table, +div.informaltable table +{ + margin-left: 0; + margin-right: 5%; + margin-bottom: 0.8em; +} +div.informaltable table +{ + margin-top: 0.4em +} +div.table thead, +div.table tfoot, +div.table tbody, +div.informaltable thead, +div.informaltable tfoot, +div.informaltable tbody +{ + /* No effect in IE6. */ + border-top: 2px solid #527bbd; + border-bottom: 2px solid #527bbd; +} +div.table thead, div.table tfoot, +div.informaltable thead, div.informaltable tfoot +{ + font-weight: bold; +} + +div.mediaobject img { + border: 1px solid silver; + margin-bottom: 0.8em; +} +div.figure p.title, +div.table p.title +{ + margin-top: 1em; + margin-bottom: 0.4em; +} + +@media print { + div.navheader, div.navfooter { display: none; } +} diff --git a/pythondata_cpu_mor1kx/verilog/doc/docbook.xsl b/pythondata_cpu_mor1kx/verilog/doc/docbook.xsl new file mode 100644 index 0000000..8be773a --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/doc/docbook.xsl @@ -0,0 +1,17 @@ + + + + + appendix nop + article toc,title + book toc,title,figure,table,example,equation + part nop + preface nop + qandadiv nop + qandaset nop + reference toc,title + section nop + set toc + + diff --git a/pythondata_cpu_mor1kx/verilog/doc/gen-docinfo.pl b/pythondata_cpu_mor1kx/verilog/doc/gen-docinfo.pl new file mode 100755 index 0000000..7ad11f2 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/doc/gen-docinfo.pl @@ -0,0 +1,36 @@ +#!/usr/bin/perl + +use warnings; +use strict; + +my @keys = qw(number date author comment); +my @revs; +{ + local $/ = ''; + + while (<>) { + my @values = split(/\|/); + my %rev; + foreach (@keys) { + $rev{$_} = shift @values; + $rev{$_} =~ s/^\s+|\s+$//g; + } + push @revs, \%rev; + } +} + +if (@revs) { + print "\n"; + foreach my $rev (@revs) { + print " \n"; + print " $rev->{number}\n"; + print " $rev->{date}\n"; + print " $rev->{author}\n"; + print " \n"; + print " $rev->{comment}\n"; + print " \n"; + print " \n"; + } + print "\n"; +} + diff --git 
a/pythondata_cpu_mor1kx/verilog/doc/mor1kx.asciidoc b/pythondata_cpu_mor1kx/verilog/doc/mor1kx.asciidoc new file mode 100644 index 0000000..542b8ff --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/doc/mor1kx.asciidoc @@ -0,0 +1,527 @@ +mor1kx IP core specification +============================ +:doctype: book + +Introduction +------------ + +This document describes the mor1kx processor block and its various configurable +components. The core is an implementation of an OpenRISC 1000 compliant +processor which is highly configurable and contains multiple pipeline +implementations along with a configurable set of peripherals such as caches, +timers, debug and bus interfaces. This documentation will contain a section +detailing each optional block such as pipeline, caches, bus interface etc. It +can outline as much implementation detail as it wants but should ultimately +provide information to users on how to program and use the peripheral, or what +to expect during execution of software on a particular configuration. + +.Document Status +****************************************************************************** +This documentation is far from complete. It is expected that the documentation +will become more extensive over time. +****************************************************************************** + +OpenRISC Family +~~~~~~~~~~~~~~~ +(((OpenRISC,Family))) +OpenRISC 1000 is an architecture for a family of free, open source RISC processor +cores. As an architecture, OpenRISC 1000 allows for a spectrum of chip and +system implementations at a variety of price/performance points for a range of +applications. It is a 32/64-bit load and store RISC architecture designed with +emphasis on performance, simplicity, low power requirements, scalability and +versatility. OpenRISC 1000 architecture targets medium and high performance +networking, embedded, automotive and portable computer environments. 
+ +mor1kx CPU +~~~~~~~~~~ + +The mor1kx implementation was developed in order to provide a better platform +for processor component development than previous implementations. The goal of +the implementation is to provide a greater level of flexibility in terms of +implementation trade-offs such as area and performance. + +The blocks within the core have been designed for maximum re-use within +different configurations. Based on this, different pipeline implementations are +a major focus of the core. With this, the core should be very useful for +developers and users alike. For developers as a base for either +high-performance or low-overhead pipeline implementations based on re-usable +components eg. decode and ALU block. For users as a wider variety of +capabilities should be available to suit the processor's use case. + +Implementation Overview +----------------------- + +The implementation is heavily modular, with each particular functional block +of the design being contained within its own Verilog module or modules. + +The implementation configuration makes use of Verilog parameters. There should +be no configuration performed with the use of Verilog defines. 
+ +Hierarchy +~~~~~~~~~ + +The top few levels of hierarchy are as follows + +mor1kx:: Top-level, instantiating bus interfaces and CPU top-level +* mor1kx_bus_if_xx - Bus interface, depending on desired bus standard +* mor1kx_cpu - Pipeline implementation wrapper +** mor1kx_cpu_xx - Pipeline implementation, depending on configuration +*** mor1kx_fetch_xx - Pipeline-implementation-dependent fetch stage +**** mor1kx_icache - Instruction cache implementation +**** mor1kx_immu - Instruction memory management unit implementation +*** mor1kx_decode - Generic decode stage +*** mor1kx_execute_alu - Generic ALU for execute stage +*** mor1kx_lsu_xx - Pipeline-implementation-dependent load/store unit +**** mor1kx_dcache - Data cache implementation +**** mor1kx_dmmu - Data memory management unit implementation +*** mor1kx_wb_mux_xx - Pipeline-implementation-dependent writeback stage mux +*** mor1kx_rf_xx - Pipeline-implementation-dependent register file +*** mor1kx_ctrl_xx - Pipeline-implementation-dependent control stage, usually containing features such as tick timer, interrupts etc. + + +The above hierarchy is not always the same for each pipeline implementation +(from _mor1kx_cpu_xx_ level down) but is a good indicator as to how the +existing pipeline implementations have been structured. + +Coding Style +~~~~~~~~~~~~ + +The coding style is a relatively simple Verilog style and should be adhered to +for any future development. All of the following detail how the existing code +base has been implemented, and it should be relatively easy to continue in that +style, but if in doubt, the following details some of the rules: + +* module names should be prefixed with _mor1kx_ and be in lowercase and stored in a source file of the same name plus the standard verilog suffix, _.v_. 
+* port names should have their direction abbreviation appended to the name +** E.g the instruction bus address output should be ibus_adr_o +** The only exceptions are the clock and reset ports, commonly referred to as _clk_ and _rst_ in the Verilog, and don't really require direction suffixes. The only exception is if a clock or reset generation module is outputting one of these, in which case it should have _o appended, otherwise for clock and reset inputs it can be inferred that they are inputs from the absence of _o. +* Parameter names should be in UPPERCASE, with parameter values, if strings, also in uppercase. +* Verilog reg and wire names should be named usefully, in lowercase, and make use of underscores in the name. +* There should be no reason to alter the mor1kx-defines.v or mor1kx-sprs.v files unless it is expanding upon or fixing what is already there. _No new class of defines controlling implementation should be added_ (use parameters instead!) Those files are only for storing architectural information in Verilog format, which can then be used in the design. + +There has been heavy use of the Emacs Verilog mode auto-implement and wire +features throughout the code. This is a useful tool and it would be useful to +stick to using this for future pipeline implementations, to speed up the +tedious task of wiring up new blocks. + + +Architecture compliance +~~~~~~~~~~~~~~~~~~~~~~~ + +The mor1kx aims to be fully OpenRISC 1000 compliant. It implements the VR2, +AVR and ISRs (still largely unused). The current OR1K architecture version +supported is 1.0. + +Version Register 2 definition +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Bits 23:0 of the VR2 are implementation-specific. 
They are defined, for mor1kx, +to be 3 bytes, interpreted as 3 unsigned 8-bit integers, as the following: + +* 23:16 - Major version number +* 15:8 - Minor version number +* 7:0 - Pipeline identifier +** Where this pipeline identifier means +*** 1 - Cappuccino +*** 2 - Espresso +*** 3 - Pronto Espresso + +However, the mor1kx does not implement the following at present: + +* arithmetic exception control/status registers (AECR, AESR) +* l.muld[u] and thus access to full 64-bit result as there is no MAC unit + +CPU Components +============== + +This section will outline each of the CPU component modules. + +CPU Pipeline Implementations +---------------------------- + +Available Implementations +~~~~~~~~~~~~~~~~~~~~~~~~~ + +At present the following pipeline implementations are available. These +combine various of the components, outlined in a following section, to +implement the bulk of the processor. + +- cappuccino - 6 stage, single issue, delay slot, debug unit, timers, PIC, + tightly coupled cache, MMU, ORBIS32 support +- espresso - 2 stage, single issue, delay slot, debug unit, timers, PIC, ORBIS32 support +- pronto espresso - 2 stage, single issue, no delay slot, debug unit, timers, PIC, ORBIS32 support + +Cappuccino pipeline +~~~~~~~~~~~~~~~~~~~ + +A 6 stage pipeline. (address, fetch, decode, execute, control/memory and writeback) + +Caches supported (optional). + +MMUs supported (optional). + +It has a delay slot on jump and branch instructions. + +It features the EVBAR. 
+ +Pipeline consists of the following modules: + +* Fetch: +** <> +** <> +** <> +* Decode +** <> +* Execute stage +** <> +** <> +** <> +** <> +* Control/memory stage +** <> +** <> +** <> +** <> +** <> + +The following sections outline the pipeline-specific modules + +[[mor1kx_ctrl_branch_cappuccino]] +mor1kx_ctrl_branch_cappuccino +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This is the pipeline's branch control unit, selecting the jump/branch address +and opcode input from execute stage, with flag input from control stage. +Indication of whether a branch needs to be evaluated (based on flag) comes +from the control stage. + +The block then outputs the appropriate indication of whether a branch is going +to occur and the target address to the fetch stage. + +It is wholly combinatorial. + +[[mor1kx_ctrl_cappuccino]] +mor1kx_ctrl_cappuccino +^^^^^^^^^^^^^^^^^^^^^^ + +This module contains a lot of the core functionality of the pipeline, such as: + +* SPRs (NPC, PPC, etc.) and accesses to them +* PIC +* Debug unit +* Tick timer +* Pipeline control signals +** Advance/stall signaling to each pipeline stage +** Exception handling + +It's in a big monolithic file but perhaps things like the PIC and tick timer +could be split out and made generic among pipeline implementations. + +[[mor1kx_execute_ctrl_cappuccino]] +mor1kx_execute_ctrl_cappuccino +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +Determine the status of execute-stage units in play (ALU or LSU) and: + +* Determine when they're done +* Control the write-enable to the register file for any result +* Correctly propagate exception signals from either execute or earlier stages + +[[mor1kx_fetch_cappuccino]] +mor1kx_fetch_cappuccino +^^^^^^^^^^^^^^^^^^^^^^^ + +Fetch stage, tightly coupled with instruction cache. + +[[mor1kx_lsu_cappuccino]] +mor1kx_lsu_cappuccino +^^^^^^^^^^^^^^^^^^^^^ + +Load/store unit. 
+ +Performs accesses of the generic bus which may or may not be then going to +a data cache, and finally out onto the bus via the selected bus interface. + +Is 32-bit specific. + +Combinatorial outputs to pipeline control logic to reduce latency, may +introduce long paths affecting timing, though. + +Handles sign extension if load/store requires it. +Generates alignment exception, and handles bus error exception back to +the mor1kx_execute_ctrl_cappuccino module. + +[[mor1kx_rf_cappuccino]] +mor1kx_rf_cappuccino +^^^^^^^^^^^^^^^^^^^^ + +Register file for the pipeline. 2 lots of 32 general purpose registers (GPRs.) + +Handles forwarding from control/memory and writeback to execute stage. + +Instantiates a RAM for each of the two register files (_mor1kx_rf_ram_ module.) + +[[mor1kx_wb_mux_cappuccino]] +mor1kx_wb_mux_cappuccino +^^^^^^^^^^^^^^^^^^^^^^^^ + +Writeback stage mux. Inputs are ALU result, LSU result, SPR value for l.mfspr +instruction. Generates link address for jump-and-link instructions. + + +Espresso pipeline +~~~~~~~~~~~~~~~~~ + +The espresso pipeline essentially contains two stages: a fetch and "the-rest" +stages. There is no registering in the decode stage, so the register outputting +the fetched instruction from the fetch stage is what is used for the remainder +of that instruction's processing. + +No support for caches. + +It has a delay slot. + +Supports DSX bit in SR. + +Pipeline consists of the following modules: + +* Fetch: +** <> +* Decode +** <> +* Execute/memory stage +** <> +** <> +** <> +** <> +* Control stage +** <> + +The following sections outline the pipeline-specific modules + +[[mor1kx_fetch_espresso]] +mor1kx_fetch_espresso +^^^^^^^^^^^^^^^^^^^^^ + +This is the fetch stage for the espresso pipeline. + +It is tightly coupled with the control stage. + +The block attempts to stream in bursts from the bus interface. + +The block outputs register addresses for the next read from the RF. 
+ +[[mor1kx_lsu_espresso]] +mor1kx_lsu_espresso +^^^^^^^^^^^^^^^^^^^ + +An LSU specific to the espresso pipeline. Its features are similar to the +<> block. + +[[mor1kx_wb_mux_espresso]] +mor1kx_wb_mux_espresso +^^^^^^^^^^^^^^^^^^^^^^ + +Writeback stage mux for the espresso pipeline. Similar to the +<> block. + +[[mor1kx_rf_espresso]] +mor1kx_rf_espresso +^^^^^^^^^^^^^^^^^^ + +Register file for the espresso pipeline. Similar to the +<> block. + +[[mor1kx_ctrl_espresso]] +mor1kx_ctrl_espresso +^^^^^^^^^^^^^^^^^^^^ + +This module contains a lot of the core functionality of the pipeline, such as: + +* SPRs (NPC, PPC, etc.) and accesses to them +* PIC +* Debug unit +* Tick timer +* Pipeline control signals +** Advance/stall signaling to each pipeline stage +** Exception handling +** Branch indication to fetch stage +** Register file write enable + + +Pronto Espresso pipeline +~~~~~~~~~~~~~~~~~~~~~~~~ + +The pronto espresso pipeline essentially contains two stages: a fetch and "the-rest" +stages. It is based on the Espresso pipeline, but does not have a delay slot on jumps +and branches. As such, it reuses a lot of Espresso's pipeline. + +No support for caches. + +It has _no_ delay slot. + +Pipeline consists of the following modules: + +* Fetch: +** <> +* Decode +** <> +* Execute/memory stage +** <> +** <> +** <> +** <> +* Control stage +** <> + +The following sections outline the pipeline-specific modules + +[[mor1kx_fetch_prontoespresso]] +mor1kx_fetch_prontoespresso +^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This is the fetch stage for the pronto espresso pipeline. + +It is tightly coupled with the control stage. + +The block attempts to stream in bursts from the bus interface. + +The block outputs register addresses for the next read from the RF. + +It takes into account that the pipeline does not support a delay slot on jumps/branches. 
+ +[[mor1kx_ctrl_prontoespresso]] +mor1kx_ctrl_prontoespresso +^^^^^^^^^^^^^^^^^^^^^^^^^^ + +This module contains a lot of the core functionality of the pipeline, such as: + +* SPRs (NPC, PPC, etc.) and accesses to them +* PIC +* Debug unit +* Tick timer +* Pipeline control signals +** Advance/stall signaling to each pipeline stage +** Exception handling +** Branch indication to fetch stage +** Register file write enable + +It is based on the espresso pipeline, but is modified in such a way as to make sure it doesn't have a delay slot on branches. + +Components +---------- + +[[mor1kx_bus_if_wb32]] +mor1kx_bus_if_wb32 +~~~~~~~~~~~~~~~~~~ + +This module is a Wishbone bus interface block and sits between the pipeline's +fetch and load/store units and the Wishbone bus. + +It is Wishbone version B3 compliant and can perform burst reads. + +At present there are two configurations of the block, one is "classic" +configuration and the other is "B3 read bursting". + +The "classic" configuration performs all accesses as single cycle reads or +writes to the bus. + +The "B3 read bursting" mode will perform burst reads over the bus, but writes +are still single cycle accesses. + +[[mor1kx_cpu]] +mor1kx_cpu +~~~~~~~~~~ + +The CPU pipeline wrapper layer. This selects the appropriate pipeline CPU +implementation toplevel. + +Additionally, some signals intended to be used as hooks for monitor modules are +provided. + +[[mor1kx_decode]] +mor1kx_decode +~~~~~~~~~~~~~ + +This is a generic OR1K decode stage module, which can ideally be reused by each +CPU implementation. + +The module can either register its decode output or be wholly combinatorial. + +It generates ALU, LSU and control operation signals for the remainder of the pipeline +units. Exceptions caused in this stage (ie, illegal instruction, system call etc.) or +earlier (fetch stage exceptions like bus error) are also generated or passed through. 
+ +[[mor1kx_execute_alu]] +mor1kx_execute_alu +~~~~~~~~~~~~~~~~~~ + +This is a generic ALU implementation. It contains all of the integer arithmetic and +logical operations which are supported in the ORBIS32 instruction set. + +The following features are optional. All can be disabled, or enabled with the +implementation options listed, if any: + +* Multiplier +** Three stage, three cycle, full 32-bit parallel multiplier +** Serial, 32-cycle serial multiplication implementation +** Simulation, single cycle multiplication, not advisable for synthesis +* Divider +** Serial, 32-cycle serial division implementation +** Simulation, single cycle division, not synthesisable +* Shift-right-arithmetic +* Rotate right +* Shift instructions, logical shift left and right, and shift right arithmetic and rotate right can be chosen to be implemented in a single-cycle barrel shifter implementation or done serially to save implementation area. +* Conditional move +* Find first and last '1' + +The following is not yet supported: + +* Add with carry +* Sign extension instructions + +The module also implements comparison logic for the set flag instructions. + + +[[mor1kx_icache]] +mor1kx_icache +~~~~~~~~~~~~~ + +Instruction cache module. + +[[mor1kx_dcache]] +mor1kx_dcache +~~~~~~~~~~~~~ + +Data cache module. + + +[[mor1kx_immu]] +mor1kx_immu +~~~~~~~~~~~ + +Instruction memory management module. + +[[mor1kx_dmmu]] +mor1kx_dmmu +~~~~~~~~~~~ + +Data memory management module. + +[[mor1kx_rf_ram]] +mor1kx_rf_ram +~~~~~~~~~~~~~ + +[[mor1kx_spram]] +mor1kx_spram +~~~~~~~~~~~~ + +Generic single port ram with separate read and write addresses. + +Has explicit bypass logic to correctly present write-first behaviour on +different platforms. + +[[mor1kx_dpram_sclk]] +mor1kx_dpram_sclk +~~~~~~~~~~~~~~~~~ + +Generic single clocked dual port ram. 
diff --git a/pythondata_cpu_mor1kx/verilog/mor1kx.core b/pythondata_cpu_mor1kx/verilog/mor1kx.core new file mode 100644 index 0000000..e935ed6 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/mor1kx.core @@ -0,0 +1,97 @@ +CAPI=2: + +name : ::mor1kx:5.0-r3 +#description : mor1kx - an OpenRISC processor IP core + +filesets: + core: + files: + - rtl/verilog/mor1kx-defines.v : {is_include_file : true} + - rtl/verilog/mor1kx-sprs.v : {is_include_file : true} + - rtl/verilog/mor1kx_utils.vh : {is_include_file : true} + - rtl/verilog/mor1kx_branch_predictor_gshare.v + - rtl/verilog/mor1kx_branch_predictor_simple.v + - rtl/verilog/mor1kx_branch_predictor_saturation_counter.v + - rtl/verilog/mor1kx_branch_prediction.v + - rtl/verilog/mor1kx_bus_if_wb32.v + - rtl/verilog/mor1kx_cache_lru.v + - rtl/verilog/mor1kx_cfgrs.v + - rtl/verilog/mor1kx_cpu_cappuccino.v + - rtl/verilog/mor1kx_cpu_espresso.v + - rtl/verilog/mor1kx_cpu_prontoespresso.v + - rtl/verilog/mor1kx_cpu.v + - rtl/verilog/mor1kx_ctrl_cappuccino.v + - rtl/verilog/mor1kx_ctrl_espresso.v + - rtl/verilog/mor1kx_ctrl_prontoespresso.v + - rtl/verilog/mor1kx_dcache.v + - rtl/verilog/mor1kx_decode_execute_cappuccino.v + - rtl/verilog/mor1kx_decode.v + - rtl/verilog/mor1kx_dmmu.v + - rtl/verilog/mor1kx_execute_alu.v + - rtl/verilog/mor1kx_execute_ctrl_cappuccino.v + - rtl/verilog/mor1kx_fetch_cappuccino.v + - rtl/verilog/mor1kx_fetch_espresso.v + - rtl/verilog/mor1kx_fetch_prontoespresso.v + - rtl/verilog/mor1kx_fetch_tcm_prontoespresso.v + - rtl/verilog/mor1kx_icache.v + - rtl/verilog/mor1kx_immu.v + - rtl/verilog/mor1kx_lsu_cappuccino.v + - rtl/verilog/mor1kx_lsu_espresso.v + - rtl/verilog/mor1kx_pcu.v + - rtl/verilog/mor1kx_pic.v + - rtl/verilog/mor1kx_rf_cappuccino.v + - rtl/verilog/mor1kx_rf_espresso.v + - rtl/verilog/mor1kx_simple_dpram_sclk.v + - rtl/verilog/mor1kx_store_buffer.v + - rtl/verilog/mor1kx_ticktimer.v + - rtl/verilog/mor1kx_true_dpram_sclk.v + - rtl/verilog/mor1kx.v + - 
rtl/verilog/mor1kx_wb_mux_cappuccino.v + - rtl/verilog/mor1kx_wb_mux_espresso.v + file_type : verilogSource + + fpu: + files: + - rtl/verilog/pfpu32/pfpu32_addsub.v + - rtl/verilog/pfpu32/pfpu32_cmp.v + - rtl/verilog/pfpu32/pfpu32_f2i.v + - rtl/verilog/pfpu32/pfpu32_i2f.v + - rtl/verilog/pfpu32/pfpu32_muldiv.v + - rtl/verilog/pfpu32/pfpu32_rnd.v + - rtl/verilog/pfpu32/pfpu32_top.v + file_type : verilogSource + + monitor: + files : [bench/verilog/mor1kx_monitor.v] + file_type : verilogSource + +parameters: + trace_enable: + datatype : bool + description : Enable mor1kx instruction trace + paramtype : plusarg + + trace_to_screen: + datatype : bool + description : Output mor1kx instruction trace to screen + paramtype : plusarg + +targets: + default: + filesets: + - core + - fpu + - "tool_icarus? (monitor)" + - "tool_isim? (monitor)" + - "tool_modelsim? (monitor)" + - "tool_rivierapro? (monitor)" + - "tool_xsim? (monitor)" + parameters: [trace_enable, trace_to_screen] + + synth: + default_tool : icestorm + filesets : [core , fpu] + tools: + icestorm: + pnr: none + toplevel : mor1kx diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx-defines.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx-defines.v new file mode 100644 index 0000000..265e49f --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx-defines.v @@ -0,0 +1,286 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx defines + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +/* ORBIS32 opcodes - top 6 bits */ + +`define OR1K_INSN_WIDTH 32 + +`define OR1K_RD_SELECT 25:21 +`define OR1K_RA_SELECT 20:16 +`define OR1K_RB_SELECT 15:11 + +`define OR1K_IMM_WIDTH 16 +`define OR1K_IMM_SELECT 15:0 + +`define OR1K_ALU_OPC_WIDTH 4 +`define OR1K_ALU_OPC_SELECT 3:0 + +`define OR1K_ALU_OPC_ADD `OR1K_ALU_OPC_WIDTH'h0 +`define OR1K_ALU_OPC_ADDC `OR1K_ALU_OPC_WIDTH'h1 +`define OR1K_ALU_OPC_SUB `OR1K_ALU_OPC_WIDTH'h2 +`define OR1K_ALU_OPC_AND `OR1K_ALU_OPC_WIDTH'h3 +`define OR1K_ALU_OPC_OR `OR1K_ALU_OPC_WIDTH'h4 +`define OR1K_ALU_OPC_XOR `OR1K_ALU_OPC_WIDTH'h5 +`define OR1K_ALU_OPC_MUL `OR1K_ALU_OPC_WIDTH'h6 +`define OR1K_ALU_OPC_RESV `OR1K_ALU_OPC_WIDTH'h7 +`define OR1K_ALU_OPC_SHRT `OR1K_ALU_OPC_WIDTH'h8 +`define OR1K_ALU_OPC_DIV `OR1K_ALU_OPC_WIDTH'h9 +`define OR1K_ALU_OPC_DIVU `OR1K_ALU_OPC_WIDTH'ha +`define OR1K_ALU_OPC_MULU `OR1K_ALU_OPC_WIDTH'hb +`define OR1K_ALU_OPC_EXTBH `OR1K_ALU_OPC_WIDTH'hc +`define OR1K_ALU_OPC_EXTW `OR1K_ALU_OPC_WIDTH'hd +`define OR1K_ALU_OPC_CMOV `OR1K_ALU_OPC_WIDTH'he +`define OR1K_ALU_OPC_FFL1 `OR1K_ALU_OPC_WIDTH'hf + +`define OR1K_ALU_OPC_SECONDARY_WIDTH 3 +`define OR1K_ALU_OPC_SECONDARY_SELECT 8:6 + +`define OR1K_ALU_OPC_SECONDARY_SHRT_SLL `OR1K_ALU_OPC_SECONDARY_WIDTH'h0 +`define OR1K_ALU_OPC_SECONDARY_SHRT_SRL `OR1K_ALU_OPC_SECONDARY_WIDTH'h1 +`define OR1K_ALU_OPC_SECONDARY_SHRT_SRA `OR1K_ALU_OPC_SECONDARY_WIDTH'h2 +`define OR1K_ALU_OPC_SECONDARY_SHRT_ROR `OR1K_ALU_OPC_SECONDARY_WIDTH'h3 + +`define OR1K_ALU_OPC_SECONDARY_EXTBH_EXTHS `OR1K_ALU_OPC_SECONDARY_WIDTH'h0 +`define OR1K_ALU_OPC_SECONDARY_EXTW_EXTWS `OR1K_ALU_OPC_SECONDARY_WIDTH'h0 +`define OR1K_ALU_OPC_SECONDARY_EXTBH_EXTBS `OR1K_ALU_OPC_SECONDARY_WIDTH'h1 
+`define OR1K_ALU_OPC_SECONDARY_EXTW_EXTWZ `OR1K_ALU_OPC_SECONDARY_WIDTH'h1 +`define OR1K_ALU_OPC_SECONDARY_EXTBH_EXTHZ `OR1K_ALU_OPC_SECONDARY_WIDTH'h2 +`define OR1K_ALU_OPC_SECONDARY_EXTBH_EXTBZ `OR1K_ALU_OPC_SECONDARY_WIDTH'h3 + +`define OR1K_COMP_OPC_WIDTH 4 +`define OR1K_COMP_OPC_SELECT 24:21 +`define OR1K_COMP_OPC_EQ `OR1K_COMP_OPC_WIDTH'h0 +`define OR1K_COMP_OPC_NE `OR1K_COMP_OPC_WIDTH'h1 +`define OR1K_COMP_OPC_GTU `OR1K_COMP_OPC_WIDTH'h2 +`define OR1K_COMP_OPC_GEU `OR1K_COMP_OPC_WIDTH'h3 +`define OR1K_COMP_OPC_LTU `OR1K_COMP_OPC_WIDTH'h4 +`define OR1K_COMP_OPC_LEU `OR1K_COMP_OPC_WIDTH'h5 +`define OR1K_COMP_OPC_GTS `OR1K_COMP_OPC_WIDTH'ha +`define OR1K_COMP_OPC_GES `OR1K_COMP_OPC_WIDTH'hb +`define OR1K_COMP_OPC_LTS `OR1K_COMP_OPC_WIDTH'hc +`define OR1K_COMP_OPC_LES `OR1K_COMP_OPC_WIDTH'hd + +`define OR1K_JUMPBRANCH_IMMEDIATE_SELECT 25:0 + +`define OR1K_SYSTRAPSYNC_OPC_WIDTH 3 +`define OR1K_SYSTRAPSYNC_OPC_SELECT 25:23 +`define OR1K_SYSTRAPSYNC_OPC_SYSCALL `OR1K_SYSTRAPSYNC_OPC_WIDTH'h0 +`define OR1K_SYSTRAPSYNC_OPC_TRAP `OR1K_SYSTRAPSYNC_OPC_WIDTH'h2 +`define OR1K_SYSTRAPSYNC_OPC_MSYNC `OR1K_SYSTRAPSYNC_OPC_WIDTH'h4 +`define OR1K_SYSTRAPSYNC_OPC_PSYNC `OR1K_SYSTRAPSYNC_OPC_WIDTH'h5 +`define OR1K_SYSTRAPSYNC_OPC_CSYNC `OR1K_SYSTRAPSYNC_OPC_WIDTH'h6 + +`define OR1K_OPCODE_WIDTH 6 +`define OR1K_OPCODE_SELECT 31:26 + +`define OR1K_OPCODE_J {2'b00, 4'h0} +`define OR1K_OPCODE_JAL {2'b00, 4'h1} +`define OR1K_OPCODE_BNF {2'b00, 4'h3} +`define OR1K_OPCODE_BF {2'b00, 4'h4} +`define OR1K_OPCODE_NOP {2'b00, 4'h5} +`define OR1K_OPCODE_MOVHI {2'b00, 4'h6} +`define OR1K_OPCODE_MACRC {2'b00, 4'h6} + +/* +`define OR1K_OPCODE_SYS {2'b00, 4'h8} +`define OR1K_OPCODE_TRAP {2'b00, 4'h8} +`define OR1K_OPCODE_MSYNC {2'b00, 4'h8} +`define OR1K_OPCODE_PSYNC {2'b00, 4'h8} +`define OR1K_OPCODE_CSYNC {2'b00, 4'h8} + */ +`define OR1K_OPCODE_SYSTRAPSYNC {2'b00, 4'h8} +`define OR1K_OPCODE_RFE {2'b00, 4'h9} + +`define OR1K_OPCODE_JR {2'b01, 4'h1} +`define OR1K_OPCODE_JALR {2'b01, 4'h2} 
+`define OR1K_OPCODE_MACI {2'b01, 4'h3} +`define OR1K_OPCODE_LWA {2'b01, 4'hB} +`define OR1K_OPCODE_CUST1 {2'b01, 4'hC} +`define OR1K_OPCODE_CUST2 {2'b01, 4'hD} +`define OR1K_OPCODE_CUST3 {2'b01, 4'hE} +`define OR1K_OPCODE_CUST4 {2'b01, 4'hF} + +`define OR1K_OPCODE_LD {2'b10, 4'h0} +`define OR1K_OPCODE_LWZ {2'b10, 4'h1} +`define OR1K_OPCODE_LWS {2'b10, 4'h2} +`define OR1K_OPCODE_LBZ {2'b10, 4'h3} +`define OR1K_OPCODE_LBS {2'b10, 4'h4} +`define OR1K_OPCODE_LHZ {2'b10, 4'h5} +`define OR1K_OPCODE_LHS {2'b10, 4'h6} + +`define OR1K_OPCODE_ADDI {2'b10, 4'h7} +`define OR1K_OPCODE_ADDIC {2'b10, 4'h8} +`define OR1K_OPCODE_ANDI {2'b10, 4'h9} +`define OR1K_OPCODE_ORI {2'b10, 4'hA} +`define OR1K_OPCODE_XORI {2'b10, 4'hB} +`define OR1K_OPCODE_MULI {2'b10, 4'hC} +`define OR1K_OPCODE_MFSPR {2'b10, 4'hD} +/* +`define OR1K_OPCODE_SLLI {2'b10, 4'hE} +`define OR1K_OPCODE_SRLI {2'b10, 4'hE} +`define OR1K_OPCODE_SRAI {2'b10, 4'hE} +`define OR1K_OPCODE_RORI {2'b10, 4'hE} +*/ +`define OR1K_OPCODE_SHRTI {2'b10, 4'hE} + +/* +`define OR1K_OPCODE_SFEQI {2'b10, 4'hF} +`define OR1K_OPCODE_SFNEI {2'b10, 4'hF} +`define OR1K_OPCODE_SFGTUI {2'b10, 4'hF} +`define OR1K_OPCODE_SFGEUI {2'b10, 4'hF} +`define OR1K_OPCODE_SFLTUI {2'b10, 4'hF} +`define OR1K_OPCODE_SFLEUI {2'b10, 4'hF} +`define OR1K_OPCODE_SFGTSI {2'b10, 4'hF} +`define OR1K_OPCODE_SFGESI {2'b10, 4'hF} +`define OR1K_OPCODE_SFLTSI {2'b10, 4'hF} +`define OR1K_OPCODE_SFLESI {2'b10, 4'hF} +*/ +`define OR1K_OPCODE_SFIMM {2'b10, 4'hF} + +`define OR1K_OPCODE_MTSPR {2'b11, 4'h0} +`define OR1K_OPCODE_MAC {2'b11, 4'h1} +`define OR1K_OPCODE_MSB {2'b11, 4'h1} + +`define OR1K_OPCODE_SWA {2'b11, 4'h3} +`define OR1K_OPCODE_SD {2'b11, 4'h4} +`define OR1K_OPCODE_SW {2'b11, 4'h5} +`define OR1K_OPCODE_SB {2'b11, 4'h6} +`define OR1K_OPCODE_SH {2'b11, 4'h7} + +/* +`define OR1K_OPCODE_ADD {2'b11, 4'h8} +`define OR1K_OPCODE_ADDC {2'b11, 4'h8} +`define OR1K_OPCODE_SUB {2'b11, 4'h8} +`define OR1K_OPCODE_AND {2'b11, 4'h8} +`define OR1K_OPCODE_OR {2'b11, 4'h8} 
+`define OR1K_OPCODE_XOR {2'b11, 4'h8} +`define OR1K_OPCODE_MUL {2'b11, 4'h8} + +`define OR1K_OPCODE_SLL {2'b11, 4'h8} +`define OR1K_OPCODE_SRL {2'b11, 4'h8} +`define OR1K_OPCODE_SRA {2'b11, 4'h8} +`define OR1K_OPCODE_ROR {2'b11, 4'h8} +`define OR1K_OPCODE_DIV {2'b11, 4'h8} +`define OR1K_OPCODE_DIVU {2'b11, 4'h8} +`define OR1K_OPCODE_MULU {2'b11, 4'h8} +`define OR1K_OPCODE_EXTBS {2'b11, 4'h8} +`define OR1K_OPCODE_EXTHS {2'b11, 4'h8} +`define OR1K_OPCODE_EXTWS {2'b11, 4'h8} +`define OR1K_OPCODE_EXTBZ {2'b11, 4'h8} +`define OR1K_OPCODE_EXTHZ {2'b11, 4'h8} +`define OR1K_OPCODE_EXTWZ {2'b11, 4'h8} +`define OR1K_OPCODE_CMOV {2'b11, 4'h8} +`define OR1K_OPCODE_FF1 {2'b11, 4'h8} +`define OR1K_OPCODE_FL1 {2'b11, 4'h8} +*/ +`define OR1K_OPCODE_ALU {2'b11, 4'h8} + +/* +`define OR1K_OPCODE_SFEQ {2'b11, 4'h9} +`define OR1K_OPCODE_SFNE {2'b11, 4'h9} +`define OR1K_OPCODE_SFGTU {2'b11, 4'h9} +`define OR1K_OPCODE_SFGEU {2'b11, 4'h9} +`define OR1K_OPCODE_SFLTU {2'b11, 4'h9} +`define OR1K_OPCODE_SFLEU {2'b11, 4'h9} +`define OR1K_OPCODE_SFGTS {2'b11, 4'h9} +`define OR1K_OPCODE_SFGES {2'b11, 4'h9} +`define OR1K_OPCODE_SFLTS {2'b11, 4'h9} +`define OR1K_OPCODE_SFLES {2'b11, 4'h9} +*/ +`define OR1K_OPCODE_SF {2'b11, 4'h9} + +`define OR1K_OPCODE_CUST5 {2'b11, 4'hC} +`define OR1K_OPCODE_CUST6 {2'b11, 4'hD} +`define OR1K_OPCODE_CUST7 {2'b11, 4'hE} +`define OR1K_OPCODE_CUST8 {2'b11, 4'hF} + +// +// ORFPX32 opcodes +// +`define OR1K_OPCODE_FPU {2'b11, 4'h2} +// FP OPs +// MSbit indicates FPU operation valid +`define OR1K_FPUOP_WIDTH 8 +`define OR1K_FPUOP_SELECT 7:0 +// Select bits for Ordered/Unordered comparison +`define OR1K_FPUOP_GENERIC_CMP_WIDTH 3 +`define OR1K_FPUOP_GENERIC_CMP_SELECT 2:0 +// Unordered comparison bit +`define OR1K_FPUOP_UNORDERED_CMP_BIT 5 +// FP instruction is double precision if bit 4 is set. 
We're a 32-bit +// implementation thus do not support double precision FP +`define OR1K_FPUOP_DOUBLE_BIT 4 +// FP Arithmetic OPCs +`define OR1K_FPUOP_ADD 8'b0000_0000 +`define OR1K_FPUOP_SUB 8'b0000_0001 +`define OR1K_FPUOP_MUL 8'b0000_0010 +`define OR1K_FPUOP_DIV 8'b0000_0011 +`define OR1K_FPUOP_ITOF 8'b0000_0100 +`define OR1K_FPUOP_FTOI 8'b0000_0101 +`define OR1K_FPUOP_REM 8'b0000_0110 +`define OR1K_FPUOP_RESERVED 8'b0000_0111 +// FP Ordered Comparison OPCs +`define OR1K_FPCOP_SFEQ 8'b0000_1000 +`define OR1K_FPCOP_SFNE 8'b0000_1001 +`define OR1K_FPCOP_SFGT 8'b0000_1010 +`define OR1K_FPCOP_SFGE 8'b0000_1011 +`define OR1K_FPCOP_SFLT 8'b0000_1100 +`define OR1K_FPCOP_SFLE 8'b0000_1101 +// FP Unordered Comparison OPCs +`define OR1K_FPCOP_SFUEQ 8'b0010_1000 +`define OR1K_FPCOP_SFUNE 8'b0010_1001 +`define OR1K_FPCOP_SFUGT 8'b0010_1010 +`define OR1K_FPCOP_SFUGE 8'b0010_1011 +`define OR1K_FPCOP_SFULT 8'b0010_1100 +`define OR1K_FPCOP_SFULE 8'b0010_1101 +`define OR1K_FPCOP_SFUN 8'b0010_1110 + + +// +// OR1K SPR defines +// +`include "mor1kx-sprs.v" + +/* Exception addresses */ +`define OR1K_RESET_VECTOR 5'h01 +`define OR1K_BERR_VECTOR 5'h02 +`define OR1K_DPF_VECTOR 5'h03 +`define OR1K_IPF_VECTOR 5'h04 +`define OR1K_TT_VECTOR 5'h05 +`define OR1K_ALIGN_VECTOR 5'h06 +`define OR1K_ILLEGAL_VECTOR 5'h07 +`define OR1K_INT_VECTOR 5'h08 +`define OR1K_DTLB_VECTOR 5'h09 +`define OR1K_ITLB_VECTOR 5'h0a +`define OR1K_RANGE_VECTOR 5'h0b +`define OR1K_SYSCALL_VECTOR 5'h0c +`define OR1K_FP_VECTOR 5'h0d +`define OR1K_TRAP_VECTOR 5'h0e + +// Whether we'll allow things using ASYNC reset to have it: +//`define OR_ASYNC_RST or posedge rst +`define OR_ASYNC_RST + +// Implementation version defines +`define MOR1KX_CPUID 8'h01 +// mor1kx breaks up the VR2 version register to be 3 8-bit fields +// MSB is major version, middle byte is minor version number +// and final byte is the pipeline identifier.
+`define MOR1KX_VERSION_MAJOR 8'd5 +`define MOR1KX_VERSION_MINOR 8'd0 + +// mor1kx implementation-specific register definitions +`define MOR1KX_PIPEID_CAPPUCCINO 8'd1 +`define MOR1KX_PIPEID_ESPRESSO 8'd2 +`define MOR1KX_PIPEID_PRONTOESPRESSO 8'd3 diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx-sprs.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx-sprs.v new file mode 100644 index 0000000..8beff23 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx-sprs.v @@ -0,0 +1,376 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: SPR definitions + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ +`define SPR_BASE(x) (x/(2**11)) +`define SPR_OFFSET(x) (x%(2**11)) + +// +// Addresses +// +`define OR1K_SPR_SYS_BASE {4'd0} +`define OR1K_SPR_VR_ADDR {5'd0,11'd0} +`define OR1K_SPR_UPR_ADDR {5'd0,11'd1} +`define OR1K_SPR_CPUCFGR_ADDR {5'd0,11'd2} +`define OR1K_SPR_DMMUCFGR_ADDR {5'd0,11'd3} +`define OR1K_SPR_IMMUCFGR_ADDR {5'd0,11'd4} +`define OR1K_SPR_DCCFGR_ADDR {5'd0,11'd5} +`define OR1K_SPR_ICCFGR_ADDR {5'd0,11'd6} +`define OR1K_SPR_DCFGR_ADDR {5'd0,11'd7} +`define OR1K_SPR_PCCFGR_ADDR {5'd0,11'd8} +`define OR1K_SPR_VR2_ADDR {5'd0,11'd9} +`define OR1K_SPR_AVR_ADDR {5'd0,11'd10} +`define OR1K_SPR_EVBAR_ADDR {5'd0,11'd11} +`define OR1K_SPR_AECR_ADDR {5'd0,11'd12} +`define OR1K_SPR_AESR_ADDR {5'd0,11'd13} +`define OR1K_SPR_NPC_ADDR {5'd0,11'd16} +`define OR1K_SPR_SR_ADDR {5'd0,11'd17} +`define OR1K_SPR_PPC_ADDR {5'd0,11'd18} +`define OR1K_SPR_FPCSR_ADDR {5'd0,11'd20} +`define OR1K_SPR_ISR0_ADDR {5'd0,11'd21} +`define OR1K_SPR_EPCR0_ADDR {5'd0,11'd32} +`define OR1K_SPR_EEAR0_ADDR 
{5'd0,11'd48} +`define OR1K_SPR_ESR0_ADDR {5'd0,11'd64} +`define OR1K_SPR_COREID_ADDR {5'd0,11'd128} +`define OR1K_SPR_NUMCORES_ADDR {5'd0,11'd129} +`define OR1K_SPR_GPR0_ADDR {5'd0,11'd1024} + +`define OR1K_SPR_DMMU_BASE {4'd1} +`define OR1K_SPR_DMMUCR_ADDR {5'd1,11'd0} +`define OR1K_SPR_DMMUPR_ADDR {5'd1,11'd1} +`define OR1K_SPR_DTLBEIR_ADDR {5'd1,11'd2} +`define OR1K_SPR_DATBMR0_ADDR {5'd1,11'd4} +`define OR1K_SPR_DATBTR0_ADDR {5'd1,11'd8} +`define OR1K_SPR_DTLBW0MR0_ADDR {5'd1,11'd512} +`define OR1K_SPR_DTLBW0TR0_ADDR {5'd1,11'd640} +`define OR1K_SPR_DTLBW1MR0_ADDR {5'd1,11'd768} +`define OR1K_SPR_DTLBW1TR0_ADDR {5'd1,11'd896} +`define OR1K_SPR_DTLBW2MR0_ADDR {5'd1,11'd1024} +`define OR1K_SPR_DTLBW2TR0_ADDR {5'd1,11'd1152} +`define OR1K_SPR_DTLBW3MR0_ADDR {5'd1,11'd1280} +`define OR1K_SPR_DTLBW3TR0_ADDR {5'd1,11'd1408} + +`define OR1K_SPR_IMMU_BASE {4'd2} +`define OR1K_SPR_IMMUCR_ADDR {5'd2,11'd0} +`define OR1K_SPR_IMMUPR_ADDR {5'd2,11'd1} +`define OR1K_SPR_ITLBEIR_ADDR {5'd2,11'd2} +`define OR1K_SPR_IATBMR0_ADDR {5'd2,11'd4} +`define OR1K_SPR_IATBTR0_ADDR {5'd2,11'd8} +`define OR1K_SPR_ITLBW0MR0_ADDR {5'd2,11'd512} +`define OR1K_SPR_ITLBW0TR0_ADDR {5'd2,11'd640} +`define OR1K_SPR_ITLBW1MR0_ADDR {5'd2,11'd768} +`define OR1K_SPR_ITLBW1TR0_ADDR {5'd2,11'd896} +`define OR1K_SPR_ITLBW2MR0_ADDR {5'd2,11'd1024} +`define OR1K_SPR_ITLBW2TR0_ADDR {5'd2,11'd1152} +`define OR1K_SPR_ITLBW3MR0_ADDR {5'd2,11'd1280} +`define OR1K_SPR_ITLBW3TR0_ADDR {5'd2,11'd1408} + +`define OR1K_SPR_DC_BASE {4'd3} +`define OR1K_SPR_DCCR_ADDR {5'd3,11'd0} +`define OR1K_SPR_DCBPR_ADDR {5'd3,11'd1} +`define OR1K_SPR_DCBFR_ADDR {5'd3,11'd2} +`define OR1K_SPR_DCBIR_ADDR {5'd3,11'd3} +`define OR1K_SPR_DCBWR_ADDR {5'd3,11'd4} +`define OR1K_SPR_DCBLR_ADDR {5'd3,11'd5} + +`define OR1K_SPR_IC_BASE {4'd4} +`define OR1K_SPR_ICCR_ADDR {5'd4,11'd0} +`define OR1K_SPR_ICBPR_ADDR {5'd4,11'd1} +`define OR1K_SPR_ICBIR_ADDR {5'd4,11'd2} +`define OR1K_SPR_ICBLR_ADDR {5'd4,11'd3} + +`define OR1K_SPR_MAC_BASE 
{4'd5} +`define OR1K_SPR_MACLO_ADDR {5'd5,11'd1} +`define OR1K_SPR_MACHI_ADDR {5'd5,11'd2} + +`define OR1K_SPR_DU_BASE {4'd6} +`define OR1K_SPR_DVR0_ADDR {5'd6,11'd0} +`define OR1K_SPR_DCR0_ADDR {5'd6,11'd8} +`define OR1K_SPR_DMR1_ADDR {5'd6,11'd16} +`define OR1K_SPR_DMR2_ADDR {5'd6,11'd17} +`define OR1K_SPR_DCWR0_ADDR {5'd6,11'd18} +`define OR1K_SPR_DSR_ADDR {5'd6,11'd20} +`define OR1K_SPR_DRR_ADDR {5'd6,11'd21} + +`define OR1K_SPR_PC_BASE {4'd7} +`define OR1K_SPR_PCCR0_ADDR {5'd7,11'd0} +`define OR1K_SPR_PCCR1_ADDR {5'd7,11'd1} +`define OR1K_SPR_PCCR2_ADDR {5'd7,11'd2} +`define OR1K_SPR_PCCR3_ADDR {5'd7,11'd3} +`define OR1K_SPR_PCCR4_ADDR {5'd7,11'd4} +`define OR1K_SPR_PCCR5_ADDR {5'd7,11'd5} +`define OR1K_SPR_PCCR6_ADDR {5'd7,11'd6} +`define OR1K_SPR_PCCR7_ADDR {5'd7,11'd7} +`define OR1K_SPR_PCMR0_ADDR {5'd7,11'd8} +`define OR1K_SPR_PCMR1_ADDR {5'd7,11'd9} +`define OR1K_SPR_PCMR2_ADDR {5'd7,11'd10} +`define OR1K_SPR_PCMR3_ADDR {5'd7,11'd11} +`define OR1K_SPR_PCMR4_ADDR {5'd7,11'd12} +`define OR1K_SPR_PCMR5_ADDR {5'd7,11'd13} +`define OR1K_SPR_PCMR6_ADDR {5'd7,11'd14} +`define OR1K_SPR_PCMR7_ADDR {5'd7,11'd15} + +`define OR1K_SPR_PM_BASE {4'd8} +`define OR1K_SPR_PMR_ADDR {5'd8,11'd0} + +`define OR1K_SPR_PIC_BASE {4'd9} +`define OR1K_SPR_PICMR_ADDR {5'd9,11'd0} +`define OR1K_SPR_PICSR_ADDR {5'd9,11'd2} + +`define OR1K_SPR_TT_BASE {4'd10} +`define OR1K_SPR_TTMR_ADDR {5'd10,11'd0} +`define OR1K_SPR_TTCR_ADDR {5'd10,11'd1} + +`define OR1K_SPR_FPU_BASE {4'd11} + +// +// Register bit defines +// + +// Supervision Register +`define OR1K_SPR_SR_SM 0 /* Supervisor mode */ +`define OR1K_SPR_SR_TEE 1 /* Timer exception enable */ +`define OR1K_SPR_SR_IEE 2 /* Interrupt exception enable */ +`define OR1K_SPR_SR_DCE 3 /* Data cache enable */ +`define OR1K_SPR_SR_ICE 4 /* Instruction cache enable */ +`define OR1K_SPR_SR_DME 5 /* Data MMU enable */ +`define OR1K_SPR_SR_IME 6 /* Instruction MMU enable */ +`define OR1K_SPR_SR_LEE 7 /* Little-endian enable */ +`define OR1K_SPR_SR_CE 
8 /* CID enable */ +`define OR1K_SPR_SR_F 9 /* Flag */ +`define OR1K_SPR_SR_CY 10 /* Carry flag */ +`define OR1K_SPR_SR_OV 11 /* Overflow flag */ +`define OR1K_SPR_SR_OVE 12 /* Overflow exception enable */ +`define OR1K_SPR_SR_DSX 13 /* Delay slot exception */ +`define OR1K_SPR_SR_EPH 14 /* Exception prefix high */ +`define OR1K_SPR_SR_FO 15 /* Fixed to one */ +`define OR1K_SPR_SR_SUMRA 16 /* SPR user read mode access */ +`define OR1K_SPR_SR_RESERVED 27:17 /* Reserved */ +`define OR1K_SPR_SR_CID 31:28 /* Context ID */ + +// Version register - DEPRECATED +`define OR1K_SPR_VR_REV 5:0 /* Revision */ +`define OR1K_SPR_VR_UVRP 6 /* Updated Version Registers Present */ +`define OR1K_SPR_VR_RESERVED 15:7 /* Reserved */ +`define OR1K_SPR_VR_CFG 23:16 /* Configuration Template */ +`define OR1K_SPR_VR_VER 31:24 /* Version */ + + +// Unit Present register +`define OR1K_SPR_UPR_UP 0 +`define OR1K_SPR_UPR_DCP 1 +`define OR1K_SPR_UPR_ICP 2 +`define OR1K_SPR_UPR_DMP 3 +`define OR1K_SPR_UPR_IMP 4 +`define OR1K_SPR_UPR_MP 5 +`define OR1K_SPR_UPR_DUP 6 +`define OR1K_SPR_UPR_PCUP 7 +`define OR1K_SPR_UPR_PICP 8 +`define OR1K_SPR_UPR_PMP 9 +`define OR1K_SPR_UPR_TTP 10 +`define OR1K_SPR_UPR_RESERVED 23:11 +`define OR1K_SPR_UPR_CUP 31:24 + +// CPU Configuration register +`define OR1K_SPR_CPUCFGR_NSGF 3:0 /* Number of shadow GPRs */ +`define OR1K_SPR_CPUCFGR_CFG 4 +`define OR1K_SPR_CPUCFGR_OB32S 5 +`define OR1K_SPR_CPUCFGR_OB64S 6 +`define OR1K_SPR_CPUCFGR_OF32S 7 +`define OR1K_SPR_CPUCFGR_OF64S 8 +`define OR1K_SPR_CPUCFGR_OV64S 9 +`define OR1K_SPR_CPUCFGR_ND 10 /* No delay-slot implementation */ +`define OR1K_SPR_CPUCFGR_AVRP 11 /* Arch. version registers */ +`define OR1K_SPR_CPUCFGR_EVBARP 12 /* Exception vector base addr reg */ +`define OR1K_SPR_CPUCFGR_ISRP 13 /* Implementation specific regs */ +`define OR1K_SPR_CPUCFGR_AECSRP 14 /* Arith. 
exception regs */ +`define OR1K_SPR_CPUCFGR_RESERVED 31:15 + +// Version register 2 (new with OR1K 1.0) +`define OR1K_SPR_VR2_VER 23:0 +`define OR1K_SPR_VR2_CPUID 31:24 + +// Architecture Version register +`define OR1K_SPR_AVR_RESERVED 7:0 +`define OR1K_SPR_AVR_REV 15:8 +`define OR1K_SPR_AVR_MIN 23:16 +`define OR1K_SPR_AVR_MAJ 31:24 + +// Exception Vector Base Address register +`define OR1K_SPR_EVBAR_RESERVED 12:0 +`define OR1K_SPR_EVBAR_EVBA 31:13 + +// Arithmetic Exception Control register +`define OR1K_SPR_AECR_CYADDE 0 +`define OR1K_SPR_AECR_OVADDE 1 +`define OR1K_SPR_AECR_CYMULE 2 +`define OR1K_SPR_AECR_OVMULE 3 +`define OR1K_SPR_AECR_DBZE 4 +`define OR1K_SPR_AECR_CYMACADDE 5 +`define OR1K_SPR_AECR_OVMACADDE 6 +`define OR1K_SPR_AECR_RESERVED 31:7 + +// Arithmetic Exception Status register +`define OR1K_SPR_AESR_CYADDE 0 +`define OR1K_SPR_AESR_OVADDE 1 +`define OR1K_SPR_AESR_CYMULE 2 +`define OR1K_SPR_AESR_OVMULE 3 +`define OR1K_SPR_AESR_DBZE 4 +`define OR1K_SPR_AESR_CYMACADDE 5 +`define OR1K_SPR_AESR_OVMACADDE 6 +`define OR1K_SPR_AESR_RESERVED 31:7 + +// Tick timer registers +`define OR1K_SPR_TTMR_TP 27:0 /* Time period */ +`define OR1K_SPR_TTMR_IP 28 /* Interrupt pending */ +`define OR1K_SPR_TTMR_IE 29 /* Interrupt enable */ +`define OR1K_SPR_TTMR_M 31:30 /* Mode */ +// Tick timer mode values +`define OR1K_SPR_TTMR_M_DIS 2'b00 /* Disabled */ +`define OR1K_SPR_TTMR_M_RST 2'b01 /* Restart-on-match mode */ +`define OR1K_SPR_TTMR_M_STP 2'b10 /* Stop-on-match mode */ +`define OR1K_SPR_TTMR_M_CNT 2'b11 /* Continue counting mode */ + +// Data Cache Configuration register +`define OR1K_SPR_DCCFGR_NCW 2:0 /* Number of Cache Ways */ +`define OR1K_SPR_DCCFGR_NCS 6:3 /* Number of Cache Sets */ +`define OR1K_SPR_DCCFGR_CBS 7 /* Cache Block Size */ +`define OR1K_SPR_DCCFGR_CWS 8 /* Cache Write Strategy */ +`define OR1K_SPR_DCCFGR_CCRI 9 /* Cache Control Register Implemented */ +`define OR1K_SPR_DCCFGR_CBIRI 10 /* Cache Block Invalidate Register Implemented */ +`define 
OR1K_SPR_DCCFGR_CBPRI 11 /* Cache Block Prefetch Register Implemented */ +`define OR1K_SPR_DCCFGR_CBLRI 12 /* Cache Block Lock Register Implemented */ +`define OR1K_SPR_DCCFGR_CBFRI 13 /* Cache Block Flush Register Implemented */ +`define OR1K_SPR_DCCFGR_CBWBRI 14 /* Cache Block Write-Back Register Implemented */ + +// Instruction Cache Configuration register +`define OR1K_SPR_ICCFGR_NCW 2:0 /* Number of Cache Ways */ +`define OR1K_SPR_ICCFGR_NCS 6:3 /* Number of Cache Sets */ +`define OR1K_SPR_ICCFGR_CBS 7 /* Cache Block Size */ +`define OR1K_SPR_ICCFGR_CCRI 9 /* Cache Control Register Implemented */ +`define OR1K_SPR_ICCFGR_CBIRI 10 /* Cache Block Invalidate Register Implemented */ +`define OR1K_SPR_ICCFGR_CBPRI 11 /* Cache Block Prefetch Register Implemented */ +`define OR1K_SPR_ICCFGR_CBLRI 12 /* Cache Block Lock Register Implemented */ + +// Data MMU Configuration register +`define OR1K_SPR_DMMUFGR_NTW 1:0 /* Number of TLB ways */ +`define OR1K_SPR_DMMUFGR_NTS 4:2 /* Number of TLB sets */ +`define OR1K_SPR_DMMUFGR_NAE 7:5 /* Number of ATB entries */ +`define OR1K_SPR_DMMUFGR_CRI 8 /* Control Register Implemented */ +`define OR1K_SPR_DMMUFGR_PRI 9 /* Protection Register Implemented */ +`define OR1K_SPR_DMMUFGR_TEIRI 10 /* TLB Entry Invalidate Register Implemented */ +`define OR1K_SPR_DMMUFGR_HTR 11 /* Hardware TLB Reload */ + +// Instruction MMU Configuration register +`define OR1K_SPR_IMMUFGR_NTW 1:0 /* Number of TLB ways */ +`define OR1K_SPR_IMMUFGR_NTS 4:2 /* Number of TLB sets */ +`define OR1K_SPR_IMMUFGR_NAE 7:5 /* Number of ATB entries */ +`define OR1K_SPR_IMMUFGR_CRI 8 /* Control Register Implemented */ +`define OR1K_SPR_IMMUFGR_PRI 9 /* Protection Register Implemented */ +`define OR1K_SPR_IMMUFGR_TEIRI 10 /* TLB Entry Invalidate Register Implemented */ +`define OR1K_SPR_IMMUFGR_HTR 11 /* Hardware TLB Reload */ + +// Debug Mode Register 1 +`define OR1K_SPR_DMR1_ST 22 +`define OR1K_SPR_DMR1_BT 23 + +// Debug Stop Register +`define OR1K_SPR_DSR_RSTE 0 
+`define OR1K_SPR_DSR_BUSEE 1 +`define OR1K_SPR_DSR_DPFE 2 +`define OR1K_SPR_DSR_IPFE 3 +`define OR1K_SPR_DSR_TTE 4 +`define OR1K_SPR_DSR_AE 5 +`define OR1K_SPR_DSR_IIE 6 +`define OR1K_SPR_DSR_INTE 7 +`define OR1K_SPR_DSR_DME 8 +`define OR1K_SPR_DSR_IME 9 +`define OR1K_SPR_DSR_RE 10 +`define OR1K_SPR_DSR_SCE 11 +`define OR1K_SPR_DSR_FPE 12 +`define OR1K_SPR_DSR_TE 13 + +`define OR1K_SPR_DRR_RSTE 0 +`define OR1K_SPR_DRR_BUSEE 1 +`define OR1K_SPR_DRR_DPFE 2 +`define OR1K_SPR_DRR_IPFE 3 +`define OR1K_SPR_DRR_TTE 4 +`define OR1K_SPR_DRR_AE 5 +`define OR1K_SPR_DRR_IIE 6 +`define OR1K_SPR_DRR_IE 7 +`define OR1K_SPR_DRR_DME 8 +`define OR1K_SPR_DRR_IME 9 +`define OR1K_SPR_DRR_RE 10 +`define OR1K_SPR_DRR_SCE 11 +`define OR1K_SPR_DRR_FPE 12 +`define OR1K_SPR_DRR_TE 13 + +// FPCSR bits +`define OR1K_FPCSR_FPEE 0 +`define OR1K_FPCSR_RM 2:1 +`define OR1K_FPCSR_OVF 3 +`define OR1K_FPCSR_UNF 4 +`define OR1K_FPCSR_SNF 5 +`define OR1K_FPCSR_QNF 6 +`define OR1K_FPCSR_ZF 7 +`define OR1K_FPCSR_IXF 8 +`define OR1K_FPCSR_IVF 9 +`define OR1K_FPCSR_INF 10 +`define OR1K_FPCSR_DZF 11 +// FPCSR sizes of fields +`define OR1K_FPCSR_WIDTH 12 // [11:0] +`define OR1K_FPCSR_RM_SIZE 2 +`define OR1K_FPCSR_ALLF_SIZE 9 // [11:3] +// FPCSR flags +`define OR1K_FPCSR_ALLF `OR1K_FPCSR_DZF:`OR1K_FPCSR_OVF +// FPCSR reset value +`define OR1K_FPCSR_RESET_VALUE `OR1K_FPCSR_WIDTH'd0 +// FPCSR extension: maskable FPU flags. +// -vvvv- uncomment the next line to switch the extension on -vvvv- +//`define OR1K_FPCSR_MASK_FLAGS +// bits +`define OR1K_FPCSR_MASK_OVF 12 +`define OR1K_FPCSR_MASK_UNF 13 +`define OR1K_FPCSR_MASK_SNF 14 +`define OR1K_FPCSR_MASK_QNF 15 +`define OR1K_FPCSR_MASK_ZF 16 +`define OR1K_FPCSR_MASK_IXF 17 +`define OR1K_FPCSR_MASK_IVF 18 +`define OR1K_FPCSR_MASK_INF 19 +`define OR1K_FPCSR_MASK_DZF 20 +// bus select +`define OR1K_FPCSR_MASK_ALL `OR1K_FPCSR_MASK_DZF:`OR1K_FPCSR_MASK_OVF +// reset value.
+`define OR1K_FPCSR_MASK_RESET_VALUE `OR1K_FPCSR_ALLF_SIZE'd0 + +// PCU PCMR bits +`define OR1K_PCMR_CP 0 +`define OR1K_PCMR_RSVD_1 1 +`define OR1K_PCMR_CISM 2 +`define OR1K_PCMR_CIUM 3 +`define OR1K_PCMR_LA 4 +`define OR1K_PCMR_SA 5 +`define OR1K_PCMR_IF 6 +`define OR1K_PCMR_DCM 7 +`define OR1K_PCMR_ICM 8 +`define OR1K_PCMR_IFS 9 +`define OR1K_PCMR_LSUS 10 +`define OR1K_PCMR_BS 11 +`define OR1K_PCMR_DTLBM 12 +`define OR1K_PCMR_ITLBM 13 +`define OR1K_PCMR_DDS 14 +`define OR1K_PCMR_WPE 25:15 +`define OR1K_PCMR_RSVD_2 31:26 + +// Implementation-specific SPR defines +`define MOR1KX_SPR_SR_WIDTH 16 +`define MOR1KX_SPR_SR_RESET_VALUE `MOR1KX_SPR_SR_WIDTH'h8001 diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx.v new file mode 100644 index 0000000..2103b8b --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx.v @@ -0,0 +1,473 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx processor top level + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + Stefan Kristiansson + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx + #( + parameter OPTION_OPERAND_WIDTH = 32, + + parameter OPTION_CPU0 = "CAPPUCCINO", + + parameter FEATURE_DATACACHE = "NONE", + parameter OPTION_DCACHE_BLOCK_WIDTH = 5, + parameter OPTION_DCACHE_SET_WIDTH = 9, + parameter OPTION_DCACHE_WAYS = 2, + parameter OPTION_DCACHE_LIMIT_WIDTH = 32, + parameter OPTION_DCACHE_SNOOP = "NONE", + parameter FEATURE_DMMU = "NONE", + parameter FEATURE_DMMU_HW_TLB_RELOAD = "NONE", + parameter OPTION_DMMU_SET_WIDTH = 6, + parameter OPTION_DMMU_WAYS = 1, + parameter FEATURE_INSTRUCTIONCACHE = "NONE", + parameter OPTION_ICACHE_BLOCK_WIDTH = 5, + parameter OPTION_ICACHE_SET_WIDTH = 9, + parameter OPTION_ICACHE_WAYS = 2, + parameter OPTION_ICACHE_LIMIT_WIDTH = 32, + parameter FEATURE_IMMU = "NONE", + parameter FEATURE_IMMU_HW_TLB_RELOAD = "NONE", + parameter OPTION_IMMU_SET_WIDTH = 6, + parameter OPTION_IMMU_WAYS = 1, + parameter FEATURE_TIMER = "ENABLED", + parameter FEATURE_DEBUGUNIT = "NONE", + parameter FEATURE_PERFCOUNTERS = "NONE", + parameter OPTION_PERFCOUNTERS_NUM = 0, + parameter FEATURE_MAC = "NONE", + + parameter FEATURE_SYSCALL = "ENABLED", + parameter FEATURE_TRAP = "ENABLED", + parameter FEATURE_RANGE = "ENABLED", + + parameter FEATURE_PIC = "ENABLED", + parameter OPTION_PIC_TRIGGER = "LEVEL", + parameter OPTION_PIC_NMI_WIDTH = 0, + + parameter FEATURE_DSX = "ENABLED", + parameter FEATURE_OVERFLOW = "ENABLED", + parameter FEATURE_CARRY_FLAG = "ENABLED", + + parameter FEATURE_FASTCONTEXTS = "NONE", + parameter OPTION_RF_CLEAR_ON_INIT = 0, + parameter OPTION_RF_NUM_SHADOW_GPR = 0, + parameter OPTION_RF_ADDR_WIDTH = 5, + parameter OPTION_RF_WORDS = 32, 
+ + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}, + + parameter FEATURE_MULTIPLIER = "THREESTAGE", + parameter FEATURE_DIVIDER = "SERIAL", + + parameter FEATURE_ADDC = "ENABLED", + parameter FEATURE_SRA = "ENABLED", + parameter FEATURE_ROR = "NONE", + parameter FEATURE_EXT = "NONE", + parameter FEATURE_CMOV = "ENABLED", + parameter FEATURE_FFL1 = "ENABLED", + parameter FEATURE_ATOMIC = "ENABLED", + + parameter FEATURE_CUST1 = "NONE", + parameter FEATURE_CUST2 = "NONE", + parameter FEATURE_CUST3 = "NONE", + parameter FEATURE_CUST4 = "NONE", + parameter FEATURE_CUST5 = "NONE", + parameter FEATURE_CUST6 = "NONE", + parameter FEATURE_CUST7 = "NONE", + parameter FEATURE_CUST8 = "NONE", + + parameter FEATURE_FPU = "NONE", // ENABLED|NONE: actual for cappuccino pipeline only + parameter OPTION_FTOI_ROUNDING = "CPP", // "CPP" (force toward zero; default) / "IEEE" (by rounding mode bits from FPCSR) + + parameter OPTION_SHIFTER = "BARREL", + + parameter FEATURE_STORE_BUFFER = "ENABLED", + parameter OPTION_STORE_BUFFER_DEPTH_WIDTH = 8, + + parameter FEATURE_MULTICORE = "NONE", + + parameter FEATURE_TRACEPORT_EXEC = "NONE", + parameter FEATURE_BRANCH_PREDICTOR = "SIMPLE", // SIMPLE|SAT_COUNTER|GSHARE + + parameter BUS_IF_TYPE = "WISHBONE32", + + parameter IBUS_WB_TYPE = "B3_READ_BURSTING", + parameter DBUS_WB_TYPE = "CLASSIC" + ) + ( + input clk, + input rst, + + // Wishbone interface + output [31:0] iwbm_adr_o, + output iwbm_stb_o, + output iwbm_cyc_o, + output [3:0] iwbm_sel_o, + output iwbm_we_o, + output [2:0] iwbm_cti_o, + output [1:0] iwbm_bte_o, + output [31:0] iwbm_dat_o, + input iwbm_err_i, + input iwbm_ack_i, + input [31:0] iwbm_dat_i, + input iwbm_rty_i, + + output [31:0] dwbm_adr_o, + output dwbm_stb_o, + output dwbm_cyc_o, + output [3:0] dwbm_sel_o, + output dwbm_we_o, + output [2:0] dwbm_cti_o, + output [1:0] dwbm_bte_o, + output [31:0] dwbm_dat_o, + input dwbm_err_i, + input dwbm_ack_i, + input [31:0] dwbm_dat_i, + 
input dwbm_rty_i, + + input [31:0] irq_i, + + // Debug interface + input [15:0] du_addr_i, + input du_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] du_dat_i, + input du_we_i, + output [OPTION_OPERAND_WIDTH-1:0] du_dat_o, + output du_ack_o, + // Stall control from debug interface + input du_stall_i, + output du_stall_o, + + output traceport_exec_valid_o, + output [31:0] traceport_exec_pc_o, + output traceport_exec_jb_o, + output traceport_exec_jal_o, + output traceport_exec_jr_o, + output [31:0] traceport_exec_jbtarget_o, + output [`OR1K_INSN_WIDTH-1:0] traceport_exec_insn_o, + output [OPTION_OPERAND_WIDTH-1:0] traceport_exec_wbdata_o, + output [OPTION_RF_ADDR_WIDTH-1:0] traceport_exec_wbreg_o, + output traceport_exec_wben_o, + + // The multicore core identifier + input [OPTION_OPERAND_WIDTH-1:0] multicore_coreid_i, + // The number of cores + input [OPTION_OPERAND_WIDTH-1:0] multicore_numcores_i, + + input [31:0] snoop_adr_i, + input snoop_en_i + ); + + /*AUTOWIRE*/ + // Beginning of automatic wires (for undeclared instantiated-module outputs) + wire [OPTION_OPERAND_WIDTH-1:0] dbus_adr_o; // From mor1kx_cpu of mor1kx_cpu.v + wire [3:0] dbus_bsel_o; // From mor1kx_cpu of mor1kx_cpu.v + wire dbus_burst_o; // From mor1kx_cpu of mor1kx_cpu.v + wire [OPTION_OPERAND_WIDTH-1:0] dbus_dat_o; // From mor1kx_cpu of mor1kx_cpu.v + wire dbus_req_o; // From mor1kx_cpu of mor1kx_cpu.v + wire dbus_we_o; // From mor1kx_cpu of mor1kx_cpu.v + wire [OPTION_OPERAND_WIDTH-1:0] ibus_adr_o; // From mor1kx_cpu of mor1kx_cpu.v + wire ibus_burst_o; // From mor1kx_cpu of mor1kx_cpu.v + wire ibus_req_o; // From mor1kx_cpu of mor1kx_cpu.v + wire [15:0] spr_bus_addr_o; // From mor1kx_cpu of mor1kx_cpu.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o;// From mor1kx_cpu of mor1kx_cpu.v + wire spr_bus_stb_o; // From mor1kx_cpu of mor1kx_cpu.v + wire spr_bus_we_o; // From mor1kx_cpu of mor1kx_cpu.v + wire [15:0] spr_sr_o; // From mor1kx_cpu of mor1kx_cpu.v + // End of automatics + + wire ibus_ack_i; + 
wire [OPTION_OPERAND_WIDTH-1:0] ibus_dat_i; + wire ibus_err_i; + + wire dbus_ack_i; + wire [OPTION_OPERAND_WIDTH-1:0] dbus_dat_i; + wire dbus_err_i; + + generate + if (BUS_IF_TYPE=="WISHBONE32") begin : bus_gen + + /* mor1kx_bus_if_wb32 AUTO_TEMPLATE ( + .cpu_err_o (ibus_err_i), + .cpu_ack_o (ibus_ack_i), + .cpu_dat_o (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), + .wbm_adr_o (iwbm_adr_o), + .wbm_stb_o (iwbm_stb_o), + .wbm_cyc_o (iwbm_cyc_o), + .wbm_sel_o (iwbm_sel_o), + .wbm_we_o (iwbm_we_o), + .wbm_cti_o (iwbm_cti_o), + .wbm_bte_o (iwbm_bte_o), + .wbm_dat_o (iwbm_dat_o), + // Inputs + .cpu_adr_i (ibus_adr_o), + .cpu_dat_i ({OPTION_OPERAND_WIDTH{1'b0}}), + .cpu_req_i (ibus_req_o), + .cpu_we_i (1'b0), + .cpu_bsel_i (4'b1111), + .cpu_burst_i (ibus_burst_o), + .wbm_err_i (iwbm_err_i), + .wbm_ack_i (iwbm_ack_i), + .wbm_dat_i (iwbm_dat_i), + .wbm_rty_i (iwbm_rty_i), + ); */ + + mor1kx_bus_if_wb32 + #(.BUS_IF_TYPE(IBUS_WB_TYPE), + .BURST_LENGTH((FEATURE_INSTRUCTIONCACHE != "NONE") ? + ((OPTION_ICACHE_BLOCK_WIDTH == 4) ? 4 : + ((OPTION_ICACHE_BLOCK_WIDTH == 5) ? 
8 : 1)) + : 1 )) + ibus_bridge + (/*AUTOINST*/ + // Outputs + .cpu_err_o (ibus_err_i), // Templated + .cpu_ack_o (ibus_ack_i), // Templated + .cpu_dat_o (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), // Templated + .wbm_adr_o (iwbm_adr_o), // Templated + .wbm_stb_o (iwbm_stb_o), // Templated + .wbm_cyc_o (iwbm_cyc_o), // Templated + .wbm_sel_o (iwbm_sel_o), // Templated + .wbm_we_o (iwbm_we_o), // Templated + .wbm_cti_o (iwbm_cti_o), // Templated + .wbm_bte_o (iwbm_bte_o), // Templated + .wbm_dat_o (iwbm_dat_o), // Templated + // Inputs + .clk (clk), + .rst (rst), + .cpu_adr_i (ibus_adr_o), // Templated + .cpu_dat_i ({OPTION_OPERAND_WIDTH{1'b0}}), // Templated + .cpu_req_i (ibus_req_o), // Templated + .cpu_bsel_i (4'b1111), // Templated + .cpu_we_i (1'b0), // Templated + .cpu_burst_i (ibus_burst_o), // Templated + .wbm_err_i (iwbm_err_i), // Templated + .wbm_ack_i (iwbm_ack_i), // Templated + .wbm_dat_i (iwbm_dat_i), // Templated + .wbm_rty_i (iwbm_rty_i)); // Templated + + /* mor1kx_bus_if_wb32 AUTO_TEMPLATE ( + .cpu_err_o (dbus_err_i), + .cpu_ack_o (dbus_ack_i), + .cpu_dat_o (dbus_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .wbm_adr_o (dwbm_adr_o), + .wbm_stb_o (dwbm_stb_o), + .wbm_cyc_o (dwbm_cyc_o), + .wbm_sel_o (dwbm_sel_o), + .wbm_we_o (dwbm_we_o), + .wbm_cti_o (dwbm_cti_o), + .wbm_bte_o (dwbm_bte_o), + .wbm_dat_o (dwbm_dat_o), + // Inputs + .cpu_adr_i (dbus_adr_o[31:0]), + .cpu_dat_i (dbus_dat_o), + .cpu_req_i (dbus_req_o), + .cpu_we_i (dbus_we_o), + .cpu_bsel_i (dbus_bsel_o), + .cpu_burst_i (dbus_burst_o), + .wbm_err_i (dwbm_err_i), + .wbm_ack_i (dwbm_ack_i), + .wbm_dat_i (dwbm_dat_i), + .wbm_rty_i (dwbm_rty_i), + ); */ + + mor1kx_bus_if_wb32 + #(.BUS_IF_TYPE(DBUS_WB_TYPE), + .BURST_LENGTH((FEATURE_DATACACHE != "NONE") ? + ((OPTION_DCACHE_BLOCK_WIDTH == 4) ? 4 : + ((OPTION_DCACHE_BLOCK_WIDTH == 5) ? 
8 : 1)) + : 1 )) + dbus_bridge + (/*AUTOINST*/ + // Outputs + .cpu_err_o (dbus_err_i), // Templated + .cpu_ack_o (dbus_ack_i), // Templated + .cpu_dat_o (dbus_dat_i[OPTION_OPERAND_WIDTH-1:0]), // Templated + .wbm_adr_o (dwbm_adr_o), // Templated + .wbm_stb_o (dwbm_stb_o), // Templated + .wbm_cyc_o (dwbm_cyc_o), // Templated + .wbm_sel_o (dwbm_sel_o), // Templated + .wbm_we_o (dwbm_we_o), // Templated + .wbm_cti_o (dwbm_cti_o), // Templated + .wbm_bte_o (dwbm_bte_o), // Templated + .wbm_dat_o (dwbm_dat_o), // Templated + // Inputs + .clk (clk), + .rst (rst), + .cpu_adr_i (dbus_adr_o[31:0]), // Templated + .cpu_dat_i (dbus_dat_o), // Templated + .cpu_req_i (dbus_req_o), // Templated + .cpu_bsel_i (dbus_bsel_o), // Templated + .cpu_we_i (dbus_we_o), // Templated + .cpu_burst_i (dbus_burst_o), // Templated + .wbm_err_i (dwbm_err_i), // Templated + .wbm_ack_i (dwbm_ack_i), // Templated + .wbm_dat_i (dwbm_dat_i), // Templated + .wbm_rty_i (dwbm_rty_i)); // Templated + + end else begin + initial begin + $display("Error: BUS_IF_TYPE not correct"); + $finish(); + end + end // else: !if(BUS_IF_TYPE=="WISHBONE32") + endgenerate + + /* mor1kx_cpu AUTO_TEMPLATE + ( + .spr_bus_dat_dmmu_i (), + .spr_bus_ack_dmmu_i (), + .spr_bus_dat_immu_i (), + .spr_bus_ack_immu_i (), + .spr_bus_dat_mac_i (), + .spr_bus_ack_mac_i (), + .spr_bus_dat_pmu_i (), + .spr_bus_ack_pmu_i (), + .spr_bus_dat_pcu_i (), + .spr_bus_ack_pcu_i (), + .spr_bus_dat_fpu_i (), + .spr_bus_ack_fpu_i (), + ); */ + mor1kx_cpu + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_CPU(OPTION_CPU0), + .FEATURE_DATACACHE(FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH(OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH(OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS(OPTION_DCACHE_WAYS), + .OPTION_DCACHE_LIMIT_WIDTH(OPTION_DCACHE_LIMIT_WIDTH), + .OPTION_DCACHE_SNOOP(OPTION_DCACHE_SNOOP), + .FEATURE_DMMU(FEATURE_DMMU), + .FEATURE_DMMU_HW_TLB_RELOAD(FEATURE_DMMU_HW_TLB_RELOAD), + 
.OPTION_DMMU_SET_WIDTH(OPTION_DMMU_SET_WIDTH), + .OPTION_DMMU_WAYS(OPTION_DMMU_WAYS), + .FEATURE_INSTRUCTIONCACHE(FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH(OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS(OPTION_ICACHE_WAYS), + .OPTION_ICACHE_LIMIT_WIDTH(OPTION_ICACHE_LIMIT_WIDTH), + .FEATURE_IMMU(FEATURE_IMMU), + .FEATURE_IMMU_HW_TLB_RELOAD(FEATURE_IMMU_HW_TLB_RELOAD), + .OPTION_IMMU_SET_WIDTH(OPTION_IMMU_SET_WIDTH), + .OPTION_IMMU_WAYS(OPTION_IMMU_WAYS), + .FEATURE_PIC(FEATURE_PIC), + .FEATURE_TIMER(FEATURE_TIMER), + .FEATURE_DEBUGUNIT(FEATURE_DEBUGUNIT), + .FEATURE_PERFCOUNTERS(FEATURE_PERFCOUNTERS), + .OPTION_PERFCOUNTERS_NUM(OPTION_PERFCOUNTERS_NUM), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE), + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH), + .FEATURE_DSX(FEATURE_DSX), + .FEATURE_OVERFLOW(FEATURE_OVERFLOW), + .FEATURE_CARRY_FLAG(FEATURE_CARRY_FLAG), + .FEATURE_FASTCONTEXTS(FEATURE_FASTCONTEXTS), + .OPTION_RF_CLEAR_ON_INIT(OPTION_RF_CLEAR_ON_INIT), + .OPTION_RF_NUM_SHADOW_GPR(OPTION_RF_NUM_SHADOW_GPR), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .OPTION_RF_WORDS(OPTION_RF_WORDS), + .OPTION_RESET_PC(OPTION_RESET_PC), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + .FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + .FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_ATOMIC(FEATURE_ATOMIC), + .FEATURE_FPU(FEATURE_FPU), // mor1kx_cpu instance + .OPTION_FTOI_ROUNDING(OPTION_FTOI_ROUNDING), // mor1kx_cpu instance + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + 
.FEATURE_CUST8(FEATURE_CUST8), + .OPTION_SHIFTER(OPTION_SHIFTER), + .FEATURE_STORE_BUFFER(FEATURE_STORE_BUFFER), + .OPTION_STORE_BUFFER_DEPTH_WIDTH(OPTION_STORE_BUFFER_DEPTH_WIDTH), + .FEATURE_MULTICORE(FEATURE_MULTICORE), + .FEATURE_TRACEPORT_EXEC(FEATURE_TRACEPORT_EXEC), + .FEATURE_BRANCH_PREDICTOR(FEATURE_BRANCH_PREDICTOR) + ) + mor1kx_cpu + (/*AUTOINST*/ + // Outputs + .ibus_adr_o (ibus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .ibus_req_o (ibus_req_o), + .ibus_burst_o (ibus_burst_o), + .dbus_adr_o (dbus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_dat_o (dbus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_req_o (dbus_req_o), + .dbus_bsel_o (dbus_bsel_o[3:0]), + .dbus_we_o (dbus_we_o), + .dbus_burst_o (dbus_burst_o), + .du_dat_o (du_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .du_ack_o (du_ack_o), + .du_stall_o (du_stall_o), + .traceport_exec_valid_o (traceport_exec_valid_o), + .traceport_exec_pc_o (traceport_exec_pc_o[31:0]), + .traceport_exec_jb_o (traceport_exec_jb_o), + .traceport_exec_jal_o (traceport_exec_jal_o), + .traceport_exec_jr_o (traceport_exec_jr_o), + .traceport_exec_jbtarget_o (traceport_exec_jbtarget_o[31:0]), + .traceport_exec_insn_o (traceport_exec_insn_o[`OR1K_INSN_WIDTH-1:0]), + .traceport_exec_wbdata_o (traceport_exec_wbdata_o[OPTION_OPERAND_WIDTH-1:0]), + .traceport_exec_wbreg_o (traceport_exec_wbreg_o[OPTION_RF_ADDR_WIDTH-1:0]), + .traceport_exec_wben_o (traceport_exec_wben_o), + .spr_bus_addr_o (spr_bus_addr_o[15:0]), + .spr_bus_we_o (spr_bus_we_o), + .spr_bus_stb_o (spr_bus_stb_o), + .spr_bus_dat_o (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .spr_sr_o (spr_sr_o[15:0]), + // Inputs + .clk (clk), + .rst (rst), + .ibus_err_i (ibus_err_i), + .ibus_ack_i (ibus_ack_i), + .ibus_dat_i (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), + .dbus_err_i (dbus_err_i), + .dbus_ack_i (dbus_ack_i), + .dbus_dat_i (dbus_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .irq_i (irq_i[31:0]), + .du_addr_i (du_addr_i[15:0]), + .du_stb_i (du_stb_i), + .du_dat_i (du_dat_i[OPTION_OPERAND_WIDTH-1:0]), + 
.du_we_i                 (du_we_i),
+      .du_stall_i              (du_stall_i),
+      .spr_bus_dat_dmmu_i      (),  // Templated
+      .spr_bus_ack_dmmu_i      (),  // Templated
+      .spr_bus_dat_immu_i      (),  // Templated
+      .spr_bus_ack_immu_i      (),  // Templated
+      .spr_bus_dat_mac_i       (),  // Templated
+      .spr_bus_ack_mac_i       (),  // Templated
+      .spr_bus_dat_pmu_i       (),  // Templated
+      .spr_bus_ack_pmu_i       (),  // Templated
+      .spr_bus_dat_pcu_i       (),  // Templated
+      .spr_bus_ack_pcu_i       (),  // Templated
+      .spr_bus_dat_fpu_i       (),  // Templated
+      .spr_bus_ack_fpu_i       (),  // Templated
+      .multicore_coreid_i      (multicore_coreid_i[OPTION_OPERAND_WIDTH-1:0]),
+      .multicore_numcores_i    (multicore_numcores_i[OPTION_OPERAND_WIDTH-1:0]),
+      .snoop_adr_i             (snoop_adr_i[31:0]),
+      .snoop_en_i              (snoop_en_i));
+
+endmodule // mor1kx
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_prediction.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_prediction.v
new file mode 100644
index 0000000..01f1de1
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_prediction.v
@@ -0,0 +1,113 @@
+/******************************************************************************
+ This Source Code Form is subject to the terms of the
+ Open Hardware Description License, v. 1.0. If a copy
+ of the OHDL was not distributed with this file, You
+ can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+ Description: Branch prediction module
+ Generates a predicted flag output and compares that to the real flag
+ when it comes back in the following pipeline stage.
+ Signals are deliberately not named after the pipeline stage they belong to,
+ in order to keep this module generic.
+
+ Copyright (C) 2013 Stefan Kristiansson
+ Copyright (C) 2016 Alexey Baturo
+
+ ******************************************************************************/
+
+`include "mor1kx-defines.v"
+
+module mor1kx_branch_prediction
+  #(
+    // Predictor implementation: "SIMPLE", "SAT_COUNTER" or "GSHARE".
+    // Any other value prints an error and ends the simulation.
+    parameter [95:0] FEATURE_BRANCH_PREDICTOR = "SIMPLE",
+    parameter OPTION_OPERAND_WIDTH = 32
+    )
+   (
+    input  clk,
+    input  rst,
+
+    // Signals belonging to the stage where the branch is predicted.
+    input  op_bf_i,               // from decode stage, brn is bf
+    input  op_bnf_i,              // from decode stage, brn is bnf
+    input  [9:0] immjbr_upper_i,  // from decode stage, imm
+    input  [OPTION_OPERAND_WIDTH - 1:0] brn_pc_i, // pc of brn being predicted
+    output predicted_flag_o,      // to decode-execute stage, flag we predict to be
+
+    // Signals belonging to the stage where the branch is resolved.
+    input  prev_op_brcond_i,      // from decode-execute stage, prev brn was cond
+    input  prev_predicted_flag_i, // from decode-execute, prev predicted flag
+    input  flag_i,                // from execute-ctrl stage, real flag we got
+
+    // Decode stage advance strobe: the SAT_COUNTER and GSHARE predictors
+    // only update their state while it is set.
+    input  padv_decode_i,
+    input  execute_bf_i,          // prev insn was bf
+    input  execute_bnf_i,         // prev insn was bnf
+
+    // Branch misprediction indicator
+    output branch_mispredict_o    // to decode-execute stage, was brn mispredicted or not
+    );
+
+   // Compare the real flag with the previously predicted flag and signal a
+   // misprediction in case of a mismatch.
+   assign branch_mispredict_o = prev_op_brcond_i & (flag_i != prev_predicted_flag_i);
+
+   // Exactly one predictor is elaborated, selected by
+   // FEATURE_BRANCH_PREDICTOR. Only the selected predictor drives
+   // predicted_flag_o.
+generate
+if (FEATURE_BRANCH_PREDICTOR=="SAT_COUNTER") begin : branch_predictor_saturation_counter
+   mor1kx_branch_predictor_saturation_counter
+   mor1kx_branch_predictor_saturation_counter
+      (
+       // Outputs
+       .predicted_flag_o                 (predicted_flag_o),
+       // Inputs
+       .clk                              (clk),
+       .rst                              (rst),
+       .flag_i                           (flag_i),
+       .execute_op_bf_i                  (execute_bf_i),
+       .execute_op_bnf_i                 (execute_bnf_i),
+       .op_bf_i                          (op_bf_i),
+       .op_bnf_i                         (op_bnf_i),
+       .prev_op_brcond_i                 (prev_op_brcond_i),
+       .padv_decode_i                    (padv_decode_i),
+       .branch_mispredict_i              (branch_mispredict_o));
+
+end else if (FEATURE_BRANCH_PREDICTOR=="GSHARE") begin : branch_predictor_gshare
+   mor1kx_branch_predictor_gshare
+     #(
+       .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH)
+       )
+   mor1kx_branch_predictor_gshare
+      (
+       // Outputs
+       .predicted_flag_o                 (predicted_flag_o),
+       // Inputs
+       .clk                              (clk),
+       .rst                              (rst),
+       .flag_i                           (flag_i),
+       .execute_op_bf_i                  (execute_bf_i),
+       .execute_op_bnf_i                 (execute_bnf_i),
+       .op_bf_i                          (op_bf_i),
+       .brn_pc_i                         (brn_pc_i),
+       .op_bnf_i                         (op_bnf_i),
+       .prev_op_brcond_i                 (prev_op_brcond_i),
+       .padv_decode_i                    (padv_decode_i),
+       .branch_mispredict_i              (branch_mispredict_o));
+
+end else if (FEATURE_BRANCH_PREDICTOR=="SIMPLE") begin : branch_predictor_simple
+   mor1kx_branch_predictor_simple
+   mor1kx_branch_predictor_simple
+      (
+       // Outputs
+       .predicted_flag_o                 (predicted_flag_o),
+       // Inputs
+       .op_bf_i                          (op_bf_i),
+       .op_bnf_i                         (op_bnf_i),
+       .immjbr_upper_i                   (immjbr_upper_i));
+
+end else begin : branch_predictor_invalid
+   // Unknown predictor name: report the offending parameter by its real
+   // name so the message can be matched against the instantiation.
+   initial begin
+      $display("Error: FEATURE_BRANCH_PREDICTOR, %s, not valid", FEATURE_BRANCH_PREDICTOR);
+      $finish();
+   end
+end
+endgenerate
+
+endmodule
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_predictor_gshare.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_predictor_gshare.v
new file mode 100644
index 0000000..8d3e17d
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_predictor_gshare.v
@@
-0,0 +1,116 @@
+/******************************************************************************
+ This Source Code Form is subject to the terms of the
+ Open Hardware Description License, v. 1.0. If a copy
+ of the OHDL was not distributed with this file, You
+ can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+ Description: gshare branch predictor
+ This predictor is based on an array of FSMs with 4 states: strongly not taken,
+ weakly not taken, weakly taken, strongly taken. Check saturation predictor.
+ Index to the array of FSMs is built using xor of global history and lower bits of PC.
+
+ Copyright (C) 2016 Alexey Baturo
+
+ ******************************************************************************/
+
+`include "mor1kx-defines.v"
+
+module mor1kx_branch_predictor_gshare
+  #(
+    parameter GSHARE_BITS_NUM = 10,
+    parameter OPTION_OPERAND_WIDTH = 32
+    )
+   (
+    input clk,
+    input rst,
+
+    // Signals belonging to the stage where the branch is predicted.
+    output predicted_flag_o,     // result of predictor
+
+    input execute_op_bf_i,       // prev insn was bf
+    input execute_op_bnf_i,      // prev insn was bnf
+    input op_bf_i,               // cur insn is bf
+    input op_bnf_i,              // cur insn is bnf
+    input padv_decode_i,         // pipeline is moved
+    input flag_i,                // real flag of the resolved (prev) branch
+
+    // Signals belonging to the stage where the branch is resolved.
+    input prev_op_brcond_i,      // prev op was cond brn
+    input branch_mispredict_i,   // prev brn was mispredicted (not read here)
+
+    input [OPTION_OPERAND_WIDTH-1:0] brn_pc_i
+    );
+
+   localparam [1:0]
+      STATE_STRONGLY_NOT_TAKEN = 2'b00,
+      STATE_WEAKLY_NOT_TAKEN   = 2'b01,
+      STATE_WEAKLY_TAKEN       = 2'b10,
+      STATE_STRONGLY_TAKEN     = 2'b11;
+   localparam FSM_NUM = 2 ** GSHARE_BITS_NUM;
+
+   integer i = 0;
+
+   // One 2-bit counter per index value. state_index and prev_idx are
+   // GSHARE_BITS_NUM bits wide, so only entries 0..FSM_NUM-1 can ever be
+   // addressed, and those are exactly the entries the reset loop covers.
+   reg [1:0] state [0:FSM_NUM-1];
+   // Global branch history; only bits [GSHARE_BITS_NUM-1:0] take part in
+   // the index below.
+   reg [GSHARE_BITS_NUM:0] brn_hist_reg = 0;
+
+   // Index used for the most recent prediction, kept so the same counter
+   // is updated once the branch resolves.
+   reg [GSHARE_BITS_NUM - 1:0] prev_idx = 0;
+
+   // +2 bits for alignment
+   wire [GSHARE_BITS_NUM - 1:0] state_index = brn_hist_reg[GSHARE_BITS_NUM - 1:0] ^ brn_pc_i[GSHARE_BITS_NUM + 1:2];
+
+   // The counter MSB says "taken": predict flag=1 for bf when taken and
+   // for bnf when not taken.
+   assign predicted_flag_o = (state[state_index][1] && op_bf_i) ||
+                             (!state[state_index][1] && op_bnf_i);
+   wire brn_taken = (execute_op_bf_i && flag_i) || (execute_op_bnf_i && !flag_i);
+
+   always @(posedge clk) begin
+      if (rst) begin
+         brn_hist_reg <= 0;
+         prev_idx <= 0;
+         for(i = 0; i < FSM_NUM; i = i + 1) begin
+            state[i] <= STATE_WEAKLY_TAKEN;
+         end
+      end else begin
+         if (op_bf_i || op_bnf_i) begin
+            // store prev index
+            prev_idx <= state_index;
+         end
+
+         if (prev_op_brcond_i && padv_decode_i) begin
+            brn_hist_reg <= {brn_hist_reg[GSHARE_BITS_NUM - 1 : 0], brn_taken};
+            if (!brn_taken) begin
+               // change fsm state:
+               //   STATE_STRONGLY_TAKEN -> STATE_WEAKLY_TAKEN
+               //   STATE_WEAKLY_TAKEN -> STATE_WEAKLY_NOT_TAKEN
+               //   STATE_WEAKLY_NOT_TAKEN -> STATE_STRONGLY_NOT_TAKEN
+               //   STATE_STRONGLY_NOT_TAKEN -> STATE_STRONGLY_NOT_TAKEN
+               case (state[prev_idx])
+                  STATE_STRONGLY_TAKEN:
+                     state[prev_idx] <= STATE_WEAKLY_TAKEN;
+                  STATE_WEAKLY_TAKEN:
+                     state[prev_idx] <= STATE_WEAKLY_NOT_TAKEN;
+                  STATE_WEAKLY_NOT_TAKEN:
+                     state[prev_idx] <= STATE_STRONGLY_NOT_TAKEN;
+                  STATE_STRONGLY_NOT_TAKEN:
+                     state[prev_idx] <= STATE_STRONGLY_NOT_TAKEN;
+               endcase
+            end else begin
+               // change fsm state:
+               //   STATE_STRONGLY_NOT_TAKEN -> STATE_WEAKLY_NOT_TAKEN
+               //   STATE_WEAKLY_NOT_TAKEN -> STATE_WEAKLY_TAKEN
+               //   STATE_WEAKLY_TAKEN -> STATE_STRONGLY_TAKEN
+               //   STATE_STRONGLY_TAKEN -> STATE_STRONGLY_TAKEN
+               case (state[prev_idx])
+                  STATE_STRONGLY_NOT_TAKEN:
+                     state[prev_idx] <= STATE_WEAKLY_NOT_TAKEN;
+                  STATE_WEAKLY_NOT_TAKEN:
+                     state[prev_idx] <= STATE_WEAKLY_TAKEN;
+                  STATE_WEAKLY_TAKEN:
+                     state[prev_idx] <= STATE_STRONGLY_TAKEN;
+                  STATE_STRONGLY_TAKEN:
+                     state[prev_idx] <= STATE_STRONGLY_TAKEN;
+               endcase
+            end
+         end
+      end
+   end
+endmodule
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_predictor_saturation_counter.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_predictor_saturation_counter.v
new file mode 100644
index 0000000..0ee9b27
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_predictor_saturation_counter.v
@@ -0,0 +1,97 @@
+/******************************************************************************
+ This Source Code Form is subject to the terms of the
+ Open Hardware Description License, v. 1.0. If a copy
+ of the OHDL was not distributed with this file, You
+ can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+ Description: Saturating counter branch predictor
+ This is an FSM with 4 states: strongly not taken, weakly not taken,
+ weakly taken, strongly taken.
+ The FSM changes its state upon the real (not predicted) flag.
+ If flag was "true" and instruction was bf or flag was "false" and
+ instruction was bnf fsm changes its state towards "taken". And vice versa
+ otherwise.
+ We predict flag on current fsm state and current branch type.
+ If we are in any "taken" state and current instruction is bf,
+ we predict flag to be "true". Or we're in any "not taken" state and
+ current instruction is bnf, we predict flag to be "true".
+
+ Copyright (C) 2016 Alexey Baturo
+
+ ******************************************************************************/
+
+`include "mor1kx-defines.v"
+
+module mor1kx_branch_predictor_saturation_counter
+   (
+    input clk,
+    input rst,
+
+    // Signals belonging to the stage where the branch is predicted.
+    output predicted_flag_o,     // result of predictor
+
+    input execute_op_bf_i,       // prev insn was bf
+    input execute_op_bnf_i,      // prev insn was bnf
+    input op_bf_i,               // cur insn is bf
+    input op_bnf_i,              // cur insn is bnf
+    input padv_decode_i,         // pipeline is moved
+    input flag_i,                // real flag of the resolved (prev) branch
+
+    // Signals belonging to the stage where the branch is resolved.
+    input prev_op_brcond_i,      // prev op was cond brn
+    input branch_mispredict_i    // prev brn was mispredicted (not read here)
+    );
+
+   localparam [1:0]
+      STATE_STRONGLY_NOT_TAKEN = 2'b00,
+      STATE_WEAKLY_NOT_TAKEN   = 2'b01,
+      STATE_WEAKLY_TAKEN       = 2'b10,
+      STATE_STRONGLY_TAKEN     = 2'b11;
+
+   reg [1:0] state = STATE_WEAKLY_TAKEN;
+
+   // state[1] set means one of the two "taken" states.
+   assign predicted_flag_o = (state[1] && op_bf_i) || (!state[1] && op_bnf_i);
+   wire brn_taken = (execute_op_bf_i && flag_i) || (execute_op_bnf_i && !flag_i);
+
+   always @(posedge clk) begin
+      if (rst) begin
+         state <= STATE_WEAKLY_TAKEN;
+      end else begin
+         if (prev_op_brcond_i && padv_decode_i) begin
+            if (!brn_taken) begin
+               // change fsm state:
+               //   STATE_STRONGLY_TAKEN -> STATE_WEAKLY_TAKEN
+               //   STATE_WEAKLY_TAKEN -> STATE_WEAKLY_NOT_TAKEN
+               //   STATE_WEAKLY_NOT_TAKEN -> STATE_STRONGLY_NOT_TAKEN
+               //   STATE_STRONGLY_NOT_TAKEN -> STATE_STRONGLY_NOT_TAKEN
+               case (state)
+                  STATE_STRONGLY_TAKEN:
+                     state <= STATE_WEAKLY_TAKEN;
+                  STATE_WEAKLY_TAKEN:
+                     state <= STATE_WEAKLY_NOT_TAKEN;
+                  STATE_WEAKLY_NOT_TAKEN:
+                     state <= STATE_STRONGLY_NOT_TAKEN;
+                  STATE_STRONGLY_NOT_TAKEN:
+                     state <= STATE_STRONGLY_NOT_TAKEN;
+               endcase
+            end else begin
+               // change fsm state:
+               //   STATE_STRONGLY_NOT_TAKEN -> STATE_WEAKLY_NOT_TAKEN
+               //   STATE_WEAKLY_NOT_TAKEN -> STATE_WEAKLY_TAKEN
+               //   STATE_WEAKLY_TAKEN -> STATE_STRONGLY_TAKEN
+               //   STATE_STRONGLY_TAKEN -> STATE_STRONGLY_TAKEN
+               case (state)
+                  STATE_STRONGLY_NOT_TAKEN:
+                     state <= STATE_WEAKLY_NOT_TAKEN;
+                  STATE_WEAKLY_NOT_TAKEN:
+                     state <= STATE_WEAKLY_TAKEN;
+                  STATE_WEAKLY_TAKEN:
+                     state <= STATE_STRONGLY_TAKEN;
+                  STATE_STRONGLY_TAKEN:
+                     state <= STATE_STRONGLY_TAKEN;
+               endcase
+            end
+         end
+      end
+   end
+endmodule
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_predictor_simple.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_predictor_simple.v
new file mode 100644
index 0000000..5708820
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_branch_predictor_simple.v
@@ -0,0 +1,31 @@
+/******************************************************************************
+ This Source Code Form is subject to the terms of the
+ Open Hardware Description License, v. 1.0. If a copy
+ of the OHDL was not distributed with this file, You
+ can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+ Description: Simple branch predictor implementation
+ We assume flag to be "true" if instruction is bf and it jumps backwards
+ or if instruction is bnf and it jumps forward.
+
+ Copyright (C) 2013 Stefan Kristiansson
+
+ ******************************************************************************/
+
+`include "mor1kx-defines.v"
+
+module mor1kx_branch_predictor_simple
+   (
+    // Signals belonging to the stage where the branch is predicted.
+    input op_bf_i,               // branch if flag
+    input op_bnf_i,              // branch if not flag
+    input [9:0] immjbr_upper_i,  // branch offset
+    output predicted_flag_o      // result of predictor
+    );
+
+   // Static branch prediction - backward branches are predicted as taken,
+   // forward branches as not taken. Bit 9 of the offset is used as the
+   // backward/forward indicator.
+   assign predicted_flag_o = op_bf_i & immjbr_upper_i[9] |
+                             op_bnf_i & !immjbr_upper_i[9];
+
+endmodule
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_bus_if_wb32.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_bus_if_wb32.v
new file mode 100644
index 0000000..f1d43d7
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_bus_if_wb32.v
@@ -0,0 +1,183 @@
+/* ****************************************************************************
+  This Source Code Form is subject to the terms of the
+  Open Hardware Description License, v. 1.0.
If a copy
+  of the OHDL was not distributed with this file, You
+  can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+  Description: mor1kx processor Wishbone bus bridge
+
+  For now, very simple, not registering, assumes 32-bit data, addressing
+
+  Copyright (C) 2012 Authors
+
+  Author(s): Julius Baxter
+
+***************************************************************************** */
+
+`include "mor1kx-defines.v"
+
+module mor1kx_bus_if_wb32
+  #(
+    parameter BUS_IF_TYPE = "CLASSIC",
+    parameter BURST_LENGTH = 8
+    )
+   (
+    input         clk,
+    input         rst,
+
+    // CPU side
+    output        cpu_err_o,
+    output        cpu_ack_o,
+    output [31:0] cpu_dat_o,
+    input [31:0]  cpu_adr_i,
+    input [31:0]  cpu_dat_i,
+    input         cpu_req_i,
+    input [3:0]   cpu_bsel_i,
+    input         cpu_we_i,
+    input         cpu_burst_i,
+
+    // Wishbone master side
+    output [31:0] wbm_adr_o,
+    output        wbm_stb_o,
+    output        wbm_cyc_o,
+    output [3:0]  wbm_sel_o,
+    output        wbm_we_o,
+    output [2:0]  wbm_cti_o,
+    output [1:0]  wbm_bte_o,
+    output [31:0] wbm_dat_o,
+    input         wbm_err_i,
+    input         wbm_ack_i,
+    input [31:0]  wbm_dat_i,
+    input         wbm_rty_i
+    );
+
+   // Number of low word-address bits that count within one burst.
+   localparam BADDR_WIDTH = (BURST_LENGTH==4)  ? 2 :
+                            (BURST_LENGTH==8)  ? 3 :
+                            (BURST_LENGTH==16) ? 4 : 30;
+
+   // Value driven on wbm_bte_o by both B3 bridge variants.
+   localparam [1:0] BURST_TYPE_EXT = BURST_LENGTH==4  ? 2'b01 :
+                                     BURST_LENGTH==8  ? 2'b10 :
+                                     BURST_LENGTH==16 ? 2'b11 :
+                                     2'b00; // Linear burst
+
+   generate
+      /* verilator lint_off WIDTH */
+      if (BUS_IF_TYPE=="B3_READ_BURSTING") begin : b3_read_bursting
+         /* verilator lint_on WIDTH */
+
+         // Burst until the incoming address is not what it should be
+         reg                    finish_burst_r;
+         reg                    bursting;
+         reg [31:2]             burst_address;
+         reg [BADDR_WIDTH-1:0]  burst_wrap_start;
+
+         // Last burst word is the one just before the word we started on.
+         wire [BADDR_WIDTH-1:0] burst_wrap_finish = burst_wrap_start - 1;
+         // The CPU is asking for something other than the word in flight.
+         wire                   address_differs = (burst_address!=cpu_adr_i[31:2]);
+         wire                   finish_burst =
+                                (bursting & (
+                                             (BURST_LENGTH!=0 &&
+                                              burst_address[BADDR_WIDTH+2-1:2]==(burst_wrap_finish))
+                                             | address_differs
+                                             | !cpu_req_i
+                                             )
+                                 )
+                                ;
+
+         // A burst is opened by a read request and closed by an error or
+         // by the acknowledge of its final word.
+         always @(posedge clk `OR_ASYNC_RST) begin
+            if (rst)
+              bursting <= 0;
+            else if (wbm_err_i)
+              bursting <= 0;
+            else if (bursting & finish_burst & wbm_ack_i)
+              bursting <= 0;
+            else if (cpu_req_i & !bursting & !cpu_we_i)
+              bursting <= 1;
+         end
+
+         // Latch the request address while idle; afterwards only the low
+         // BADDR_WIDTH word-address bits advance on each acknowledge.
+         always @(posedge clk `OR_ASYNC_RST) begin
+            if (rst) begin
+               burst_address <= 0;
+               burst_wrap_start <= 0;
+            end
+            else if (cpu_req_i & !bursting) begin
+               burst_address <= cpu_adr_i[31:2];
+               burst_wrap_start <= cpu_adr_i[BADDR_WIDTH+2-1:2];
+            end
+            else if (wbm_ack_i) begin
+               burst_address[BADDR_WIDTH+2-1:2] <= burst_address[BADDR_WIDTH+2-1:2]
+                                                   + 1;
+            end
+         end
+
+         // Registered copy of finish_burst, valid for the cycle after an
+         // acknowledge; used to drop stb/cyc.
+         always @(posedge clk `OR_ASYNC_RST) begin
+            if (rst)
+              finish_burst_r <= 0;
+            else if (wbm_ack_i)
+              finish_burst_r <= finish_burst;
+            else
+              finish_burst_r <= 0;
+         end
+
+         assign wbm_cyc_o = bursting & !finish_burst_r;
+         assign wbm_stb_o = bursting & !finish_burst_r;
+         assign wbm_adr_o = bursting ? {burst_address,2'b00} : cpu_adr_i;
+         assign wbm_sel_o = cpu_bsel_i;
+         assign wbm_we_o  = cpu_we_i;
+         assign wbm_dat_o = cpu_dat_i;
+         assign wbm_cti_o = bursting ? (finish_burst ? 3'b111 : 3'b010) :
+                            3'b000;
+         assign wbm_bte_o = BURST_TYPE_EXT;
+
+         assign cpu_err_o = wbm_err_i;
+         assign cpu_ack_o = (wbm_ack_i) &
+                            !(bursting & address_differs) & cpu_req_i;
+         assign cpu_dat_o = wbm_err_i ? 32'd0 : wbm_dat_i;
+
+      /* verilator lint_off WIDTH */
+      end else if (BUS_IF_TYPE=="B3_REGISTERED_FEEDBACK") begin : b3_registered_feedback
+         /* verilator lint_on WIDTH */
+
+         // Pure pass-through; only the cycle type follows cpu_burst_i.
+         assign wbm_cyc_o = cpu_req_i;
+         assign wbm_stb_o = cpu_req_i;
+         assign wbm_adr_o = cpu_adr_i;
+         assign wbm_sel_o = cpu_bsel_i;
+         assign wbm_we_o  = cpu_we_i;
+         assign wbm_dat_o = cpu_dat_i;
+         assign wbm_cti_o = cpu_burst_i ? 3'b010 : 3'b111;
+         assign wbm_bte_o = BURST_TYPE_EXT;
+
+         assign cpu_err_o = wbm_err_i;
+         assign cpu_ack_o = wbm_ack_i;
+         assign cpu_dat_o = wbm_dat_i;
+
+      end else begin : classic // CLASSIC only
+
+         // Only classic, single cycle accesses
+
+         // A register to force de-assertion of access request signals after
+         // each ack
+         reg cycle_end;
+
+         always @(posedge clk `OR_ASYNC_RST) begin
+            if (rst)
+              cycle_end <= 1;
+            else
+              cycle_end <= wbm_ack_i | wbm_err_i;
+         end
+
+         assign wbm_cyc_o = cpu_req_i;
+         assign wbm_stb_o = cpu_req_i & !cycle_end;
+         assign wbm_adr_o = cpu_adr_i;
+         assign wbm_sel_o = cpu_bsel_i;
+         assign wbm_we_o  = cpu_we_i;
+         assign wbm_dat_o = cpu_dat_i;
+         assign wbm_cti_o = 0;
+         assign wbm_bte_o = 0;
+
+         assign cpu_err_o = wbm_err_i;
+         assign cpu_ack_o = wbm_ack_i;
+         assign cpu_dat_o = wbm_dat_i;
+
+      end // block: classic
+   endgenerate
+
+endmodule // mor1kx_bus_if_wb32
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cache_lru.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cache_lru.v
new file mode 100644
index 0000000..88ea116
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cache_lru.v
@@ -0,0 +1,281 @@
+/******************************************************************************
+ This Source Code Form is
subject to the terms of the
+ Open Hardware Description License, v. 1.0. If a copy
+ of the OHDL was not distributed with this file, You
+ can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+ Description: Data cache LRU implementation
+
+ Copyright (C) 2012 Stefan Wallentowitz
+
+ ******************************************************************************/
+
+// This is the least-recently-used (LRU) calculation module. It
+// essentially has two types of input and output. First, the history
+// information needs to be evaluated to calculate the LRU value.
+// Second, the current access and the LRU are one hot values of the
+// ways.
+//
+// This module is purely combinational. All registering is done outside
+// this module. The following parameter exists:
+//
+//  * NUMWAYS: Number of ways (must be greater than 1)
+//
+// The following ports exist:
+//
+//  * current: The current LRU history
+//  * update: The new LRU history after access
+//
+//  * access: 0 if no access or one-hot of the way that accesses
+//  * lru_pre: LRU before the access (one hot of ways)
+//  * lru_post: LRU after the access (one hot of ways)
+//
+// The latter three are NUMWAYS bits wide. The first two are more
+// complicated as this is an optimized way of storing
+// the history information, which will be shortly described in the
+// following.
+//
+// A naive approach to store the history of the access is to store the
+// relative "age" of each element in a vector, for example for four
+// ways:
+//
+//   0: 1  1: 3  2: 1  3:0
+//
+// This needs 4x2 bits, but more important it also needs a set of
+// comparators and adders. This can become increasingly complex when
+// using a higher number of cache ways with an impact on area and
+// timing.
+//
+// Similarly, it is possible to store a "stack" of the access and
+// reorder this stack on an access. But the problems are similar, it
+// needs comparators etc.
+//
+// A neat approach is to store the history efficiently coded, while
+// also easing the calculation. This approach stores the information
+// whether each entry is older than the others. For example for the
+// four-way example (x<y means x is older than y):
+//
+//  |0<1|0<2|0<3|1<0|1<2|1<3|2<0|2<1|2<3|3<0|3<1|3<2|
+//
+// This is redundant, as two entries can never be equally old, so that
+// (x<y) == !(y<x). Only the pairs with x<y need to be stored:
+//
+//  |0<1|0<2|0<3|1<2|1<3|2<3|
+//
+// The width of this vector is the triangular number of (NUMWAYS-1):
+//
+//  WIDTH = NUMWAYS*(NUMWAYS-1)/2
+//
+// The details of the algorithm are described inside the module.
+//
+// Instantiation example:
+//
+// mor1kx_cache_lru
+//  #(.NUMWAYS(NUMWAYS))
+// u_lru(.current  (current_history[((NUMWAYS*(NUMWAYS-1))>>1)-1:0]),
+//       .update   (updated_history[((NUMWAYS*(NUMWAYS-1))>>1)-1:0]),
+//       .access   (access[NUMWAYS-1:0]),
+//       .lru_pre  (lru_pre[NUMWAYS-1:0]),
+//       .lru_post (lru_post[NUMWAYS-1:0]));
+
+
+module mor1kx_cache_lru(/*AUTOARG*/
+   // Outputs
+   update, lru_pre, lru_post,
+   // Inputs
+   current, access
+   );
+   parameter NUMWAYS = 2;
+
+   // Triangular number
+   localparam WIDTH = NUMWAYS*(NUMWAYS-1) >> 1;
+
+   input [WIDTH-1:0]        current;
+   output reg [WIDTH-1:0]   update;
+
+   input [NUMWAYS-1:0]      access;
+   output reg [NUMWAYS-1:0] lru_pre;
+   output reg [NUMWAYS-1:0] lru_post;
+
+   // Full NUMWAYS x NUMWAYS comparison matrix, rebuilt from 'current' on
+   // every evaluation: expand[i][j] holds (i<j).
+   reg [NUMWAYS-1:0]        expand [0:NUMWAYS-1];
+
+   integer                  i, j;
+   integer                  offset;
+
+   //
+   // <    0      1      2      3
+   // 0    1    (0<1)  (0<2)  (0<3)
+   // 1  (1<0)    1    (1<2)  (1<3)
+   // 2  (2<0)  (2<1)    1    (2<3)
+   // 3  (3<0)  (3<1)  (3<2)    1
+   //
+   // As two entries can never be equally old (needs to be avoided on
+   // the outside) this is equivalent to:
+   //
+   // <    0      1      2      3
+   // 0    1    (0<1)  (0<2)  (0<3)
+   // 1 !(0<1)    1    (1<2)  (1<3)
+   // 2 !(0<2) !(1<2)    1    (2<3)
+   // 3 !(0<3) !(1<3) !(2<3)    1
+   //
+   // The lower half below the diagonal is the inverted mirror of the
+   // upper half. The number of entries in each half is of course
+   // equal to the width of our LRU vector and the upper half is
+   // filled with the values from the vector.
+   //
+   // The algorithm works as follows:
+   //
+   //  1. Fill the matrix (expand) with the values. The entry (i,i) is
+   //     statically one.
+   //
+   //  2. The LRU_pre vector is the vector of the ANDs of each row.
+   //
+   //  3. Update the values with the access vector (if any) in the
+   //     following way: If access[i] is set, the values in row i are
+   //     set to 0. Similarly, the values in column i are set to 1.
+   //
+   //  4. The update vector of the lru history is then generated by
+   //     copying the upper half of the matrix back.
+   //
+   //  5. The LRU_post vector is the vector of the ANDs of each row.
+   //
+   // In the following an example will be used to demonstrate the algorithm:
+   //
+   //  NUMWAYS = 4
+   //  current = 6'b110100;
+   //  access = 4'b0010;
+   //
+   // This current history is:
+   //
+   //  0<1 0<2 0<3 1<2 1<3 2<3
+   //   0   0   1   0   1   1
+   //
+   // and way 2 is accessed.
+   //
+   // The history of accesses is 3>0>1>2 and the expected result is an
+   // update to 2>3>0>1 with LRU_pre=2 and LRU_post=1
+
+
+   always @(*) begin : comb
+      // The offset is used to transfer the flat history vector into
+      // the upper half of the matrix: it is the index in 'current' of
+      // the first entry belonging to row i.
+      offset = 0;
+
+      // 1. Fill the matrix (expand) with the values. The entry (i,i) is
+      //    statically one.
+      for (i = 0; i < NUMWAYS; i = i + 1) begin
+         expand[i][i] = 1'b1;
+
+         for (j = i + 1; j < NUMWAYS; j = j + 1) begin
+            expand[i][j] = current[offset+j-i-1];
+         end
+         // Lower half: inverted mirror of the rows already filled above.
+         for (j = 0; j < i; j = j + 1) begin
+            expand[i][j] = !expand[j][i];
+         end
+
+         offset = offset + NUMWAYS - i - 1;
+      end // for (i = 0; i < NUMWAYS; i = i + 1)
+
+      // For the example expand is now:
+      // <    0      1      2      3             0 1 2 3
+      // 0    1    (0<1)  (0<2)  (0<3)         0 1 0 0 1
+      // 1  (1<0)    1    (1<2)  (1<3)    =>   1 1 1 0 1
+      // 2  (2<0)  (2<1)    1    (2<3)         2 1 1 1 1
+      // 3  (3<0)  (3<1)  (3<2)    1           3 0 0 0 1
+
+
+      // 2. The LRU_pre vector is the vector of the ANDs of each
+      //    row.
+      for (i = 0; i < NUMWAYS; i = i + 1) begin
+         lru_pre[i] = &expand[i];
+      end
+
+      // We derive why this is the case for the example here:
+      // lru_pre[2] is high when the following condition holds:
+      //
+      //  (2<0) & (2<1) & (2<3).
+      //
+      // Applying the negation transform we get:
+      //
+      //  !(0<2) & !(1<2) & (2<3)
+      //
+      // and this is exactly row [2], so that here
+      //
+      // lru_pre[2] = &expand[2] = 1'b1;
+      //
+      // At this point you can also see why we initialize the diagonal
+      // with 1.
+
+      // 3. Update the values with the access vector (if any) in the
+      //    following way: If access[i] is set, the values in row i
+      //    are set to 0. Similarly, the values in column i are set
+      //    to 1.
+      for (i = 0; i < NUMWAYS; i = i + 1) begin
+         if (access[i]) begin
+            for (j = 0; j < NUMWAYS; j = j + 1) begin
+               if (i != j) begin
+                  expand[i][j] = 1'b0;
+               end
+            end
+            for (j = 0; j < NUMWAYS; j = j + 1) begin
+               if (i != j) begin
+                  expand[j][i] = 1'b1;
+               end
+            end
+         end
+      end // for (i = 0; i < NUMWAYS; i = i + 1)
+
+      // Again this becomes obvious when you see what we do here.
+      // Accessing way 2 now means
+      //
+      // (0<2) = (1<2) = (3<2) = 1, and
+      // (2<0) = (2<1) = (2<3) = 0
+      //
+      // The matrix changes accordingly
+      //
+      //   0 1 2 3      0 1 2 3
+      // 0 1 0 0 1    0 1 0 1 1
+      // 1 1 1 0 1 => 1 1 1 1 1
+      // 2 1 1 1 1    2 0 0 1 0
+      // 3 0 0 0 1    3 0 0 1 1
+
+      // 4. The update vector of the lru history is then generated by
+      //    copying the upper half of the matrix back.
+      offset = 0;
+      for (i = 0; i < NUMWAYS; i = i + 1) begin
+         for (j = i + 1; j < NUMWAYS; j = j + 1) begin
+            update[offset+j-i-1] = expand[i][j];
+         end
+         offset = offset + NUMWAYS - i - 1;
+      end
+
+      // This is the opposite operation of step 1 and is clear now.
+      // Update becomes:
+      //
+      //  update = 6'b011110
+      //
+      // This is translated to
+      //
+      //  0<1 0<2 0<3 1<2 1<3 2<3
+      //   0   1   1   1   1   0
+      //
+      // which is: 2>3>0>1, which is what we expected.
+
+      // 5. The LRU_post vector is the vector of the ANDs of each row.
+      for (i = 0; i < NUMWAYS; i = i + 1) begin
+         lru_post[i] = &expand[i];
+      end
+
+      // This final step is equal to step 2 and also clear now.
+      //
+      // lru_post[1] = &expand[1] = 1'b1;
+      //
+      // lru_post = 4'b0010 is what we expected.
+   end
+
+
+endmodule // mor1kx_cache_lru
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cfgrs.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cfgrs.v
new file mode 100644
index 0000000..52da1ac
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cfgrs.v
@@ -0,0 +1,240 @@
+/* ****************************************************************************
+  This Source Code Form is subject to the terms of the
+  Open Hardware Description License, v. 1.0. If a copy
+  of the OHDL was not distributed with this file, You
+  can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+  Description: mor1kx SPRs indicating configuration and version
+
+  All registers are read only and configured at synthesis time.
+
+  Note that the outputs do not have the usual "_o" suffix on the port names
+  as this module is intended to be instantiated without a Verilog-mode
+  AUTO_TEMPLATE, and as the module is providing read-only signals, there's
+  no confusion about the direction of the ports.
+
+  Copyright (C) 2012 Authors
+
+  Author(s): Julius Baxter
+
+***************************************************************************** */
+
+`include "mor1kx-defines.v"
+
+// Purely combinational: every output is a constant derived from the
+// parameters below and from the field/version macros in mor1kx-defines.v.
+module mor1kx_cfgrs
+  #(
+    // NOTE: FEATURE_SYSCALL, FEATURE_TRAP, FEATURE_RANGE, OPTION_PIC_TRIGGER,
+    // FEATURE_DSX, FEATURE_FASTCONTEXTS and FEATURE_OVERFLOW are accepted
+    // but not referenced by any assignment in this module.
+    parameter FEATURE_SYSCALL = "ENABLED",
+    parameter FEATURE_TRAP = "ENABLED",
+    parameter FEATURE_RANGE = "ENABLED",
+
+    parameter FEATURE_DATACACHE = "NONE",
+    parameter OPTION_DCACHE_BLOCK_WIDTH = 5,
+    parameter OPTION_DCACHE_SET_WIDTH = 9,
+    parameter OPTION_DCACHE_WAYS = 2,
+    parameter FEATURE_DMMU = "NONE",
+    parameter [2:0] OPTION_DMMU_SET_WIDTH = 6,
+    parameter OPTION_DMMU_WAYS = 1,
+    parameter FEATURE_INSTRUCTIONCACHE = "NONE",
+    parameter OPTION_ICACHE_BLOCK_WIDTH = 5,
+    parameter OPTION_ICACHE_SET_WIDTH = 9,
+    parameter OPTION_ICACHE_WAYS = 2,
+    parameter FEATURE_IMMU = "NONE",
+    parameter OPTION_IMMU_SET_WIDTH = 6,
+    parameter OPTION_IMMU_WAYS = 1,
+    parameter FEATURE_PIC = "ENABLED",
+    parameter FEATURE_TIMER = "ENABLED",
+    parameter FEATURE_DEBUGUNIT = "NONE",
+    parameter FEATURE_PERFCOUNTERS = "NONE",
+    parameter OPTION_PERFCOUNTERS_NUM = 0,
+    parameter FEATURE_PMU = "NONE",
+    parameter FEATURE_MAC = "NONE",
+    parameter FEATURE_FPU = "NONE",
+
+    parameter OPTION_PIC_TRIGGER = "LEVEL",
+
+    parameter FEATURE_DSX = "NONE",
+    parameter FEATURE_FASTCONTEXTS = "NONE",
+    parameter OPTION_RF_NUM_SHADOW_GPR = 0,
+    parameter FEATURE_OVERFLOW = "NONE",
+
+    parameter FEATURE_DELAYSLOT = "NONE",
+
+    parameter FEATURE_EVBAR = "NONE",
+    parameter FEATURE_AECSR = "NONE"
+    )
+   (
+    output [31:0] spr_vr,
+    output [31:0] spr_vr2,
+    output [31:0] spr_upr,
+    output [31:0] spr_cpucfgr,
+    output [31:0] spr_dmmucfgr,
+    output [31:0] spr_immucfgr,
+    output [31:0] spr_dccfgr,
+    output [31:0] spr_iccfgr,
+    output [31:0] spr_dcfgr,
+    output [31:0] spr_pccfgr,
+    output [31:0] spr_avr
+    );
+
+   /* Version register: fixed values only, no parameter dependence */
+   assign spr_vr[`OR1K_SPR_VR_REV] = 0;
+   assign spr_vr[`OR1K_SPR_VR_UVRP] = 1;
+   assign spr_vr[`OR1K_SPR_VR_RESERVED] = 0;
+   assign spr_vr[`OR1K_SPR_VR_CFG] = 0;
+   assign spr_vr[`OR1K_SPR_VR_VER] = 8'h10;
+
+   /* spr_upr: one presence bit per optional unit. A unit is reported
+      present whenever its FEATURE_* parameter is anything but "NONE". */
+   assign spr_upr[`OR1K_SPR_UPR_UP] = 1;
+   assign spr_upr[`OR1K_SPR_UPR_DCP] = (FEATURE_DATACACHE!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_ICP] = (FEATURE_INSTRUCTIONCACHE!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_DMP] = (FEATURE_DMMU!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_IMP] = (FEATURE_IMMU!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_MP] = (FEATURE_MAC!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_DUP] = (FEATURE_DEBUGUNIT!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_PCUP] = (FEATURE_PERFCOUNTERS!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_PICP] = (FEATURE_PIC!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_PMP] = (FEATURE_PMU!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_TTP] = (FEATURE_TIMER!="NONE");
+   assign spr_upr[`OR1K_SPR_UPR_RESERVED] = 0;
+   assign spr_upr[`OR1K_SPR_UPR_CUP] = 0;
+
+   /* spr_cpucfgr: CPU capability bits */
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_NSGF] = OPTION_RF_NUM_SHADOW_GPR;
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_CFG] = 0;
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_OB32S] = 1;
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_OB64S] = 0;
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_OF32S] = (FEATURE_FPU!="NONE");
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_OF64S] = 0;
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_OV64S] = 0;
+   /* ND is set when FEATURE_DELAYSLOT is "NONE" (inverse sense of the
+      other feature bits) */
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_ND] = (FEATURE_DELAYSLOT=="NONE");
+   /* AVR will always be present in mor1kx */
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_AVRP] = 1;
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_EVBARP] = (FEATURE_EVBAR!="NONE");
+   /* ISRs will always be present */
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_ISRP] = 1;
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_AECSRP] = (FEATURE_AECSR!="NONE");
+   assign spr_cpucfgr[`OR1K_SPR_CPUCFGR_RESERVED] = 0;
+
+   /* Version register 2 */
+   /* Implementation ID as per:
+      http://opencores.org/or1k/OR1K_CPU_Cores#CPU_ID_Table
+      mor1kx breaks up the VR2[23:0] to be 3 8-bit fields
+      23:16 - Major version number
+      15:8 - Minor version number
+      7:0 - Pipeline implementation identifier (set outside of this module)
+    */
+   assign spr_vr2[`OR1K_SPR_VR2_CPUID] = `MOR1KX_CPUID;
+   /* The low 8 bits are driven to zero here */
+   assign spr_vr2[`OR1K_SPR_VR2_VER] = {`MOR1KX_VERSION_MAJOR,
+                                        `MOR1KX_VERSION_MINOR,
+                                        8'd0};
+
+   /* Currently supporting OR1K version 1.1 rev0 */
+   assign spr_avr[`OR1K_SPR_AVR_MAJ] = 8'd1;
+   assign spr_avr[`OR1K_SPR_AVR_MIN] = 8'd1;
+   assign spr_avr[`OR1K_SPR_AVR_REV] = 8'd0;
+   assign spr_avr[`OR1K_SPR_AVR_RESERVED] = 0;
+
+   /* Data MMU Configuration Register */
+   /* Reserved */
+   assign spr_dmmucfgr[31:12] = 0;
+   /* Hardware TLB Reload */
+   assign spr_dmmucfgr[`OR1K_SPR_DMMUFGR_HTR] = 0;
+   /* TLB Entry Invalidate Register Implemented */
+   assign spr_dmmucfgr[`OR1K_SPR_DMMUFGR_TEIRI] = 0;
+   /* Protection Register Implemented */
+   assign spr_dmmucfgr[`OR1K_SPR_DMMUFGR_PRI] = 0;
+   /* Control Register Implemented */
+   assign spr_dmmucfgr[`OR1K_SPR_DMMUFGR_CRI] = 0;
+   /* Number of ATB entries */
+   assign spr_dmmucfgr[`OR1K_SPR_DMMUFGR_NAE] = 0;
+   /* Number of TLB sets (the set width parameter is reported as-is) */
+   assign spr_dmmucfgr[`OR1K_SPR_DMMUFGR_NTS] = (FEATURE_DMMU!="NONE") ?
+                                                OPTION_DMMU_SET_WIDTH : 0;
+   /* Number of TLB ways (reported as ways minus one) */
+   assign spr_dmmucfgr[`OR1K_SPR_DMMUFGR_NTW] = (FEATURE_DMMU!="NONE") ?
+                                                OPTION_DMMU_WAYS-1 : 0;
+
+   /* Instruction MMU Configuration Register */
+   /* Reserved */
+   assign spr_immucfgr[31:12] = 0;
+   /* Hardware TLB Reload */
+   assign spr_immucfgr[`OR1K_SPR_IMMUFGR_HTR] = 0;
+   /* TLB Entry Invalidate Register Implemented */
+   assign spr_immucfgr[`OR1K_SPR_IMMUFGR_TEIRI] = 0;
+   /* Protection Register Implemented */
+   assign spr_immucfgr[`OR1K_SPR_IMMUFGR_PRI] = 0;
+   /* Control Register Implemented */
+   assign spr_immucfgr[`OR1K_SPR_IMMUFGR_CRI] = 0;
+   /* Number of ATB entries */
+   assign spr_immucfgr[`OR1K_SPR_IMMUFGR_NAE] = 0;
+   /* Number of TLB sets (the set width parameter is reported as-is) */
+   assign spr_immucfgr[`OR1K_SPR_IMMUFGR_NTS] = (FEATURE_IMMU!="NONE") ?
+                                                OPTION_IMMU_SET_WIDTH : 0;
+   /* Number of TLB ways (reported as ways minus one) */
+   assign spr_immucfgr[`OR1K_SPR_IMMUFGR_NTW] = (FEATURE_IMMU!="NONE") ?
+                                                OPTION_IMMU_WAYS-1 : 0;
+
+   /* Data Cache Configuration register */
+   /* Reserved */
+   assign spr_dccfgr[31:15] = 0;
+   /* Cache Block Write-Back Register Implemented */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_CBWBRI] = 0;
+   /* Cache Block Flush Register Implemented */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_CBFRI] = (FEATURE_DATACACHE!="NONE");
+   /* Cache Block Lock Register Implemented */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_CBLRI] = 0;
+   /* Cache Block Prefetch Register Implemented */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_CBPRI] = 0;
+   /* Cache Block Invalidate Register Implemented */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_CBIRI] = (FEATURE_DATACACHE!="NONE");
+   /* Cache Control Register Implemented */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_CCRI] = 0;
+   /* Cache Write Strategy (0 = write-through, 1 = write-back) */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_CWS] = 0;
+   /* Cache Block Size (0 = 16 bytes, 1 = 32 bytes) */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_CBS] = (FEATURE_DATACACHE!="NONE") ?
+                                             (OPTION_DCACHE_BLOCK_WIDTH == 5 ?
+                                              1 : 0) : 0;
+   /* Number of Cache Sets */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_NCS] = (FEATURE_DATACACHE!="NONE") ?
+                                             OPTION_DCACHE_SET_WIDTH : 0;
+   /* Number of Cache Ways: encoded as log2(ways) for 1..32 ways; any
+      other way count falls through to 0 */
+   assign spr_dccfgr[`OR1K_SPR_DCCFGR_NCW] = (FEATURE_DATACACHE!="NONE") ?
+                                             (OPTION_DCACHE_WAYS == 1) ? 3'd0 :
+                                             (OPTION_DCACHE_WAYS == 2) ? 3'd1 :
+                                             (OPTION_DCACHE_WAYS == 4) ? 3'd2 :
+                                             (OPTION_DCACHE_WAYS == 8) ? 3'd3 :
+                                             (OPTION_DCACHE_WAYS == 16) ? 3'd4 :
+                                             (OPTION_DCACHE_WAYS == 32) ? 3'd5 :
+                                             3'd0 : 3'd0;
+
+   /* Instruction Cache Configuration register */
+   /* Reserved */
+   assign spr_iccfgr[31:13] = 0;
+   assign spr_iccfgr[8] = 0;
+   /* Cache Block Lock Register Implemented */
+   assign spr_iccfgr[`OR1K_SPR_ICCFGR_CBLRI] = 0;
+   /* Cache Block Prefetch Register Implemented */
+   assign spr_iccfgr[`OR1K_SPR_ICCFGR_CBPRI] = 0;
+   /* Cache Block Invalidate Register Implemented */
+   assign spr_iccfgr[`OR1K_SPR_ICCFGR_CBIRI] = (FEATURE_INSTRUCTIONCACHE!="NONE");
+   /* Cache Control Register Implemented */
+   assign spr_iccfgr[`OR1K_SPR_ICCFGR_CCRI] = 0;
+   /* Cache Block Size (0 = 16 bytes, 1 = 32 bytes) */
+   assign spr_iccfgr[`OR1K_SPR_ICCFGR_CBS] = (FEATURE_INSTRUCTIONCACHE!="NONE") ?
+                                             (OPTION_ICACHE_BLOCK_WIDTH == 5 ?
+                                              1 : 0) : 0;
+   /* Number of Cache Sets */
+   assign spr_iccfgr[`OR1K_SPR_ICCFGR_NCS] = (FEATURE_INSTRUCTIONCACHE!="NONE") ?
+                                             OPTION_ICACHE_SET_WIDTH : 0;
+   /* Number of Cache Ways: encoded as log2(ways) for 1..32 ways; any
+      other way count falls through to 0 */
+   assign spr_iccfgr[`OR1K_SPR_ICCFGR_NCW] = (FEATURE_INSTRUCTIONCACHE!="NONE") ?
+                                             (OPTION_ICACHE_WAYS == 1) ? 3'd0 :
+                                             (OPTION_ICACHE_WAYS == 2) ? 3'd1 :
+                                             (OPTION_ICACHE_WAYS == 4) ? 3'd2 :
+                                             (OPTION_ICACHE_WAYS == 8) ? 3'd3 :
+                                             (OPTION_ICACHE_WAYS == 16) ? 3'd4 :
+                                             (OPTION_ICACHE_WAYS == 32) ? 3'd5 :
+                                             3'd0 : 3'd0;
+
+   /* spr_dcfgr is always zero; spr_pccfgr reports the configured number
+      of performance counters when they are enabled */
+   assign spr_dcfgr = 0;
+   assign spr_pccfgr = (FEATURE_PERFCOUNTERS!="NONE") ? OPTION_PERFCOUNTERS_NUM : 0;
+
+endmodule // mor1kx_cfgrs
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu.v
new file mode 100644
index 0000000..682198f
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu.v
@@ -0,0 +1,692 @@
+/* ****************************************************************************
+  This Source Code Form is subject to the terms of the
+  Open Hardware Description License, v. 1.0.
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: CPU wrapper module + + Allows selection of CPU pipeline implementation based on the OPTION_CPU parameter ("CAPPUCCINO", "ESPRESSO" or "PRONTO_ESPRESSO"); any other value prints an error and calls $finish. + + Also provides some API-like hooks into the pipeline for monitors: the monitor_* wires and the get_gpr/set_gpr hooks, driven/defined only when SYNTHESIS is not defined. + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_cpu + #( + parameter OPTION_OPERAND_WIDTH = 32, + + parameter OPTION_CPU = "CAPPUCCINO", /* selects which pipeline generate branch is instantiated */ + + parameter FEATURE_DATACACHE = "NONE", + parameter OPTION_DCACHE_BLOCK_WIDTH = 5, + parameter OPTION_DCACHE_SET_WIDTH = 9, + parameter OPTION_DCACHE_WAYS = 2, + parameter OPTION_DCACHE_LIMIT_WIDTH = 32, + parameter OPTION_DCACHE_SNOOP = "NONE", + parameter FEATURE_DMMU = "NONE", + parameter FEATURE_DMMU_HW_TLB_RELOAD = "NONE", + parameter OPTION_DMMU_SET_WIDTH = 6, + parameter OPTION_DMMU_WAYS = 1, + parameter FEATURE_INSTRUCTIONCACHE = "NONE", + parameter OPTION_ICACHE_BLOCK_WIDTH = 5, + parameter OPTION_ICACHE_SET_WIDTH = 9, + parameter OPTION_ICACHE_WAYS = 2, + parameter OPTION_ICACHE_LIMIT_WIDTH = 32, + parameter FEATURE_IMMU = "NONE", + parameter FEATURE_IMMU_HW_TLB_RELOAD = "NONE", + parameter OPTION_IMMU_SET_WIDTH = 6, + parameter OPTION_IMMU_WAYS = 1, + parameter FEATURE_TIMER = "ENABLED", + parameter FEATURE_DEBUGUNIT = "NONE", + parameter FEATURE_PERFCOUNTERS = "NONE", + parameter OPTION_PERFCOUNTERS_NUM = 0, + parameter FEATURE_MAC = "NONE", + + parameter FEATURE_SYSCALL = "ENABLED", + parameter FEATURE_TRAP = "ENABLED", + parameter FEATURE_RANGE = "ENABLED", + + parameter FEATURE_PIC = "ENABLED", + parameter OPTION_PIC_TRIGGER = "LEVEL", + parameter OPTION_PIC_NMI_WIDTH = 0, + + parameter FEATURE_DSX = "NONE", + parameter FEATURE_OVERFLOW = "NONE", + parameter FEATURE_CARRY_FLAG = "ENABLED", + + parameter FEATURE_FASTCONTEXTS = "NONE", + parameter OPTION_RF_CLEAR_ON_INIT = 0, + 
parameter OPTION_RF_NUM_SHADOW_GPR = 0, + parameter OPTION_RF_ADDR_WIDTH = 5, + parameter OPTION_RF_WORDS = 32, + + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}, + + parameter OPTION_TCM_FETCHER = "DISABLED", /* forwarded only to the PRONTO_ESPRESSO pipeline */ + + parameter FEATURE_MULTIPLIER = "THREESTAGE", + parameter FEATURE_DIVIDER = "NONE", + + parameter OPTION_SHIFTER = "BARREL", + + parameter FEATURE_ADDC = "NONE", + parameter FEATURE_SRA = "ENABLED", + parameter FEATURE_ROR = "NONE", + parameter FEATURE_EXT = "NONE", + parameter FEATURE_CMOV = "NONE", + parameter FEATURE_FFL1 = "NONE", + parameter FEATURE_MSYNC = "ENABLED", + parameter FEATURE_PSYNC = "NONE", + parameter FEATURE_CSYNC = "NONE", + parameter FEATURE_ATOMIC = "ENABLED", + + parameter FEATURE_FPU = "NONE", // ENABLED|NONE + parameter OPTION_FTOI_ROUNDING = "CPP", // "CPP" / "IEEE" + + parameter FEATURE_CUST1 = "NONE", + parameter FEATURE_CUST2 = "NONE", + parameter FEATURE_CUST3 = "NONE", + parameter FEATURE_CUST4 = "NONE", + parameter FEATURE_CUST5 = "NONE", + parameter FEATURE_CUST6 = "NONE", + parameter FEATURE_CUST7 = "NONE", + parameter FEATURE_CUST8 = "NONE", + + parameter FEATURE_STORE_BUFFER = "ENABLED", + parameter OPTION_STORE_BUFFER_DEPTH_WIDTH = 8, + + parameter FEATURE_MULTICORE = "NONE", + + parameter FEATURE_TRACEPORT_EXEC = "NONE", + parameter FEATURE_BRANCH_PREDICTOR = "SIMPLE" + ) + ( + input clk, + input rst, + + // Instruction bus + input ibus_err_i, + input ibus_ack_i, + input [`OR1K_INSN_WIDTH-1:0] ibus_dat_i, + output [OPTION_OPERAND_WIDTH-1:0] ibus_adr_o, + output ibus_req_o, + output ibus_burst_o, + + // Data bus + input dbus_err_i, + input dbus_ack_i, + input [OPTION_OPERAND_WIDTH-1:0] dbus_dat_i, + output [OPTION_OPERAND_WIDTH-1:0] dbus_adr_o, + output [OPTION_OPERAND_WIDTH-1:0] dbus_dat_o, + output dbus_req_o, + output [3:0] dbus_bsel_o, + output dbus_we_o, + output dbus_burst_o, + + // Interrupts + input [31:0] irq_i, + + // Debug interface + input [15:0] du_addr_i, + 
input du_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] du_dat_i, + input du_we_i, + output [OPTION_OPERAND_WIDTH-1:0] du_dat_o, + output du_ack_o, + // Stall control from debug interface + input du_stall_i, + output du_stall_o, + /* Execute trace port: connected only in the CAPPUCCINO branch below */ + output traceport_exec_valid_o, + output [31:0] traceport_exec_pc_o, + output traceport_exec_jb_o, + output traceport_exec_jal_o, + output traceport_exec_jr_o, + output [31:0] traceport_exec_jbtarget_o, + output [`OR1K_INSN_WIDTH-1:0] traceport_exec_insn_o, + output [OPTION_OPERAND_WIDTH-1:0] traceport_exec_wbdata_o, + output [OPTION_RF_ADDR_WIDTH-1:0] traceport_exec_wbreg_o, + output traceport_exec_wben_o, + + // SPR accesses to external units (cache, mmu, etc.) + output [15:0] spr_bus_addr_o, + output spr_bus_we_o, + output spr_bus_stb_o, + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dmmu_i, + input spr_bus_ack_dmmu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_immu_i, + input spr_bus_ack_immu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_mac_i, + input spr_bus_ack_mac_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pmu_i, + input spr_bus_ack_pmu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pcu_i, + input spr_bus_ack_pcu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_fpu_i, + input spr_bus_ack_fpu_i, + output [15:0] spr_sr_o, + + // The multicore core identifier + input [OPTION_OPERAND_WIDTH-1:0] multicore_coreid_i, + // The number of cores + input [OPTION_OPERAND_WIDTH-1:0] multicore_numcores_i, + /* Snoop inputs: connected only in the CAPPUCCINO branch below */ + input [31:0] snoop_adr_i, + input snoop_en_i + ); + + wire [`OR1K_INSN_WIDTH-1:0] monitor_execute_insn/* verilator public */; + wire monitor_execute_advance/* verilator public */; + wire monitor_flag_set/* verilator public */; + wire monitor_flag_clear/* verilator public */; + wire monitor_flag_sr/* verilator public */; + wire monitor_flag/* verilator public */; + wire [OPTION_OPERAND_WIDTH-1:0] monitor_spr_sr/* verilator public */; + wire [OPTION_OPERAND_WIDTH-1:0] 
monitor_execute_pc/* verilator public */; + wire [OPTION_OPERAND_WIDTH-1:0] monitor_rf_result_in/* verilator public */; + wire monitor_clk/* verilator public */; + wire [OPTION_OPERAND_WIDTH-1:0] monitor_spr_epcr/* verilator public */; + wire [OPTION_OPERAND_WIDTH-1:0] monitor_spr_eear/* verilator public */; + wire [OPTION_OPERAND_WIDTH-1:0] monitor_spr_esr/* verilator public */; + wire monitor_branch_mispredict/* verilator public */; + + // synthesis translate_off +`ifndef SYNTHESIS + /* Provide interface hooks for register functions: get_gpr/set_gpr in each branch forward to the get_gpr/set_gpr of the pipeline instance selected by OPTION_CPU, reached through its hierarchical name. */ + generate + if (OPTION_CPU=="CAPPUCCINO") begin : monitor + +`include "mor1kx_utils.vh" + localparam RF_ADDR_WIDTH = calc_rf_addr_width(OPTION_RF_ADDR_WIDTH, + OPTION_RF_NUM_SHADOW_GPR); /* gpr_num is RF_ADDR_WIDTH bits wide in this branch and 16 bits wide in the other two */ + + function [OPTION_OPERAND_WIDTH-1:0] get_gpr; + // verilator public + input [RF_ADDR_WIDTH-1:0] gpr_num; + get_gpr = cappuccino.mor1kx_cpu.get_gpr(gpr_num); + endfunction + task set_gpr; + // verilator public + input [RF_ADDR_WIDTH-1:0] gpr_num; + input [OPTION_OPERAND_WIDTH-1:0] gpr_value; + cappuccino.mor1kx_cpu.set_gpr(gpr_num, gpr_value); + endtask + end + if (OPTION_CPU=="ESPRESSO") begin : monitor + function [OPTION_OPERAND_WIDTH-1:0] get_gpr; + // verilator public + input [15:0] gpr_num; + get_gpr = espresso.mor1kx_cpu.get_gpr(gpr_num); + endfunction + task set_gpr; + // verilator public + input [15:0] gpr_num; + input [OPTION_OPERAND_WIDTH-1:0] gpr_value; + espresso.mor1kx_cpu.set_gpr(gpr_num, gpr_value); + endtask + end + /* verilator lint_off WIDTH */ + if (OPTION_CPU=="PRONTO_ESPRESSO") begin : monitor + function [OPTION_OPERAND_WIDTH-1:0] get_gpr; + // verilator public + input [15:0] gpr_num; + get_gpr = prontoespresso.mor1kx_cpu.get_gpr(gpr_num); + endfunction + task set_gpr; + // verilator public + input [15:0] gpr_num; + input [OPTION_OPERAND_WIDTH-1:0] gpr_value; + prontoespresso.mor1kx_cpu.set_gpr(gpr_num, gpr_value); + endtask + end + endgenerate +`endif + // synthesis translate_on + + generate + /* verilator lint_off WIDTH 
*/ + if (OPTION_CPU=="CAPPUCCINO") begin : cappuccino + /* verilator lint_on WIDTH */ + mor1kx_cpu_cappuccino + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .FEATURE_DATACACHE(FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH(OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH(OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS(OPTION_DCACHE_WAYS), + .OPTION_DCACHE_LIMIT_WIDTH(OPTION_DCACHE_LIMIT_WIDTH), + .OPTION_DCACHE_SNOOP(OPTION_DCACHE_SNOOP), + .FEATURE_DMMU(FEATURE_DMMU), + .FEATURE_DMMU_HW_TLB_RELOAD(FEATURE_DMMU_HW_TLB_RELOAD), + .OPTION_DMMU_SET_WIDTH(OPTION_DMMU_SET_WIDTH), + .OPTION_DMMU_WAYS(OPTION_DMMU_WAYS), + .FEATURE_INSTRUCTIONCACHE(FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH(OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS(OPTION_ICACHE_WAYS), + .OPTION_ICACHE_LIMIT_WIDTH(OPTION_ICACHE_LIMIT_WIDTH), + .FEATURE_IMMU(FEATURE_IMMU), + .FEATURE_IMMU_HW_TLB_RELOAD(FEATURE_IMMU_HW_TLB_RELOAD), + .OPTION_IMMU_SET_WIDTH(OPTION_IMMU_SET_WIDTH), + .OPTION_IMMU_WAYS(OPTION_IMMU_WAYS), + .FEATURE_PIC(FEATURE_PIC), + .FEATURE_TIMER(FEATURE_TIMER), + .FEATURE_DEBUGUNIT(FEATURE_DEBUGUNIT), + .FEATURE_PERFCOUNTERS(FEATURE_PERFCOUNTERS), + .OPTION_PERFCOUNTERS_NUM(OPTION_PERFCOUNTERS_NUM), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_MULTICORE(FEATURE_MULTICORE), + .FEATURE_TRACEPORT_EXEC(FEATURE_TRACEPORT_EXEC), + .FEATURE_BRANCH_PREDICTOR(FEATURE_BRANCH_PREDICTOR), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE), + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH), + .FEATURE_DSX(FEATURE_DSX), + .FEATURE_FASTCONTEXTS(FEATURE_FASTCONTEXTS), + .OPTION_RF_CLEAR_ON_INIT(OPTION_RF_CLEAR_ON_INIT), + .OPTION_RF_NUM_SHADOW_GPR(OPTION_RF_NUM_SHADOW_GPR), + .FEATURE_OVERFLOW(FEATURE_OVERFLOW), + .FEATURE_CARRY_FLAG(FEATURE_CARRY_FLAG), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .OPTION_RF_WORDS(OPTION_RF_WORDS), 
+ .OPTION_RESET_PC(OPTION_RESET_PC), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + .FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + .FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_MSYNC(FEATURE_MSYNC), + .FEATURE_PSYNC(FEATURE_PSYNC), + .FEATURE_CSYNC(FEATURE_CSYNC), + .FEATURE_ATOMIC(FEATURE_ATOMIC), + .FEATURE_FPU(FEATURE_FPU), + .OPTION_FTOI_ROUNDING(OPTION_FTOI_ROUNDING), + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + .FEATURE_CUST8(FEATURE_CUST8), + .OPTION_SHIFTER(OPTION_SHIFTER), + .FEATURE_STORE_BUFFER(FEATURE_STORE_BUFFER), + .OPTION_STORE_BUFFER_DEPTH_WIDTH(OPTION_STORE_BUFFER_DEPTH_WIDTH) + ) + mor1kx_cpu + (/*AUTOINST*/ + // Outputs + .ibus_adr_o (ibus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .ibus_req_o (ibus_req_o), + .ibus_burst_o (ibus_burst_o), + .dbus_adr_o (dbus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_dat_o (dbus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_req_o (dbus_req_o), + .dbus_bsel_o (dbus_bsel_o[3:0]), + .dbus_we_o (dbus_we_o), + .dbus_burst_o (dbus_burst_o), + .du_dat_o (du_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .du_ack_o (du_ack_o), + .du_stall_o (du_stall_o), + .traceport_exec_valid_o (traceport_exec_valid_o), + .traceport_exec_pc_o (traceport_exec_pc_o[31:0]), + .traceport_exec_jb_o (traceport_exec_jb_o), + .traceport_exec_jal_o (traceport_exec_jal_o), + .traceport_exec_jr_o (traceport_exec_jr_o), + .traceport_exec_jbtarget_o (traceport_exec_jbtarget_o[31:0]), + .traceport_exec_insn_o (traceport_exec_insn_o[`OR1K_INSN_WIDTH-1:0]), + .traceport_exec_wbdata_o (traceport_exec_wbdata_o[OPTION_OPERAND_WIDTH-1:0]), + .traceport_exec_wbreg_o (traceport_exec_wbreg_o[OPTION_RF_ADDR_WIDTH-1:0]), + .traceport_exec_wben_o (traceport_exec_wben_o), 
+ .spr_bus_addr_o (spr_bus_addr_o[15:0]), + .spr_bus_we_o (spr_bus_we_o), + .spr_bus_stb_o (spr_bus_stb_o), + .spr_bus_dat_o (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .spr_sr_o (spr_sr_o[15:0]), + // Inputs + .clk (clk), + .rst (rst), + .ibus_err_i (ibus_err_i), + .ibus_ack_i (ibus_ack_i), + .ibus_dat_i (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), + .dbus_err_i (dbus_err_i), + .dbus_ack_i (dbus_ack_i), + .dbus_dat_i (dbus_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .irq_i (irq_i[31:0]), + .du_addr_i (du_addr_i[15:0]), + .du_stb_i (du_stb_i), + .du_dat_i (du_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .du_we_i (du_we_i), + .du_stall_i (du_stall_i), + .spr_bus_dat_mac_i (spr_bus_dat_mac_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_mac_i (spr_bus_ack_mac_i), + .spr_bus_dat_pmu_i (spr_bus_dat_pmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pmu_i (spr_bus_ack_pmu_i), + .spr_bus_dat_pcu_i (spr_bus_dat_pcu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pcu_i (spr_bus_ack_pcu_i), + .spr_bus_dat_fpu_i (spr_bus_dat_fpu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_fpu_i (spr_bus_ack_fpu_i), + .multicore_coreid_i (multicore_coreid_i[OPTION_OPERAND_WIDTH-1:0]), + .multicore_numcores_i (multicore_numcores_i[OPTION_OPERAND_WIDTH-1:0]), + .snoop_adr_i (snoop_adr_i[31:0]), + .snoop_en_i (snoop_en_i)); + /* NOTE: the wrapper inputs spr_bus_dat_dmmu_i, spr_bus_ack_dmmu_i, spr_bus_dat_immu_i and spr_bus_ack_immu_i are not connected in this branch. */ + // synthesis translate_off +`ifndef SYNTHESIS + /* Monitor flag: flag_set_i forces 1, otherwise flag_clear_i forces 0, otherwise the ctrl_flag_o value is used. */ + assign monitor_flag = monitor_flag_set ? 1 : + monitor_flag_clear ? 
0 : + monitor_flag_sr; + assign monitor_clk = clk; + + assign monitor_execute_advance = cappuccino.mor1kx_cpu.padv_execute_o; + assign monitor_flag_set = cappuccino.mor1kx_cpu.mor1kx_execute_ctrl_cappuccino.flag_set_i; + assign monitor_flag_clear = cappuccino.mor1kx_cpu.mor1kx_execute_ctrl_cappuccino.flag_clear_i; + assign monitor_flag_sr = cappuccino.mor1kx_cpu.mor1kx_ctrl_cappuccino.ctrl_flag_o; + assign monitor_spr_sr = {16'd0,cappuccino.mor1kx_cpu.mor1kx_ctrl_cappuccino.spr_sr[15:`OR1K_SPR_SR_F+1],cappuccino.mor1kx_cpu.mor1kx_ctrl_cappuccino.ctrl_flag_o,cappuccino.mor1kx_cpu.mor1kx_ctrl_cappuccino.spr_sr[`OR1K_SPR_SR_F-1:0]}; + assign monitor_execute_pc = cappuccino.mor1kx_cpu.pc_decode_to_execute; + assign monitor_rf_result_in = cappuccino.mor1kx_cpu.mor1kx_rf_cappuccino.result_i; + assign monitor_spr_esr = {16'd0,cappuccino.mor1kx_cpu.mor1kx_ctrl_cappuccino.spr_esr}; + assign monitor_spr_epcr = cappuccino.mor1kx_cpu.mor1kx_ctrl_cappuccino.spr_epcr; + assign monitor_spr_eear = cappuccino.mor1kx_cpu.mor1kx_ctrl_cappuccino.spr_eear; + assign monitor_branch_mispredict = cappuccino.mor1kx_cpu.branch_mispredict_o; + /* Capture decode_insn_i on each clock edge where padv_decode_o is high; this register drives monitor_execute_insn. */ + reg [`OR1K_INSN_WIDTH-1:0] monitor_execute_insn_reg; + always @(posedge clk) + if (cappuccino.mor1kx_cpu.padv_decode_o) + monitor_execute_insn_reg <= cappuccino.mor1kx_cpu.mor1kx_decode.decode_insn_i; + + assign monitor_execute_insn = monitor_execute_insn_reg; + + +`endif + // synthesis translate_on + + + end // block: cappuccino + /* verilator lint_off WIDTH */ + if (OPTION_CPU=="ESPRESSO") begin : espresso + /* verilator lint_on WIDTH */ + mor1kx_cpu_espresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .FEATURE_DATACACHE(FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH(OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH(OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS(OPTION_DCACHE_WAYS), + .FEATURE_DMMU(FEATURE_DMMU), + .FEATURE_INSTRUCTIONCACHE(FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH), + 
.OPTION_ICACHE_SET_WIDTH(OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS(OPTION_ICACHE_WAYS), + .FEATURE_IMMU(FEATURE_IMMU), + .FEATURE_PIC(FEATURE_PIC), + .FEATURE_TIMER(FEATURE_TIMER), + .FEATURE_DEBUGUNIT(FEATURE_DEBUGUNIT), + .FEATURE_PERFCOUNTERS(FEATURE_PERFCOUNTERS), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_MULTICORE(FEATURE_MULTICORE), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE), + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH), + .FEATURE_DSX(FEATURE_DSX), + .FEATURE_FASTCONTEXTS(FEATURE_FASTCONTEXTS), + .FEATURE_OVERFLOW(FEATURE_OVERFLOW), + .FEATURE_CARRY_FLAG(FEATURE_CARRY_FLAG), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .OPTION_RF_WORDS(OPTION_RF_WORDS), + .OPTION_RESET_PC(OPTION_RESET_PC), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + .FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + .FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_MSYNC(FEATURE_MSYNC), + .FEATURE_PSYNC(FEATURE_PSYNC), + .FEATURE_CSYNC(FEATURE_CSYNC), + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + .FEATURE_CUST8(FEATURE_CUST8), + .OPTION_SHIFTER(OPTION_SHIFTER) + ) + mor1kx_cpu + (/*AUTOINST*/ + // Outputs + .ibus_adr_o (ibus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .ibus_req_o (ibus_req_o), + .ibus_burst_o (ibus_burst_o), + .dbus_adr_o (dbus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_dat_o (dbus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_req_o (dbus_req_o), + .dbus_bsel_o (dbus_bsel_o[3:0]), + .dbus_we_o (dbus_we_o), + .dbus_burst_o (dbus_burst_o), + .du_dat_o (du_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .du_ack_o (du_ack_o), + .du_stall_o (du_stall_o), + .spr_bus_addr_o 
(spr_bus_addr_o[15:0]), + .spr_bus_we_o (spr_bus_we_o), + .spr_bus_stb_o (spr_bus_stb_o), + .spr_bus_dat_o (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .spr_sr_o (spr_sr_o[15:0]), + // Inputs + .clk (clk), + .rst (rst), + .ibus_err_i (ibus_err_i), + .ibus_ack_i (ibus_ack_i), + .ibus_dat_i (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), + .dbus_err_i (dbus_err_i), + .dbus_ack_i (dbus_ack_i), + .dbus_dat_i (dbus_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .irq_i (irq_i[31:0]), + .du_addr_i (du_addr_i[15:0]), + .du_stb_i (du_stb_i), + .du_dat_i (du_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .du_we_i (du_we_i), + .du_stall_i (du_stall_i), + .spr_bus_dat_dmmu_i (spr_bus_dat_dmmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_dmmu_i (spr_bus_ack_dmmu_i), + .spr_bus_dat_immu_i (spr_bus_dat_immu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_immu_i (spr_bus_ack_immu_i), + .spr_bus_dat_mac_i (spr_bus_dat_mac_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_mac_i (spr_bus_ack_mac_i), + .spr_bus_dat_pmu_i (spr_bus_dat_pmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pmu_i (spr_bus_ack_pmu_i), + .spr_bus_dat_pcu_i (spr_bus_dat_pcu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pcu_i (spr_bus_ack_pcu_i), + .spr_bus_dat_fpu_i (spr_bus_dat_fpu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_fpu_i (spr_bus_ack_fpu_i), + .multicore_coreid_i (multicore_coreid_i[OPTION_OPERAND_WIDTH-1:0])); + /* NOTE: the wrapper ports traceport_exec_*_o, multicore_numcores_i, snoop_adr_i and snoop_en_i are not connected in this branch. */ + // synthesis translate_off +`ifndef SYNTHESIS + assign monitor_flag = monitor_flag_set ? 1 : + monitor_flag_clear ? 
0 : + monitor_flag_sr; + assign monitor_clk = clk; + assign monitor_execute_insn = espresso.mor1kx_cpu.mor1kx_fetch_espresso.decode_insn_o; + assign monitor_execute_advance = espresso.mor1kx_cpu.mor1kx_ctrl_espresso.execute_done; + assign monitor_flag_set = espresso.mor1kx_cpu.mor1kx_ctrl_espresso.ctrl_flag_set_i; + assign monitor_flag_clear = espresso.mor1kx_cpu.mor1kx_ctrl_espresso.ctrl_flag_clear_i; + assign monitor_flag_sr = espresso.mor1kx_cpu.mor1kx_ctrl_espresso.flag; + assign monitor_spr_sr = {16'd0,espresso.mor1kx_cpu.mor1kx_ctrl_espresso.spr_sr[15:`OR1K_SPR_SR_F+1], + // Use the locally calculated flag value + monitor_flag, + espresso.mor1kx_cpu.mor1kx_ctrl_espresso.spr_sr[`OR1K_SPR_SR_F-1:0]}; + assign monitor_execute_pc = espresso.mor1kx_cpu.mor1kx_ctrl_espresso.spr_ppc; + assign monitor_rf_result_in = espresso.mor1kx_cpu.mor1kx_rf_espresso.result_i; + assign monitor_spr_esr = {16'd0,espresso.mor1kx_cpu.mor1kx_ctrl_espresso.spr_esr}; + assign monitor_spr_epcr = espresso.mor1kx_cpu.mor1kx_ctrl_espresso.spr_epcr; + assign monitor_spr_eear = espresso.mor1kx_cpu.mor1kx_ctrl_espresso.spr_eear; + assign monitor_branch_mispredict = 0; /* no mispredict signal is tapped in this branch; the monitor is held at 0 */ +`endif + // synthesis translate_on + + end // block: espresso + /* verilator lint_off WIDTH */ + if (OPTION_CPU=="PRONTO_ESPRESSO") begin : prontoespresso + /* verilator lint_on WIDTH */ + mor1kx_cpu_prontoespresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .FEATURE_DATACACHE(FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH(OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH(OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS(OPTION_DCACHE_WAYS), + .FEATURE_DMMU(FEATURE_DMMU), + .FEATURE_INSTRUCTIONCACHE(FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH(OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS(OPTION_ICACHE_WAYS), + .FEATURE_IMMU(FEATURE_IMMU), + .FEATURE_PIC(FEATURE_PIC), + .FEATURE_TIMER(FEATURE_TIMER), + .FEATURE_DEBUGUNIT(FEATURE_DEBUGUNIT), + 
.FEATURE_PERFCOUNTERS(FEATURE_PERFCOUNTERS), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_MULTICORE(FEATURE_MULTICORE), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE), + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH), + .FEATURE_DSX(FEATURE_DSX), + .FEATURE_FASTCONTEXTS(FEATURE_FASTCONTEXTS), + .FEATURE_OVERFLOW(FEATURE_OVERFLOW), + .FEATURE_CARRY_FLAG(FEATURE_CARRY_FLAG), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .OPTION_RF_WORDS(OPTION_RF_WORDS), + .OPTION_RESET_PC(OPTION_RESET_PC), + .OPTION_TCM_FETCHER(OPTION_TCM_FETCHER), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + .FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + .FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_MSYNC(FEATURE_MSYNC), + .FEATURE_PSYNC(FEATURE_PSYNC), + .FEATURE_CSYNC(FEATURE_CSYNC), + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + .FEATURE_CUST8(FEATURE_CUST8), + .OPTION_SHIFTER(OPTION_SHIFTER) + ) + mor1kx_cpu + (/*AUTOINST*/ + // Outputs + .ibus_adr_o (ibus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .ibus_req_o (ibus_req_o), + .ibus_burst_o (ibus_burst_o), + .dbus_adr_o (dbus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_dat_o (dbus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_req_o (dbus_req_o), + .dbus_bsel_o (dbus_bsel_o[3:0]), + .dbus_we_o (dbus_we_o), + .dbus_burst_o (dbus_burst_o), + .du_dat_o (du_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .du_ack_o (du_ack_o), + .du_stall_o (du_stall_o), + .spr_bus_addr_o (spr_bus_addr_o[15:0]), + .spr_bus_we_o (spr_bus_we_o), + .spr_bus_stb_o (spr_bus_stb_o), + .spr_bus_dat_o (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .spr_sr_o (spr_sr_o[15:0]), + // Inputs + .clk (clk), 
+ .rst (rst), + .ibus_err_i (ibus_err_i), + .ibus_ack_i (ibus_ack_i), + .ibus_dat_i (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), + .dbus_err_i (dbus_err_i), + .dbus_ack_i (dbus_ack_i), + .dbus_dat_i (dbus_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .irq_i (irq_i[31:0]), + .du_addr_i (du_addr_i[15:0]), + .du_stb_i (du_stb_i), + .du_dat_i (du_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .du_we_i (du_we_i), + .du_stall_i (du_stall_i), + .spr_bus_dat_dmmu_i (spr_bus_dat_dmmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_dmmu_i (spr_bus_ack_dmmu_i), + .spr_bus_dat_immu_i (spr_bus_dat_immu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_immu_i (spr_bus_ack_immu_i), + .spr_bus_dat_mac_i (spr_bus_dat_mac_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_mac_i (spr_bus_ack_mac_i), + .spr_bus_dat_pmu_i (spr_bus_dat_pmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pmu_i (spr_bus_ack_pmu_i), + .spr_bus_dat_pcu_i (spr_bus_dat_pcu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pcu_i (spr_bus_ack_pcu_i), + .spr_bus_dat_fpu_i (spr_bus_dat_fpu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_fpu_i (spr_bus_ack_fpu_i), + .multicore_coreid_i (multicore_coreid_i[OPTION_OPERAND_WIDTH-1:0])); + /* NOTE: the wrapper ports traceport_exec_*_o, multicore_numcores_i, snoop_adr_i and snoop_en_i are not connected in this branch. */ + // synthesis translate_off +`ifndef SYNTHESIS + assign monitor_flag = monitor_flag_set ? 1 : + monitor_flag_clear ? 
0 : + monitor_flag_sr; + assign monitor_clk = clk; + assign monitor_execute_insn = prontoespresso.mor1kx_cpu.insn_fetch_to_decode; + assign monitor_execute_advance = prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.execute_done; + assign monitor_flag_set = prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.ctrl_flag_set_i; + assign monitor_flag_clear = prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.ctrl_flag_clear_i; + assign monitor_flag_sr = prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.flag; + assign monitor_spr_sr = {16'd0,prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.spr_sr[15:`OR1K_SPR_SR_F+1], + // Use the locally calculated flag value + monitor_flag, + prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.spr_sr[`OR1K_SPR_SR_F-1:0]}; + assign monitor_execute_pc = prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.spr_ppc; + assign monitor_rf_result_in = prontoespresso.mor1kx_cpu.mor1kx_rf_espresso.result_i; + assign monitor_spr_esr = {16'd0,prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.spr_esr}; + assign monitor_spr_epcr = prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.spr_epcr; + assign monitor_spr_eear = prontoespresso.mor1kx_cpu.mor1kx_ctrl_prontoespresso.spr_eear; + assign monitor_branch_mispredict = 0; /* no mispredict signal is tapped in this branch; the monitor is held at 0 */ +`endif + // synthesis translate_on + + end /* block: prontoespresso */ + /* verilator lint_off WIDTH */ + if (OPTION_CPU!="CAPPUCCINO" && OPTION_CPU!="ESPRESSO" && + OPTION_CPU!="PRONTO_ESPRESSO") + /* verilator lint_on WIDTH */ + begin + initial begin /* report the unsupported OPTION_CPU value and stop the simulation */ + $display("Error: OPTION_CPU, %s, not valid", OPTION_CPU); + $finish(); + end + end // if: OPTION_CPU is none of the three supported pipelines + endgenerate + +endmodule // mor1kx_cpu diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu_cappuccino.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu_cappuccino.v new file mode 100644 index 0000000..af01d7c --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu_cappuccino.v @@ -0,0 +1,1596 @@ +/* 
**************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: "Cappuccino" pipeline CPU module + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_cpu_cappuccino + #( + parameter OPTION_OPERAND_WIDTH = 32, + + parameter FEATURE_DATACACHE = "NONE", + parameter OPTION_DCACHE_BLOCK_WIDTH = 5, + parameter OPTION_DCACHE_SET_WIDTH = 9, + parameter OPTION_DCACHE_WAYS = 2, + parameter OPTION_DCACHE_LIMIT_WIDTH = 32, + parameter OPTION_DCACHE_SNOOP = "NONE", + parameter FEATURE_DMMU = "NONE", + parameter FEATURE_DMMU_HW_TLB_RELOAD = "NONE", + parameter OPTION_DMMU_SET_WIDTH = 6, + parameter OPTION_DMMU_WAYS = 1, + parameter FEATURE_INSTRUCTIONCACHE = "NONE", + parameter OPTION_ICACHE_BLOCK_WIDTH = 5, + parameter OPTION_ICACHE_SET_WIDTH = 9, + parameter OPTION_ICACHE_WAYS = 2, + parameter OPTION_ICACHE_LIMIT_WIDTH = 32, + parameter FEATURE_IMMU = "NONE", + parameter FEATURE_IMMU_HW_TLB_RELOAD = "NONE", + parameter OPTION_IMMU_SET_WIDTH = 6, + parameter OPTION_IMMU_WAYS = 1, + parameter FEATURE_TIMER = "ENABLED", + parameter FEATURE_DEBUGUNIT = "NONE", + parameter FEATURE_PERFCOUNTERS = "NONE", + parameter OPTION_PERFCOUNTERS_NUM = 0, + parameter FEATURE_MAC = "NONE", + + parameter FEATURE_SYSCALL = "ENABLED", + parameter FEATURE_TRAP = "ENABLED", + parameter FEATURE_RANGE = "ENABLED", + + parameter FEATURE_PIC = "ENABLED", + parameter OPTION_PIC_TRIGGER = "LEVEL", + parameter OPTION_PIC_NMI_WIDTH = 0, + + parameter FEATURE_DSX = "NONE", + parameter FEATURE_OVERFLOW = "NONE", + parameter FEATURE_CARRY_FLAG = "ENABLED", + + parameter FEATURE_FASTCONTEXTS = "NONE", + parameter 
OPTION_RF_CLEAR_ON_INIT = 0, + parameter OPTION_RF_NUM_SHADOW_GPR = 0, + parameter OPTION_RF_ADDR_WIDTH = 5, + parameter OPTION_RF_WORDS = 32, + + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}, + + parameter FEATURE_MULTIPLIER = "THREESTAGE", + parameter FEATURE_DIVIDER = "NONE", + + parameter OPTION_SHIFTER = "BARREL", + + parameter FEATURE_ADDC = "NONE", + parameter FEATURE_SRA = "ENABLED", + parameter FEATURE_ROR = "NONE", + parameter FEATURE_EXT = "NONE", + parameter FEATURE_CMOV = "NONE", + parameter FEATURE_FFL1 = "NONE", + parameter FEATURE_MSYNC = "ENABLED", + parameter FEATURE_PSYNC = "NONE", + parameter FEATURE_CSYNC = "NONE", + + parameter FEATURE_ATOMIC = "ENABLED", + + parameter FEATURE_FPU = "NONE", // ENABLED|NONE + parameter OPTION_FTOI_ROUNDING = "CPP", // "CPP" / "IEEE" + + parameter FEATURE_CUST1 = "NONE", + parameter FEATURE_CUST2 = "NONE", + parameter FEATURE_CUST3 = "NONE", + parameter FEATURE_CUST4 = "NONE", + parameter FEATURE_CUST5 = "NONE", + parameter FEATURE_CUST6 = "NONE", + parameter FEATURE_CUST7 = "NONE", + parameter FEATURE_CUST8 = "NONE", + + parameter FEATURE_STORE_BUFFER = "ENABLED", + parameter OPTION_STORE_BUFFER_DEPTH_WIDTH = 8, + + parameter FEATURE_MULTICORE = "NONE", + + parameter FEATURE_TRACEPORT_EXEC = "NONE", + parameter FEATURE_BRANCH_PREDICTOR = "SIMPLE" // SIMPLE|SAT_COUNTER|GSHARE + ) + ( + input clk, + input rst, + + // Instruction bus + input ibus_err_i, + input ibus_ack_i, + input [`OR1K_INSN_WIDTH-1:0] ibus_dat_i, + output [OPTION_OPERAND_WIDTH-1:0] ibus_adr_o, + output ibus_req_o, + output ibus_burst_o, + + // Data bus + input dbus_err_i, + input dbus_ack_i, + input [OPTION_OPERAND_WIDTH-1:0] dbus_dat_i, + output [OPTION_OPERAND_WIDTH-1:0] dbus_adr_o, + output [OPTION_OPERAND_WIDTH-1:0] dbus_dat_o, + output dbus_req_o, + output [3:0] dbus_bsel_o, + output dbus_we_o, + output dbus_burst_o, + + // Interrupts + input [31:0] irq_i, + + // Debug interface + input [15:0] 
du_addr_i, + input du_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] du_dat_i, + input du_we_i, + output [OPTION_OPERAND_WIDTH-1:0] du_dat_o, + output du_ack_o, + // Stall control from debug interface + input du_stall_i, + output du_stall_o, + + output reg traceport_exec_valid_o, + output reg [31:0] traceport_exec_pc_o, + output reg traceport_exec_jb_o, + output reg traceport_exec_jal_o, + output reg traceport_exec_jr_o, + output reg [31:0] traceport_exec_jbtarget_o, + output reg [`OR1K_INSN_WIDTH-1:0] traceport_exec_insn_o, + output [OPTION_OPERAND_WIDTH-1:0] traceport_exec_wbdata_o, + output [OPTION_RF_ADDR_WIDTH-1:0] traceport_exec_wbreg_o, + output traceport_exec_wben_o, + + // SPR accesses to external units (cache, mmu, etc.) + output [15:0] spr_bus_addr_o, + output spr_bus_we_o, + output spr_bus_stb_o, + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_mac_i, + input spr_bus_ack_mac_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pmu_i, + input spr_bus_ack_pmu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pcu_i, + input spr_bus_ack_pcu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_fpu_i, + input spr_bus_ack_fpu_i, + output [15:0] spr_sr_o, + + input [OPTION_OPERAND_WIDTH-1:0] multicore_coreid_i, + input [OPTION_OPERAND_WIDTH-1:0] multicore_numcores_i, + + input [31:0] snoop_adr_i, + input snoop_en_i + ); + + wire [OPTION_OPERAND_WIDTH-1:0] pc_fetch_to_decode; + wire [`OR1K_INSN_WIDTH-1:0] insn_fetch_to_decode; + wire [OPTION_OPERAND_WIDTH-1:0] pc_decode_to_execute; + wire [OPTION_OPERAND_WIDTH-1:0] pc_execute_to_ctrl; + + /*AUTOWIRE*/ + // Beginning of automatic wires (for undeclared instantiated-module outputs) + wire [OPTION_OPERAND_WIDTH-1:0] adder_result_o;// From mor1kx_execute_alu of mor1kx_execute_alu.v + wire [OPTION_OPERAND_WIDTH-1:0] alu_result_o;// From mor1kx_execute_alu of mor1kx_execute_alu.v + wire alu_valid_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire atomic_flag_clear_o; // 
From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire atomic_flag_set_o; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire branch_mispredict_o; // From mor1kx_branch_prediction of mor1kx_branch_prediction.v + wire carry_clear_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire carry_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_alu_result_o;// From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_branch_except_pc_o;// From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire ctrl_branch_exception_o;// From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire ctrl_bubble_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire ctrl_carry_clear_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_carry_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire ctrl_carry_set_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_epcr_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire ctrl_except_align_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_dbus_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_dpagefault_o;// From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_dtlb_miss_o;// From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_ibus_align_o;// From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_ibus_err_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_illegal_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_ipagefault_o;// From mor1kx_execute_ctrl_cappuccino of 
mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_itlb_miss_o;// From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_syscall_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_except_trap_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_flag_clear_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_flag_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire ctrl_flag_set_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_lsu_adr_o;// From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire [1:0] ctrl_lsu_length_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_lsu_zext_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_mfspr_ack_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire ctrl_mtspr_ack_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire ctrl_op_lsu_atomic_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_op_lsu_load_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_op_lsu_store_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_op_mfspr_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_op_msync_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_op_mtspr_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_op_mul_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_op_rfe_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_overflow_clear_o; // From mor1kx_execute_ctrl_cappuccino of 
mor1kx_execute_ctrl_cappuccino.v + wire ctrl_overflow_set_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire [`OR1K_FPCSR_WIDTH-1:0] ctrl_fpcsr_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_fpcsr_set_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire [`OR1K_FPCSR_RM_SIZE-1:0] ctrl_fpu_round_mode_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire ctrl_rf_wb_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_rfb_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire [OPTION_RF_ADDR_WIDTH-1:0] ctrl_rfd_adr_o;// From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire ctrl_valid_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire decode_adder_do_carry_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_adder_do_sub_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_branch_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] decode_branch_target_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire decode_bubble_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire decode_except_ibus_err_o;// From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire decode_except_illegal_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_except_ipagefault_o;// From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire decode_except_itlb_miss_o;// From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire decode_except_syscall_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_except_trap_o; // From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_IMM_WIDTH-1:0] decode_imm16_o; // From mor1kx_decode of mor1kx_decode.v + wire 
[OPTION_OPERAND_WIDTH-1:0] decode_immediate_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_immediate_sel_o; // From mor1kx_decode of mor1kx_decode.v + wire [9:0] decode_immjbr_upper_o; // From mor1kx_decode of mor1kx_decode.v + wire [1:0] decode_lsu_length_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_lsu_zext_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_add_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_alu_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_bf_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_bnf_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_branch_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_brcond_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_div_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_div_signed_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_div_unsigned_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_op_ffl1_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_jal_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_jbr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_jr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_lsu_atomic_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_lsu_load_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_lsu_store_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mfspr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_movhi_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_ext_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_msync_o; // From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_FPUOP_WIDTH-1:0] decode_op_fpu_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mtspr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mul_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mul_signed_o; // From mor1kx_decode of 
mor1kx_decode.v + wire decode_op_mul_unsigned_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_op_rfe_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_setflag_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_shift_o; // From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_o;// From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_secondary_o;// From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_OPCODE_WIDTH-1:0] decode_opc_insn_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_rf_wb_o; // From mor1kx_decode of mor1kx_decode.v + wire [OPTION_RF_ADDR_WIDTH-1:0] decode_rfa_adr_o;// From mor1kx_decode of mor1kx_decode.v + wire [OPTION_OPERAND_WIDTH-1:0] decode_rfa_o;// From mor1kx_rf_cappuccino of mor1kx_rf_cappuccino.v + wire [OPTION_RF_ADDR_WIDTH-1:0] decode_rfb_adr_o;// From mor1kx_decode of mor1kx_decode.v + wire [OPTION_OPERAND_WIDTH-1:0] decode_rfb_o;// From mor1kx_rf_cappuccino of mor1kx_rf_cappuccino.v + wire [OPTION_RF_ADDR_WIDTH-1:0] decode_rfd_adr_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_valid_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire doing_rfe_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire du_restart_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_o;// From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire execute_adder_do_carry_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_adder_do_sub_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_bubble_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_except_ibus_align_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_except_ibus_err_o;// From mor1kx_decode_execute_cappuccino 
of mor1kx_decode_execute_cappuccino.v + wire execute_except_illegal_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_except_ipagefault_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_except_itlb_miss_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_except_syscall_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_except_trap_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [`OR1K_IMM_WIDTH-1:0] execute_imm16_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] execute_immediate_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_immediate_sel_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [9:0] execute_immjbr_upper_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] execute_jal_result_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [1:0] execute_lsu_length_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_lsu_zext_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] execute_mispredict_target_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_add_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_alu_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_bf_o; + wire execute_op_bnf_o; + wire execute_op_branch_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_brcond_o; // From 
mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_div_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_div_signed_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_div_unsigned_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_ffl1_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_jal_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_jbr_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_jr_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_lsu_atomic_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_lsu_load_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_lsu_store_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_mfspr_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_movhi_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_ext_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_msync_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [`OR1K_FPUOP_WIDTH-1:0] execute_op_fpu_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_mtspr_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_mul_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_mul_signed_o;// From mor1kx_decode_execute_cappuccino of 
mor1kx_decode_execute_cappuccino.v + wire execute_op_mul_unsigned_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_rfe_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_setflag_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_op_shift_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [`OR1K_ALU_OPC_WIDTH-1:0] execute_opc_alu_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [`OR1K_ALU_OPC_WIDTH-1:0] execute_opc_alu_secondary_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [`OR1K_OPCODE_WIDTH-1:0] execute_opc_insn_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_predicted_flag_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_rf_wb_o; // From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] execute_rfa_o;// From mor1kx_rf_cappuccino of mor1kx_rf_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] execute_rfb_o;// From mor1kx_rf_cappuccino of mor1kx_rf_cappuccino.v + wire [OPTION_RF_ADDR_WIDTH-1:0] execute_rfd_adr_o;// From mor1kx_decode_execute_cappuccino of mor1kx_decode_execute_cappuccino.v + wire execute_valid_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire fetch_exception_taken_o;// From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire fetch_rf_adr_valid_o; // From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfa_adr_o;// From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfb_adr_o;// From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire fetch_valid_o; // From mor1kx_fetch_cappuccino of 
mor1kx_fetch_cappuccino.v + wire flag_clear_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire flag_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire icache_hit_o; // From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire dcache_hit_o; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire lsu_except_align_o; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire lsu_except_dbus_o; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire lsu_except_dpagefault_o;// From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire lsu_except_dtlb_miss_o; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] lsu_result_o;// From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire lsu_valid_o; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] mfspr_dat_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire msync_stall_o; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] mul_result_o;// From mor1kx_execute_alu of mor1kx_execute_alu.v + wire overflow_clear_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire overflow_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire[`OR1K_FPCSR_WIDTH-1:0] fpcsr_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire fpcsr_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire padv_ctrl_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire padv_decode_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire padv_execute_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire padv_fetch_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire pipeline_flush_o; // From mor1kx_ctrl_cappuccino of mor1kx_ctrl_cappuccino.v + wire predicted_flag_o; // From mor1kx_branch_prediction of mor1kx_branch_prediction.v + wire [OPTION_OPERAND_WIDTH-1:0] rf_result_o; // From 
mor1kx_wb_mux_cappuccino of mor1kx_wb_mux_cappuccino.v + wire spr_bus_ack_dc_i; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire spr_bus_ack_dmmu_i; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire spr_bus_ack_ic_i; // From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire spr_bus_ack_immu_i; // From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dc_i;// From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dmmu_i;// From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_ic_i;// From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_immu_i;// From mor1kx_fetch_cappuccino of mor1kx_fetch_cappuccino.v + wire spr_gpr_ack_o; // From mor1kx_rf_cappuccino of mor1kx_rf_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_gpr_dat_o;// From mor1kx_rf_cappuccino of mor1kx_rf_cappuccino.v + wire [OPTION_OPERAND_WIDTH-1:0] store_buffer_epcr_o;// From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire store_buffer_err_o; // From mor1kx_lsu_cappuccino of mor1kx_lsu_cappuccino.v + wire wb_rf_wb_o; // From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + wire [OPTION_RF_ADDR_WIDTH-1:0] wb_rfd_adr_o;// From mor1kx_execute_ctrl_cappuccino of mor1kx_execute_ctrl_cappuccino.v + // End of automatics + + /* mor1kx_fetch_cappuccino AUTO_TEMPLATE ( + .padv_i (padv_fetch_o), + .padv_ctrl_i (padv_ctrl_o), + .decode_branch_i (decode_branch_o), + .decode_branch_target_i (decode_branch_target_o), + .ctrl_branch_exception_i (ctrl_branch_exception_o), + .ctrl_branch_except_pc_i (ctrl_branch_except_pc_o), + .doing_rfe_i (doing_rfe_o), + .pipeline_flush_i (pipeline_flush_o), + .pc_decode_o (pc_fetch_to_decode), + .decode_insn_o (insn_fetch_to_decode), + .du_restart_pc_i (du_restart_pc_o), + .du_restart_i (du_restart_o), + .decode_op_brcond_i 
(decode_op_brcond_o), + .branch_mispredict_i (branch_mispredict_o), + .execute_mispredict_target_i (execute_mispredict_target_o), + .spr_bus_dat_ic_o (spr_bus_dat_ic_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_ic_o (spr_bus_ack_ic_i), + .spr_bus_dat_immu_o (spr_bus_dat_immu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_immu_o (spr_bus_ack_immu_i), + .spr_bus_addr_i (spr_bus_addr_o[15:0]), + .spr_bus_we_i (spr_bus_we_o), + .spr_bus_stb_i (spr_bus_stb_o), + .spr_bus_dat_i (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .ic_enable (spr_sr_o[`OR1K_SPR_SR_ICE]), + .immu_enable_i (spr_sr_o[`OR1K_SPR_SR_IME]), + .supervisor_mode_i (spr_sr_o[`OR1K_SPR_SR_SM]), + ); */ + mor1kx_fetch_cappuccino + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .FEATURE_INSTRUCTIONCACHE(FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH(OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS(OPTION_ICACHE_WAYS), + .OPTION_ICACHE_LIMIT_WIDTH(OPTION_ICACHE_LIMIT_WIDTH), + .FEATURE_IMMU(FEATURE_IMMU), + .FEATURE_IMMU_HW_TLB_RELOAD(FEATURE_IMMU_HW_TLB_RELOAD), + .OPTION_IMMU_SET_WIDTH(OPTION_IMMU_SET_WIDTH), + .OPTION_IMMU_WAYS(OPTION_IMMU_WAYS) + ) + mor1kx_fetch_cappuccino + (/*AUTOINST*/ + // Outputs + .spr_bus_dat_ic_o (spr_bus_dat_ic_i[OPTION_OPERAND_WIDTH-1:0]), // Templated + .spr_bus_ack_ic_o (spr_bus_ack_ic_i), // Templated + .spr_bus_dat_immu_o (spr_bus_dat_immu_i[OPTION_OPERAND_WIDTH-1:0]), // Templated + .spr_bus_ack_immu_o (spr_bus_ack_immu_i), // Templated + .ibus_req_o (ibus_req_o), + .ibus_adr_o (ibus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .ibus_burst_o (ibus_burst_o), + .pc_decode_o (pc_fetch_to_decode), // Templated + .decode_insn_o (insn_fetch_to_decode), // Templated + .fetch_valid_o (fetch_valid_o), + .fetch_rfa_adr_o (fetch_rfa_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .fetch_rfb_adr_o (fetch_rfb_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .fetch_rf_adr_valid_o (fetch_rf_adr_valid_o), + 
.decode_except_ibus_err_o (decode_except_ibus_err_o), + .decode_except_itlb_miss_o (decode_except_itlb_miss_o), + .decode_except_ipagefault_o (decode_except_ipagefault_o), + .fetch_exception_taken_o (fetch_exception_taken_o), + .ic_hit_o (icache_hit_o), + // Inputs + .clk (clk), + .rst (rst), + .spr_bus_addr_i (spr_bus_addr_o[15:0]), // Templated + .spr_bus_we_i (spr_bus_we_o), // Templated + .spr_bus_stb_i (spr_bus_stb_o), // Templated + .spr_bus_dat_i (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), // Templated + .ic_enable (spr_sr_o[`OR1K_SPR_SR_ICE]), // Templated + .immu_enable_i (spr_sr_o[`OR1K_SPR_SR_IME]), // Templated + .supervisor_mode_i (spr_sr_o[`OR1K_SPR_SR_SM]), // Templated + .ibus_err_i (ibus_err_i), + .ibus_ack_i (ibus_ack_i), + .ibus_dat_i (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), + .padv_i (padv_fetch_o), // Templated + .padv_ctrl_i (padv_ctrl_o), // Templated + .decode_branch_i (decode_branch_o), // Templated + .decode_branch_target_i (decode_branch_target_o), // Templated + .ctrl_branch_exception_i (ctrl_branch_exception_o), // Templated + .ctrl_branch_except_pc_i (ctrl_branch_except_pc_o), // Templated + .du_restart_i (du_restart_o), // Templated + .du_restart_pc_i (du_restart_pc_o), // Templated + .decode_op_brcond_i (decode_op_brcond_o), // Templated + .branch_mispredict_i (branch_mispredict_o), // Templated + .execute_mispredict_target_i (execute_mispredict_target_o), // Templated + .pipeline_flush_i (pipeline_flush_o), // Templated + .doing_rfe_i (doing_rfe_o)); // Templated + + /* mor1kx_decode AUTO_TEMPLATE ( + .decode_insn_i (insn_fetch_to_decode), + ); */ + mor1kx_decode + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + .FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + 
.FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_MSYNC(FEATURE_MSYNC), + .FEATURE_PSYNC(FEATURE_PSYNC), + .FEATURE_CSYNC(FEATURE_CSYNC), + .FEATURE_ATOMIC(FEATURE_ATOMIC), + .FEATURE_FPU(FEATURE_FPU), // pipeline cappuccino: decode instance + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + .FEATURE_CUST8(FEATURE_CUST8) + ) + mor1kx_decode + (/*AUTOINST*/ + // Outputs + .decode_opc_alu_o (decode_opc_alu_o[`OR1K_ALU_OPC_WIDTH-1:0]), + .decode_opc_alu_secondary_o (decode_opc_alu_secondary_o[`OR1K_ALU_OPC_WIDTH-1:0]), + .decode_imm16_o (decode_imm16_o[`OR1K_IMM_WIDTH-1:0]), + .decode_immediate_o (decode_immediate_o[OPTION_OPERAND_WIDTH-1:0]), + .decode_immediate_sel_o (decode_immediate_sel_o), + .decode_immjbr_upper_o (decode_immjbr_upper_o[9:0]), + .decode_rfd_adr_o (decode_rfd_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .decode_rfa_adr_o (decode_rfa_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .decode_rfb_adr_o (decode_rfb_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .decode_rf_wb_o (decode_rf_wb_o), + .decode_op_jbr_o (decode_op_jbr_o), + .decode_op_jr_o (decode_op_jr_o), + .decode_op_jal_o (decode_op_jal_o), + .decode_op_bf_o (decode_op_bf_o), + .decode_op_bnf_o (decode_op_bnf_o), + .decode_op_brcond_o (decode_op_brcond_o), + .decode_op_branch_o (decode_op_branch_o), + .decode_op_alu_o (decode_op_alu_o), + .decode_op_lsu_load_o (decode_op_lsu_load_o), + .decode_op_lsu_store_o (decode_op_lsu_store_o), + .decode_op_lsu_atomic_o (decode_op_lsu_atomic_o), + .decode_lsu_length_o (decode_lsu_length_o[1:0]), + .decode_lsu_zext_o (decode_lsu_zext_o), + .decode_op_mfspr_o (decode_op_mfspr_o), + .decode_op_mtspr_o (decode_op_mtspr_o), + .decode_op_rfe_o (decode_op_rfe_o), + .decode_op_setflag_o 
(decode_op_setflag_o), + .decode_op_add_o (decode_op_add_o), + .decode_op_mul_o (decode_op_mul_o), + .decode_op_mul_signed_o (decode_op_mul_signed_o), + .decode_op_mul_unsigned_o (decode_op_mul_unsigned_o), + .decode_op_div_o (decode_op_div_o), + .decode_op_div_signed_o (decode_op_div_signed_o), + .decode_op_div_unsigned_o (decode_op_div_unsigned_o), + .decode_op_shift_o (decode_op_shift_o), + .decode_op_ffl1_o (decode_op_ffl1_o), + .decode_op_movhi_o (decode_op_movhi_o), + .decode_op_ext_o (decode_op_ext_o), + .decode_op_msync_o (decode_op_msync_o), + .decode_op_fpu_o (decode_op_fpu_o), + .decode_adder_do_sub_o (decode_adder_do_sub_o), + .decode_adder_do_carry_o (decode_adder_do_carry_o), + .decode_except_illegal_o (decode_except_illegal_o), + .decode_except_syscall_o (decode_except_syscall_o), + .decode_except_trap_o (decode_except_trap_o), + .decode_opc_insn_o (decode_opc_insn_o[`OR1K_OPCODE_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .decode_insn_i (insn_fetch_to_decode)); // Templated + + /* mor1kx_decode_execute_cappuccino AUTO_TEMPLATE ( + .padv_i (padv_decode_o), + .pc_decode_i (pc_fetch_to_decode), + .decode_rfb_i (decode_rfb_o), + .execute_rfb_i (execute_rfb_o), + .predicted_flag_i (predicted_flag_o), + .flag_i (ctrl_flag_o), + .pc_execute_o (pc_decode_to_execute), + .pipeline_flush_i (pipeline_flush_o), + .decode_opc_alu_i (decode_opc_alu_o), + .decode_opc_alu_secondary_i (decode_opc_alu_secondary_o), + .decode_imm16_i (decode_imm16_o), + .decode_immediate_i (decode_immediate_o), + .decode_immediate_sel_i (decode_immediate_sel_o), + .decode_immjbr_upper_i (decode_immjbr_upper_o), + .decode_adder_do_sub_i (decode_adder_do_sub_o), + .decode_adder_do_carry_i (decode_adder_do_carry_o), + .decode_rfd_adr_i (decode_rfd_adr_o), + .decode_rfa_adr_i (decode_rfa_adr_o), + .decode_rfb_adr_i (decode_rfb_adr_o), + .ctrl_rfd_adr_i (ctrl_rfd_adr_o), + .ctrl_op_lsu_load_i (ctrl_op_lsu_load_o), + .ctrl_op_mfspr_i (ctrl_op_mfspr_o), + .ctrl_op_mul_i 
(ctrl_op_mul_o), + .decode_rf_wb_i (decode_rf_wb_o), + .decode_op_alu_i (decode_op_alu_o), + .decode_op_setflag_i (decode_op_setflag_o), + .decode_op_jbr_i (decode_op_jbr_o), + .decode_op_jr_i (decode_op_jr_o), + .decode_op_jal_i (decode_op_jal_o), + .decode_op_bf_i (decode_op_bf_o), + .decode_op_bnf_i (decode_op_bnf_o), + .decode_op_brcond_i (decode_op_brcond_o), + .decode_op_branch_i (decode_op_branch_o), + .decode_op_lsu_load_i (decode_op_lsu_load_o), + .decode_op_lsu_store_i (decode_op_lsu_store_o), + .decode_op_lsu_atomic_i (decode_op_lsu_atomic_o), + .decode_lsu_length_i (decode_lsu_length_o[1:0]), + .decode_lsu_zext_i (decode_lsu_zext_o), + .decode_op_mfspr_i (decode_op_mfspr_o), + .decode_op_mtspr_i (decode_op_mtspr_o), + .decode_op_rfe_i (decode_op_rfe_o), + .decode_op_add_i (decode_op_add_o), + .decode_op_mul_i (decode_op_mul_o), + .decode_op_mul_signed_i (decode_op_mul_signed_o), + .decode_op_mul_unsigned_i (decode_op_mul_unsigned_o), + .decode_op_div_i (decode_op_div_o), + .decode_op_div_signed_i (decode_op_div_signed_o), + .decode_op_div_unsigned_i (decode_op_div_unsigned_o), + .decode_op_shift_i (decode_op_shift_o), + .decode_op_ffl1_i (decode_op_ffl1_o), + .decode_op_movhi_i (decode_op_movhi_o), + .decode_op_msync_i (decode_op_msync_o), + .decode_op_fpu_i (decode_op_fpu_o), + .decode_opc_insn_i (decode_opc_insn_o[`OR1K_OPCODE_WIDTH-1:0]), + .decode_except_ibus_err_i (decode_except_ibus_err_o), + .decode_except_itlb_miss_i (decode_except_itlb_miss_o), + .decode_except_ipagefault_i (decode_except_ipagefault_o), + .decode_except_illegal_i (decode_except_illegal_o), + .decode_except_syscall_i (decode_except_syscall_o), + .decode_except_trap_i (decode_except_trap_o), + ); */ + mor1kx_decode_execute_cappuccino + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_FPU(FEATURE_FPU), // pipeline 
cappuccino: decode_execute instance + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER) + ) + mor1kx_decode_execute_cappuccino + (/*AUTOINST*/ + // Outputs + .execute_predicted_flag_o (execute_predicted_flag_o), + .execute_mispredict_target_o (execute_mispredict_target_o[OPTION_OPERAND_WIDTH-1:0]), + .execute_opc_alu_o (execute_opc_alu_o[`OR1K_ALU_OPC_WIDTH-1:0]), + .execute_opc_alu_secondary_o (execute_opc_alu_secondary_o[`OR1K_ALU_OPC_WIDTH-1:0]), + .execute_imm16_o (execute_imm16_o[`OR1K_IMM_WIDTH-1:0]), + .execute_immediate_o (execute_immediate_o[OPTION_OPERAND_WIDTH-1:0]), + .execute_immediate_sel_o (execute_immediate_sel_o), + .execute_adder_do_sub_o (execute_adder_do_sub_o), + .execute_adder_do_carry_o (execute_adder_do_carry_o), + .execute_immjbr_upper_o (execute_immjbr_upper_o[9:0]), + .execute_rfd_adr_o (execute_rfd_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .execute_rf_wb_o (execute_rf_wb_o), + .execute_op_alu_o (execute_op_alu_o), + .execute_op_setflag_o (execute_op_setflag_o), + .execute_op_jbr_o (execute_op_jbr_o), + .execute_op_jr_o (execute_op_jr_o), + .execute_op_jal_o (execute_op_jal_o), + .execute_op_brcond_o (execute_op_brcond_o), + .execute_op_branch_o (execute_op_branch_o), + .execute_op_lsu_load_o (execute_op_lsu_load_o), + .execute_op_lsu_store_o (execute_op_lsu_store_o), + .execute_op_lsu_atomic_o (execute_op_lsu_atomic_o), + .execute_lsu_length_o (execute_lsu_length_o[1:0]), + .execute_lsu_zext_o (execute_lsu_zext_o), + .execute_op_mfspr_o (execute_op_mfspr_o), + .execute_op_mtspr_o (execute_op_mtspr_o), + .execute_op_rfe_o (execute_op_rfe_o), + .execute_op_add_o (execute_op_add_o), + .execute_op_mul_o (execute_op_mul_o), + .execute_op_mul_signed_o (execute_op_mul_signed_o), + .execute_op_mul_unsigned_o (execute_op_mul_unsigned_o), + .execute_op_div_o (execute_op_div_o), + .execute_op_div_signed_o (execute_op_div_signed_o), + .execute_op_div_unsigned_o (execute_op_div_unsigned_o), + .execute_op_shift_o (execute_op_shift_o), + .execute_op_ffl1_o 
(execute_op_ffl1_o), + .execute_op_movhi_o (execute_op_movhi_o), + .execute_op_ext_o (execute_op_ext_o), + .execute_op_msync_o (execute_op_msync_o), + .execute_op_fpu_o (execute_op_fpu_o), + .execute_op_bf_o (execute_op_bf_o), + .execute_op_bnf_o (execute_op_bnf_o), + .execute_jal_result_o (execute_jal_result_o[OPTION_OPERAND_WIDTH-1:0]), + .execute_opc_insn_o (execute_opc_insn_o[`OR1K_OPCODE_WIDTH-1:0]), + .decode_branch_o (decode_branch_o), + .decode_branch_target_o (decode_branch_target_o[OPTION_OPERAND_WIDTH-1:0]), + .execute_except_ibus_err_o (execute_except_ibus_err_o), + .execute_except_itlb_miss_o (execute_except_itlb_miss_o), + .execute_except_ipagefault_o (execute_except_ipagefault_o), + .execute_except_illegal_o (execute_except_illegal_o), + .execute_except_ibus_align_o (execute_except_ibus_align_o), + .execute_except_syscall_o (execute_except_syscall_o), + .execute_except_trap_o (execute_except_trap_o), + .pc_execute_o (pc_decode_to_execute), // Templated + .decode_valid_o (decode_valid_o), + .decode_bubble_o (decode_bubble_o), + .execute_bubble_o (execute_bubble_o), + // Inputs + .clk (clk), + .rst (rst), + .padv_i (padv_decode_o), // Templated + .pc_decode_i (pc_fetch_to_decode), // Templated + .decode_rfb_i (decode_rfb_o), // Templated + .execute_rfb_i (execute_rfb_o), // Templated + .predicted_flag_i (predicted_flag_o), // Templated + .pipeline_flush_i (pipeline_flush_o), // Templated + .decode_opc_alu_i (decode_opc_alu_o), // Templated + .decode_opc_alu_secondary_i (decode_opc_alu_secondary_o), // Templated + .decode_imm16_i (decode_imm16_o), // Templated + .decode_immediate_i (decode_immediate_o), // Templated + .decode_immediate_sel_i (decode_immediate_sel_o), // Templated + .decode_adder_do_sub_i (decode_adder_do_sub_o), // Templated + .decode_adder_do_carry_i (decode_adder_do_carry_o), // Templated + .decode_immjbr_upper_i (decode_immjbr_upper_o), // Templated + .decode_rfd_adr_i (decode_rfd_adr_o), // Templated + .decode_rfa_adr_i 
(decode_rfa_adr_o), // Templated + .decode_rfb_adr_i (decode_rfb_adr_o), // Templated + .ctrl_rfd_adr_i (ctrl_rfd_adr_o), // Templated + .ctrl_op_lsu_load_i (ctrl_op_lsu_load_o), // Templated + .ctrl_op_mfspr_i (ctrl_op_mfspr_o), // Templated + .ctrl_op_mul_i (ctrl_op_mul_o), // Templated + .decode_rf_wb_i (decode_rf_wb_o), // Templated + .decode_op_alu_i (decode_op_alu_o), // Templated + .decode_op_setflag_i (decode_op_setflag_o), // Templated + .decode_op_jbr_i (decode_op_jbr_o), // Templated + .decode_op_jr_i (decode_op_jr_o), // Templated + .decode_op_jal_i (decode_op_jal_o), // Templated + .decode_op_bf_i (decode_op_bf_o), // Templated + .decode_op_bnf_i (decode_op_bnf_o), // Templated + .decode_op_brcond_i (decode_op_brcond_o), // Templated + .decode_op_branch_i (decode_op_branch_o), // Templated + .decode_op_lsu_load_i (decode_op_lsu_load_o), // Templated + .decode_op_lsu_store_i (decode_op_lsu_store_o), // Templated + .decode_op_lsu_atomic_i (decode_op_lsu_atomic_o), // Templated + .decode_lsu_length_i (decode_lsu_length_o[1:0]), // Templated + .decode_lsu_zext_i (decode_lsu_zext_o), // Templated + .decode_op_mfspr_i (decode_op_mfspr_o), // Templated + .decode_op_mtspr_i (decode_op_mtspr_o), // Templated + .decode_op_rfe_i (decode_op_rfe_o), // Templated + .decode_op_add_i (decode_op_add_o), // Templated + .decode_op_mul_i (decode_op_mul_o), // Templated + .decode_op_mul_signed_i (decode_op_mul_signed_o), // Templated + .decode_op_mul_unsigned_i (decode_op_mul_unsigned_o), // Templated + .decode_op_div_i (decode_op_div_o), // Templated + .decode_op_div_signed_i (decode_op_div_signed_o), // Templated + .decode_op_div_unsigned_i (decode_op_div_unsigned_o), // Templated + .decode_op_shift_i (decode_op_shift_o), // Templated + .decode_op_ffl1_i (decode_op_ffl1_o), // Templated + .decode_op_movhi_i (decode_op_movhi_o), // Templated + .decode_op_ext_i (decode_op_ext_o), // Templated + .decode_op_msync_i (decode_op_msync_o), // Templated + .decode_op_fpu_i 
(decode_op_fpu_o), // Templated + .decode_opc_insn_i (decode_opc_insn_o[`OR1K_OPCODE_WIDTH-1:0]), // Templated + .decode_except_ibus_err_i (decode_except_ibus_err_o), // Templated + .decode_except_itlb_miss_i (decode_except_itlb_miss_o), // Templated + .decode_except_ipagefault_i (decode_except_ipagefault_o), // Templated + .decode_except_illegal_i (decode_except_illegal_o), // Templated + .decode_except_syscall_i (decode_except_syscall_o), // Templated + .decode_except_trap_i (decode_except_trap_o)); // Templated + + /* mor1kx_branch_prediction AUTO_TEMPLATE ( + .op_bf_i (decode_op_bf_o), + .op_bnf_i (decode_op_bnf_o), + .execute_bf_i (execute_op_bf_o), + .execute_bnf_i (execute_op_bnf_o), + .padv_decode_i (padv_decode_o), + .immjbr_upper_i (decode_immjbr_upper_o), + .prev_op_brcond_i (execute_op_brcond_o), + .prev_predicted_flag_i (execute_predicted_flag_o), + .brn_pc_i (pc_fetch_to_decode), + .flag_i (ctrl_flag_o), + );*/ + mor1kx_branch_prediction + #( + .FEATURE_BRANCH_PREDICTOR(FEATURE_BRANCH_PREDICTOR), + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH) + ) + mor1kx_branch_prediction + (/*AUTOINST*/ + // Outputs + .predicted_flag_o (predicted_flag_o), + .branch_mispredict_o (branch_mispredict_o), + // Inputs + .clk (clk), + .rst (rst), + .op_bf_i (decode_op_bf_o), // Templated + .op_bnf_i (decode_op_bnf_o), // Templated + .execute_bf_i (execute_op_bf_o), // Templated + .execute_bnf_i (execute_op_bnf_o), // Templated + .padv_decode_i (padv_decode_o), // Templated + .immjbr_upper_i (decode_immjbr_upper_o), // Templated + .prev_op_brcond_i (execute_op_brcond_o), // Templated + .prev_predicted_flag_i (execute_predicted_flag_o), // Templated + .brn_pc_i (pc_fetch_to_decode), // Templated + .flag_i (ctrl_flag_o)); // Templated + + /* mor1kx_execute_alu AUTO_TEMPLATE ( + .padv_decode_i (padv_decode_o), + .padv_execute_i (padv_execute_o), + .padv_ctrl_i (padv_ctrl_o), + .opc_alu_i (execute_opc_alu_o), + .opc_alu_secondary_i (execute_opc_alu_secondary_o), + .imm16_i 
(execute_imm16_o), + .decode_immediate_i (decode_immediate_o), + .decode_immediate_sel_i (decode_immediate_sel_o), + .immediate_i (execute_immediate_o), + .immediate_sel_i (execute_immediate_sel_o), + .decode_valid_i (decode_valid_o), + .decode_op_mul_i (decode_op_mul_o), + .op_alu_i (execute_op_alu_o), + .op_add_i (execute_op_add_o), + .op_mul_i (execute_op_mul_o), + .op_mul_signed_i (execute_op_mul_signed_o), + .op_mul_unsigned_i (execute_op_mul_unsigned_o), + .op_div_i (execute_op_div_o), + .op_div_signed_i (execute_op_div_signed_o), + .op_div_unsigned_i (execute_op_div_unsigned_o), + .op_shift_i (execute_op_shift_o), + .op_ffl1_i (execute_op_ffl1_o), + .op_setflag_i (execute_op_setflag_o), + .op_mtspr_i (execute_op_mtspr_o), + .op_mfspr_i (execute_op_mfspr_o), + .op_movhi_i (execute_op_movhi_o), + .op_fpu_i (execute_op_fpu_o), + .fpu_round_mode_i (ctrl_fpu_round_mode_o), + .op_jbr_i (execute_op_jbr_o), + .op_jr_i (execute_op_jr_o), + .immjbr_upper_i (execute_immjbr_upper_o), + .pc_execute_i (pc_decode_to_execute), + .adder_do_sub_i (execute_adder_do_sub_o), + .adder_do_carry_i (execute_adder_do_carry_o), + .decode_rfa_i (decode_rfa_o), + .decode_rfb_i (decode_rfb_o), + .rfa_i (execute_rfa_o), + .rfb_i (execute_rfb_o), + .flag_i (ctrl_flag_o), + .carry_i (ctrl_carry_o), + ); */ + mor1kx_execute_alu + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .FEATURE_OVERFLOW(FEATURE_OVERFLOW), + .FEATURE_CARRY_FLAG(FEATURE_CARRY_FLAG), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + .FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + .FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + .FEATURE_CUST8(FEATURE_CUST8), + 
.FEATURE_FPU(FEATURE_FPU), // pipeline cappuccino: execute_alu instance + .OPTION_FTOI_ROUNDING(OPTION_FTOI_ROUNDING), // pipeline cappuccino: execute_alu instance + .OPTION_SHIFTER(OPTION_SHIFTER), + .CALCULATE_BRANCH_DEST("FALSE") + ) + mor1kx_execute_alu + (/*AUTOINST*/ + // Outputs + .flag_set_o (flag_set_o), + .flag_clear_o (flag_clear_o), + .carry_set_o (carry_set_o), + .carry_clear_o (carry_clear_o), + .overflow_set_o (overflow_set_o), + .overflow_clear_o (overflow_clear_o), + .fpcsr_o (fpcsr_o), + .fpcsr_set_o (fpcsr_set_o), + .alu_result_o (alu_result_o[OPTION_OPERAND_WIDTH-1:0]), + .alu_valid_o (alu_valid_o), + .mul_result_o (mul_result_o[OPTION_OPERAND_WIDTH-1:0]), + .adder_result_o (adder_result_o[OPTION_OPERAND_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .padv_decode_i (padv_decode_o), // Templated + .padv_execute_i (padv_execute_o), // Templated + .padv_ctrl_i (padv_ctrl_o), // Templated + .pipeline_flush_i (pipeline_flush_o), + .opc_alu_i (execute_opc_alu_o), // Templated + .opc_alu_secondary_i (execute_opc_alu_secondary_o), // Templated + .imm16_i (execute_imm16_o), // Templated + .immediate_i (execute_immediate_o), // Templated + .immediate_sel_i (execute_immediate_sel_o), // Templated + .decode_immediate_i (decode_immediate_o), // Templated + .decode_immediate_sel_i (decode_immediate_sel_o), // Templated + .decode_valid_i (decode_valid_o), // Templated + .decode_op_mul_i (decode_op_mul_o), // Templated + .op_alu_i (execute_op_alu_o), // Templated + .op_add_i (execute_op_add_o), // Templated + .op_mul_i (execute_op_mul_o), // Templated + .op_mul_signed_i (execute_op_mul_signed_o), // Templated + .op_mul_unsigned_i (execute_op_mul_unsigned_o), // Templated + .op_div_i (execute_op_div_o), // Templated + .op_div_signed_i (execute_op_div_signed_o), // Templated + .op_div_unsigned_i (execute_op_div_unsigned_o), // Templated + .op_shift_i (execute_op_shift_o), // Templated + .op_ffl1_i (execute_op_ffl1_o), // Templated + .op_setflag_i 
(execute_op_setflag_o), // Templated + .op_mtspr_i (execute_op_mtspr_o), // Templated + .op_mfspr_i (execute_op_mfspr_o), // Templated + .op_movhi_i (execute_op_movhi_o), // Templated + .op_ext_i (execute_op_ext_o), // Templated + .op_fpu_i (execute_op_fpu_o), // Templated + .fpu_round_mode_i (ctrl_fpu_round_mode_o), // Templated + .op_jbr_i (execute_op_jbr_o), // Templated + .op_jr_i (execute_op_jr_o), // Templated + .immjbr_upper_i (execute_immjbr_upper_o), // Templated + .pc_execute_i (pc_decode_to_execute), // Templated + .adder_do_sub_i (execute_adder_do_sub_o), // Templated + .adder_do_carry_i (execute_adder_do_carry_o), // Templated + .decode_rfa_i (decode_rfa_o), // Templated + .decode_rfb_i (decode_rfb_o), // Templated + .rfa_i (execute_rfa_o), // Templated + .rfb_i (execute_rfb_o), // Templated + .flag_i (ctrl_flag_o), // Templated + .carry_i (ctrl_carry_o)); // Templated + + + /* mor1kx_lsu_cappuccino AUTO_TEMPLATE ( + .padv_execute_i (padv_execute_o), + .padv_ctrl_i (padv_ctrl_o), + .decode_valid_i (decode_valid_o), + .exec_lsu_adr_i (adder_result_o), + .ctrl_lsu_adr_i (ctrl_lsu_adr_o), + .ctrl_rfb_i (ctrl_rfb_o), + .exec_op_lsu_load_i (execute_op_lsu_load_o), + .exec_op_lsu_store_i (execute_op_lsu_store_o), + .exec_op_lsu_atomic_i (execute_op_lsu_atomic_o), + .ctrl_op_lsu_load_i (ctrl_op_lsu_load_o), + .ctrl_op_lsu_store_i (ctrl_op_lsu_store_o), + .ctrl_op_lsu_atomic_i (ctrl_op_lsu_atomic_o), + .ctrl_op_msync_i (ctrl_op_msync_o), + .ctrl_lsu_length_i (ctrl_lsu_length_o), + .ctrl_lsu_zext_i (ctrl_lsu_zext_o), + .ctrl_epcr_i (ctrl_epcr_o), + .pipeline_flush_i (pipeline_flush_o), + .dc_enable_i (spr_sr_o[`OR1K_SPR_SR_DCE]), + .dmmu_enable_i (spr_sr_o[`OR1K_SPR_SR_DME]), + .supervisor_mode_i (spr_sr_o[`OR1K_SPR_SR_SM]), + .spr_bus_dat_dc_o (spr_bus_dat_dc_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_dc_o (spr_bus_ack_dc_i), + .spr_bus_dat_dmmu_o (spr_bus_dat_dmmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_dmmu_o (spr_bus_ack_dmmu_i), + .spr_bus_addr_i 
(spr_bus_addr_o[15:0]), + .spr_bus_we_i (spr_bus_we_o), + .spr_bus_stb_i (spr_bus_stb_o), + .spr_bus_dat_i (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + ); */ + mor1kx_lsu_cappuccino + #( + .FEATURE_DATACACHE(FEATURE_DATACACHE), + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_DCACHE_BLOCK_WIDTH(OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH(OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS(OPTION_DCACHE_WAYS), + .OPTION_DCACHE_LIMIT_WIDTH(OPTION_DCACHE_LIMIT_WIDTH), + .OPTION_DCACHE_SNOOP(OPTION_DCACHE_SNOOP), + .FEATURE_DMMU(FEATURE_DMMU), + .FEATURE_DMMU_HW_TLB_RELOAD(FEATURE_DMMU_HW_TLB_RELOAD), + .OPTION_DMMU_SET_WIDTH(OPTION_DMMU_SET_WIDTH), + .OPTION_DMMU_WAYS(OPTION_DMMU_WAYS), + .FEATURE_STORE_BUFFER(FEATURE_STORE_BUFFER), + .OPTION_STORE_BUFFER_DEPTH_WIDTH(OPTION_STORE_BUFFER_DEPTH_WIDTH), + .FEATURE_ATOMIC(FEATURE_ATOMIC) + ) + mor1kx_lsu_cappuccino + (/*AUTOINST*/ + // Outputs + .store_buffer_epcr_o (store_buffer_epcr_o[OPTION_OPERAND_WIDTH-1:0]), + .lsu_result_o (lsu_result_o[OPTION_OPERAND_WIDTH-1:0]), + .lsu_valid_o (lsu_valid_o), + .lsu_except_dbus_o (lsu_except_dbus_o), + .lsu_except_align_o (lsu_except_align_o), + .lsu_except_dtlb_miss_o (lsu_except_dtlb_miss_o), + .lsu_except_dpagefault_o (lsu_except_dpagefault_o), + .store_buffer_err_o (store_buffer_err_o), + .atomic_flag_set_o (atomic_flag_set_o), + .atomic_flag_clear_o (atomic_flag_clear_o), + .msync_stall_o (msync_stall_o), + .spr_bus_dat_dc_o (spr_bus_dat_dc_i[OPTION_OPERAND_WIDTH-1:0]), // Templated + .spr_bus_ack_dc_o (spr_bus_ack_dc_i), // Templated + .spr_bus_dat_dmmu_o (spr_bus_dat_dmmu_i[OPTION_OPERAND_WIDTH-1:0]), // Templated + .spr_bus_ack_dmmu_o (spr_bus_ack_dmmu_i), // Templated + .dbus_adr_o (dbus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_req_o (dbus_req_o), + .dbus_dat_o (dbus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_bsel_o (dbus_bsel_o[3:0]), + .dbus_we_o (dbus_we_o), + .dbus_burst_o (dbus_burst_o), + .dc_hit_o (dcache_hit_o), + // Inputs + .clk (clk), + .rst 
(rst), + .padv_execute_i (padv_execute_o), // Templated + .padv_ctrl_i (padv_ctrl_o), // Templated + .decode_valid_i (decode_valid_o), // Templated + .exec_lsu_adr_i (adder_result_o), // Templated + .ctrl_lsu_adr_i (ctrl_lsu_adr_o), // Templated + .ctrl_rfb_i (ctrl_rfb_o), // Templated + .exec_op_lsu_load_i (execute_op_lsu_load_o), // Templated + .exec_op_lsu_store_i (execute_op_lsu_store_o), // Templated + .exec_op_lsu_atomic_i (execute_op_lsu_atomic_o), // Templated + .ctrl_op_lsu_load_i (ctrl_op_lsu_load_o), // Templated + .ctrl_op_lsu_store_i (ctrl_op_lsu_store_o), // Templated + .ctrl_op_lsu_atomic_i (ctrl_op_lsu_atomic_o), // Templated + .ctrl_op_msync_i (ctrl_op_msync_o), // Templated + .ctrl_lsu_length_i (ctrl_lsu_length_o), // Templated + .ctrl_lsu_zext_i (ctrl_lsu_zext_o), // Templated + .ctrl_epcr_i (ctrl_epcr_o), // Templated + .spr_bus_addr_i (spr_bus_addr_o[15:0]), // Templated + .spr_bus_we_i (spr_bus_we_o), // Templated + .spr_bus_stb_i (spr_bus_stb_o), // Templated + .spr_bus_dat_i (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), // Templated + .dc_enable_i (spr_sr_o[`OR1K_SPR_SR_DCE]), // Templated + .dmmu_enable_i (spr_sr_o[`OR1K_SPR_SR_DME]), // Templated + .supervisor_mode_i (spr_sr_o[`OR1K_SPR_SR_SM]), // Templated + .dbus_err_i (dbus_err_i), + .dbus_ack_i (dbus_ack_i), + .dbus_dat_i (dbus_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .pipeline_flush_i (pipeline_flush_o), // Templated + .snoop_adr_i (snoop_adr_i[31:0]), + .snoop_en_i (snoop_en_i)); + + + /* mor1kx_wb_mux_cappuccino AUTO_TEMPLATE ( + .alu_result_i (ctrl_alu_result_o), + .lsu_result_i (lsu_result_o), + .mul_result_i (mul_result_o), + .spr_i (mfspr_dat_o), + .op_mul_i (ctrl_op_mul_o), + .op_lsu_load_i (ctrl_op_lsu_load_o), + .pc_i (pc_execute_to_ctrl), + .op_mfspr_i (ctrl_op_mfspr_o), + ); */ + mor1kx_wb_mux_cappuccino + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH) + ) + mor1kx_wb_mux_cappuccino + (/*AUTOINST*/ + // Outputs + .rf_result_o (rf_result_o[OPTION_OPERAND_WIDTH-1:0]), + // Inputs 
+ .clk (clk), + .rst (rst), + .alu_result_i (ctrl_alu_result_o), // Templated + .lsu_result_i (lsu_result_o), // Templated + .mul_result_i (mul_result_o), // Templated + .spr_i (mfspr_dat_o), // Templated + .op_mul_i (ctrl_op_mul_o), // Templated + .op_lsu_load_i (ctrl_op_lsu_load_o), // Templated + .op_mfspr_i (ctrl_op_mfspr_o)); // Templated + + + /* mor1kx_rf_cappuccino AUTO_TEMPLATE ( + .padv_decode_i (padv_decode_o), + .padv_execute_i (padv_execute_o), + .padv_ctrl_i (padv_ctrl_o), + .fetch_rf_adr_valid_i (fetch_rf_adr_valid_o), + .fetch_rfa_adr_i (fetch_rfa_adr_o), + .fetch_rfb_adr_i (fetch_rfb_adr_o), + .decode_valid_i (decode_valid_o), + .decode_rfa_adr_i (decode_rfa_adr_o), + .decode_rfb_adr_i (decode_rfb_adr_o), + .execute_rfd_adr_i (execute_rfd_adr_o), + .ctrl_rfd_adr_i (ctrl_rfd_adr_o), + .wb_rfd_adr_i (wb_rfd_adr_o), + .spr_bus_addr_i (spr_bus_addr_o[15:0]), + .spr_bus_stb_i (spr_bus_stb_o), + .spr_bus_we_i (spr_bus_we_o), + .spr_bus_dat_i (spr_bus_dat_o), + .execute_rf_wb_i (execute_rf_wb_o), + .ctrl_rf_wb_i (ctrl_rf_wb_o), + .wb_rf_wb_i (wb_rf_wb_o), + .result_i (rf_result_o), + .ctrl_alu_result_i (ctrl_alu_result_o), + .pipeline_flush_i (pipeline_flush_o), + ); */ + mor1kx_rf_cappuccino + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .FEATURE_FASTCONTEXTS(FEATURE_FASTCONTEXTS), + .OPTION_RF_CLEAR_ON_INIT(OPTION_RF_CLEAR_ON_INIT), + .OPTION_RF_NUM_SHADOW_GPR(OPTION_RF_NUM_SHADOW_GPR), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .OPTION_RF_WORDS(OPTION_RF_WORDS), + .FEATURE_DEBUGUNIT(FEATURE_DEBUGUNIT) + ) + mor1kx_rf_cappuccino + (/*AUTOINST*/ + // Outputs + .spr_gpr_ack_o (spr_gpr_ack_o), + .spr_gpr_dat_o (spr_gpr_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .decode_rfa_o (decode_rfa_o[OPTION_OPERAND_WIDTH-1:0]), + .decode_rfb_o (decode_rfb_o[OPTION_OPERAND_WIDTH-1:0]), + .execute_rfa_o (execute_rfa_o[OPTION_OPERAND_WIDTH-1:0]), + .execute_rfb_o (execute_rfb_o[OPTION_OPERAND_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .padv_decode_i 
(padv_decode_o), // Templated + .padv_execute_i (padv_execute_o), // Templated + .padv_ctrl_i (padv_ctrl_o), // Templated + .decode_valid_i (decode_valid_o), // Templated + .fetch_rf_adr_valid_i (fetch_rf_adr_valid_o), // Templated + .fetch_rfa_adr_i (fetch_rfa_adr_o), // Templated + .fetch_rfb_adr_i (fetch_rfb_adr_o), // Templated + .decode_rfa_adr_i (decode_rfa_adr_o), // Templated + .decode_rfb_adr_i (decode_rfb_adr_o), // Templated + .execute_rfd_adr_i (execute_rfd_adr_o), // Templated + .ctrl_rfd_adr_i (ctrl_rfd_adr_o), // Templated + .wb_rfd_adr_i (wb_rfd_adr_o), // Templated + .spr_bus_addr_i (spr_bus_addr_o[15:0]), // Templated + .spr_bus_stb_i (spr_bus_stb_o), // Templated + .spr_bus_we_i (spr_bus_we_o), // Templated + .spr_bus_dat_i (spr_bus_dat_o), // Templated + .execute_rf_wb_i (execute_rf_wb_o), // Templated + .ctrl_rf_wb_i (ctrl_rf_wb_o), // Templated + .wb_rf_wb_i (wb_rf_wb_o), // Templated + .result_i (rf_result_o), // Templated + .ctrl_alu_result_i (ctrl_alu_result_o), // Templated + .pipeline_flush_i (pipeline_flush_o)); // Templated + + +`ifndef SYNTHESIS +// synthesis translate_off + /* Debug signals required for the debug monitor */ + +`include "mor1kx_utils.vh" + localparam RF_ADDR_WIDTH = calc_rf_addr_width(OPTION_RF_ADDR_WIDTH, + OPTION_RF_NUM_SHADOW_GPR); + + function [OPTION_OPERAND_WIDTH-1:0] get_gpr; + // verilator public + input [RF_ADDR_WIDTH-1:0] gpr_num; + begin + // TODO: handle load ops + if ((mor1kx_rf_cappuccino.execute_rfd_adr_i == gpr_num[4:0]) & + mor1kx_rf_cappuccino.execute_rf_wb_i) + get_gpr = alu_result_o; + else if ((mor1kx_rf_cappuccino.ctrl_rfd_adr_i == gpr_num[4:0]) & + mor1kx_rf_cappuccino.ctrl_rf_wb_i) + get_gpr = ctrl_alu_result_o; + else if ((mor1kx_rf_cappuccino.wb_rfd_adr_i == gpr_num[4:0]) & + mor1kx_rf_cappuccino.wb_rf_wb_i) + get_gpr = mor1kx_rf_cappuccino.result_i; + else + get_gpr = mor1kx_rf_cappuccino.rfa.mem[gpr_num]; + end + endfunction // + + + task set_gpr; + // verilator public + input 
[RF_ADDR_WIDTH-1:0] gpr_num; + input [OPTION_OPERAND_WIDTH-1:0] gpr_value; + begin + mor1kx_rf_cappuccino.rfa.mem[gpr_num] = gpr_value; + mor1kx_rf_cappuccino.rfb.mem[gpr_num] = gpr_value; + end + endtask +// synthesis translate_on +`endif + + + /* mor1kx_execute_ctrl_cappuccino AUTO_TEMPLATE ( + .padv_i (padv_execute_o), + .padv_ctrl_i (padv_ctrl_o), + .execute_except_ibus_err_i (execute_except_ibus_err_o), + .execute_except_itlb_miss_i (execute_except_itlb_miss_o), + .execute_except_ipagefault_i (execute_except_ipagefault_o), + .execute_except_illegal_i (execute_except_illegal_o), + .execute_except_ibus_align_i (execute_except_ibus_align_o), + .execute_except_syscall_i (execute_except_syscall_o), + .execute_except_trap_i (execute_except_trap_o), + .lsu_except_dbus_i (lsu_except_dbus_o), + .lsu_except_align_i (lsu_except_align_o), + .lsu_except_dtlb_miss_i (lsu_except_dtlb_miss_o), + .lsu_except_dpagefault_i (lsu_except_dpagefault_o), + .op_mul_i (execute_op_mul_o), + .op_lsu_load_i (execute_op_lsu_load_o), + .op_lsu_store_i (execute_op_lsu_store_o), + .op_lsu_atomic_i (execute_op_lsu_atomic_o), + .lsu_length_i (execute_lsu_length_o), + .lsu_zext_i (execute_lsu_zext_o), + .op_msync_i (execute_op_msync_o), + .op_mfspr_i (execute_op_mfspr_o), + .op_mtspr_i (execute_op_mtspr_o), + .alu_valid_i (alu_valid_o), + .lsu_valid_i (lsu_valid_o), + .msync_stall_i (msync_stall_o), + .alu_result_i (alu_result_o), + .adder_result_i (adder_result_o), + .execute_jal_result_i (execute_jal_result_o), + .op_jr_i (execute_op_jr_o), + .op_jal_i (execute_op_jal_o), + .op_rfe_i (execute_op_rfe_o), + .rfb_i (execute_rfb_o), + .flag_set_i (flag_set_o), + .flag_clear_i (flag_clear_o), + .pc_execute_i (pc_decode_to_execute), + .execute_rf_wb_i (execute_rf_wb_o), + .execute_rfd_adr_i (execute_rfd_adr_o), + .ctrl_mfspr_ack_i (ctrl_mfspr_ack_o), + .ctrl_mtspr_ack_i (ctrl_mtspr_ack_o), + .pipeline_flush_i (pipeline_flush_o), + .pc_ctrl_o (pc_execute_to_ctrl), + .execute_bubble_i 
(execute_bubble_o), + .carry_set_i (carry_set_o), + .carry_clear_i (carry_clear_o), + .overflow_set_i (overflow_set_o), + .overflow_clear_i (overflow_clear_o), + .fpcsr_i (fpcsr_o), + .fpcsr_set_i (fpcsr_set_o), + ); */ + mor1kx_execute_ctrl_cappuccino + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .FEATURE_FPU(FEATURE_FPU), // pipeline cappuccino: execute_ctrl instance + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER) + ) + mor1kx_execute_ctrl_cappuccino + (/*AUTOINST*/ + // Outputs + .ctrl_rf_wb_o (ctrl_rf_wb_o), + .wb_rf_wb_o (wb_rf_wb_o), + .ctrl_rfd_adr_o (ctrl_rfd_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .wb_rfd_adr_o (wb_rfd_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .ctrl_alu_result_o (ctrl_alu_result_o[OPTION_OPERAND_WIDTH-1:0]), + .ctrl_lsu_adr_o (ctrl_lsu_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .ctrl_rfb_o (ctrl_rfb_o[OPTION_OPERAND_WIDTH-1:0]), + .ctrl_flag_set_o (ctrl_flag_set_o), + .ctrl_flag_clear_o (ctrl_flag_clear_o), + .ctrl_carry_set_o (ctrl_carry_set_o), + .ctrl_carry_clear_o (ctrl_carry_clear_o), + .ctrl_overflow_set_o (ctrl_overflow_set_o), + .ctrl_overflow_clear_o (ctrl_overflow_clear_o), + .ctrl_fpcsr_o (ctrl_fpcsr_o), + .ctrl_fpcsr_set_o (ctrl_fpcsr_set_o), + .pc_ctrl_o (pc_execute_to_ctrl), // Templated + .ctrl_op_mul_o (ctrl_op_mul_o), + .ctrl_op_lsu_load_o (ctrl_op_lsu_load_o), + .ctrl_op_lsu_store_o (ctrl_op_lsu_store_o), + .ctrl_op_lsu_atomic_o (ctrl_op_lsu_atomic_o), + .ctrl_lsu_length_o (ctrl_lsu_length_o[1:0]), + .ctrl_lsu_zext_o (ctrl_lsu_zext_o), + .ctrl_op_msync_o (ctrl_op_msync_o), + .ctrl_op_mfspr_o (ctrl_op_mfspr_o), + .ctrl_op_mtspr_o (ctrl_op_mtspr_o), + .ctrl_op_rfe_o (ctrl_op_rfe_o), + .ctrl_except_ibus_err_o (ctrl_except_ibus_err_o), + .ctrl_except_itlb_miss_o (ctrl_except_itlb_miss_o), + .ctrl_except_ipagefault_o (ctrl_except_ipagefault_o), + .ctrl_except_ibus_align_o (ctrl_except_ibus_align_o), + .ctrl_except_illegal_o (ctrl_except_illegal_o), + .ctrl_except_syscall_o (ctrl_except_syscall_o), + 
.ctrl_except_dbus_o (ctrl_except_dbus_o), + .ctrl_except_dtlb_miss_o (ctrl_except_dtlb_miss_o), + .ctrl_except_dpagefault_o (ctrl_except_dpagefault_o), + .ctrl_except_align_o (ctrl_except_align_o), + .ctrl_except_trap_o (ctrl_except_trap_o), + .execute_valid_o (execute_valid_o), + .ctrl_valid_o (ctrl_valid_o), + // Inputs + .clk (clk), + .rst (rst), + .padv_i (padv_execute_o), // Templated + .padv_ctrl_i (padv_ctrl_o), // Templated + .execute_except_ibus_err_i (execute_except_ibus_err_o), // Templated + .execute_except_itlb_miss_i (execute_except_itlb_miss_o), // Templated + .execute_except_ipagefault_i (execute_except_ipagefault_o), // Templated + .execute_except_illegal_i (execute_except_illegal_o), // Templated + .execute_except_ibus_align_i (execute_except_ibus_align_o), // Templated + .execute_except_syscall_i (execute_except_syscall_o), // Templated + .lsu_except_dbus_i (lsu_except_dbus_o), // Templated + .lsu_except_align_i (lsu_except_align_o), // Templated + .lsu_except_dtlb_miss_i (lsu_except_dtlb_miss_o), // Templated + .lsu_except_dpagefault_i (lsu_except_dpagefault_o), // Templated + .execute_except_trap_i (execute_except_trap_o), // Templated + .pipeline_flush_i (pipeline_flush_o), // Templated + .op_mul_i (execute_op_mul_o), // Templated + .op_lsu_load_i (execute_op_lsu_load_o), // Templated + .op_lsu_store_i (execute_op_lsu_store_o), // Templated + .op_lsu_atomic_i (execute_op_lsu_atomic_o), // Templated + .lsu_length_i (execute_lsu_length_o), // Templated + .lsu_zext_i (execute_lsu_zext_o), // Templated + .op_msync_i (execute_op_msync_o), // Templated + .op_mfspr_i (execute_op_mfspr_o), // Templated + .op_mtspr_i (execute_op_mtspr_o), // Templated + .alu_valid_i (alu_valid_o), // Templated + .lsu_valid_i (lsu_valid_o), // Templated + .msync_stall_i (msync_stall_o), // Templated + .op_jr_i (execute_op_jr_o), // Templated + .op_jal_i (execute_op_jal_o), // Templated + .op_rfe_i (execute_op_rfe_o), // Templated + .alu_result_i (alu_result_o), // 
Templated + .adder_result_i (adder_result_o), // Templated + .rfb_i (execute_rfb_o), // Templated + .execute_jal_result_i (execute_jal_result_o), // Templated + .flag_set_i (flag_set_o), // Templated + .flag_clear_i (flag_clear_o), // Templated + .carry_set_i (carry_set_o), // Templated + .carry_clear_i (carry_clear_o), // Templated + .overflow_set_i (overflow_set_o), // Templated + .overflow_clear_i (overflow_clear_o), // Templated + .fpcsr_i (fpcsr_o), + .fpcsr_set_i (fpcsr_set_o), + .pc_execute_i (pc_decode_to_execute), // Templated + .execute_rf_wb_i (execute_rf_wb_o), // Templated + .execute_rfd_adr_i (execute_rfd_adr_o), // Templated + .execute_bubble_i (execute_bubble_o), // Templated + .ctrl_mfspr_ack_i (ctrl_mfspr_ack_o), // Templated + .ctrl_mtspr_ack_i (ctrl_mtspr_ack_o)); // Templated + + /* mor1kx_ctrl_cappuccino AUTO_TEMPLATE ( + .ctrl_alu_result_i (ctrl_alu_result_o), + .ctrl_lsu_adr_i (ctrl_lsu_adr_o), + .ctrl_rfb_i (ctrl_rfb_o), + .ctrl_flag_set_i (ctrl_flag_set_o), + .ctrl_flag_clear_i (ctrl_flag_clear_o), + .atomic_flag_set_i (atomic_flag_set_o), + .atomic_flag_clear_i (atomic_flag_clear_o), + .pc_ctrl_i (pc_execute_to_ctrl), + .pc_execute_i (pc_decode_to_execute), + .execute_op_branch_i (execute_op_branch_o), + .ctrl_op_mfspr_i (ctrl_op_mfspr_o), + .ctrl_op_mtspr_i (ctrl_op_mtspr_o), + .ctrl_op_rfe_i (ctrl_op_rfe_o), + .decode_branch_i (decode_branch_o), + .decode_branch_target_i (decode_branch_target_o), + .branch_mispredict_i (branch_mispredict_o), + .execute_mispredict_target_i (execute_mispredict_target_o), + .except_ibus_err_i (ctrl_except_ibus_err_o), + .except_itlb_miss_i (ctrl_except_itlb_miss_o), + .except_ipagefault_i (ctrl_except_ipagefault_o), + .except_ibus_align_i (ctrl_except_ibus_align_o), + .except_illegal_i (ctrl_except_illegal_o), + .except_syscall_i (ctrl_except_syscall_o), + .except_dbus_i (ctrl_except_dbus_o), + .except_dtlb_miss_i (ctrl_except_dtlb_miss_o), + .except_dpagefault_i (ctrl_except_dpagefault_o), + 
.except_trap_i (ctrl_except_trap_o), + .except_align_i (ctrl_except_align_o), + .fetch_valid_i (fetch_valid_o), + .decode_valid_i (decode_valid_o), + .execute_valid_i (execute_valid_o), + .ctrl_valid_i (ctrl_valid_o), + .fetch_exception_taken_i (fetch_exception_taken_o), + .decode_bubble_i (decode_bubble_o), + .execute_bubble_i (execute_bubble_o), + .store_buffer_epcr_i (store_buffer_epcr_o), + .store_buffer_err_i (store_buffer_err_o), + .ctrl_carry_set_i (ctrl_carry_set_o), + .ctrl_carry_clear_i (ctrl_carry_clear_o), + .ctrl_overflow_set_i (ctrl_overflow_set_o), + .ctrl_overflow_clear_i (ctrl_overflow_clear_o), + .ctrl_fpcsr_i (ctrl_fpcsr_o), + .ctrl_fpcsr_set_i (ctrl_fpcsr_set_o), + .spr_gpr_ack_i (spr_gpr_ack_o), + .spr_gpr_dat_i (spr_gpr_dat_o), + ) */ + mor1kx_ctrl_cappuccino + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .FEATURE_PIC(FEATURE_PIC), + .FEATURE_TIMER(FEATURE_TIMER), + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH), + .FEATURE_DATACACHE(FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH(OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH(OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS(OPTION_DCACHE_WAYS), + .FEATURE_DMMU(FEATURE_DMMU), + .OPTION_DMMU_SET_WIDTH(OPTION_DMMU_SET_WIDTH), + .OPTION_DMMU_WAYS(OPTION_DMMU_WAYS), + .FEATURE_INSTRUCTIONCACHE(FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH(OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS(OPTION_ICACHE_WAYS), + .FEATURE_IMMU(FEATURE_IMMU), + .OPTION_IMMU_SET_WIDTH(OPTION_IMMU_SET_WIDTH), + .OPTION_IMMU_WAYS(OPTION_IMMU_WAYS), + .FEATURE_DEBUGUNIT(FEATURE_DEBUGUNIT), + .FEATURE_PERFCOUNTERS(FEATURE_PERFCOUNTERS), + .OPTION_PERFCOUNTERS_NUM(OPTION_PERFCOUNTERS_NUM), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_FPU(FEATURE_FPU), // pipeline cappuccino: ctrl instance + .FEATURE_MULTICORE(FEATURE_MULTICORE), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + 
.FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE), + .FEATURE_DSX(FEATURE_DSX), + .FEATURE_FASTCONTEXTS(FEATURE_FASTCONTEXTS), + .OPTION_RF_NUM_SHADOW_GPR(OPTION_RF_NUM_SHADOW_GPR), + .FEATURE_OVERFLOW(FEATURE_OVERFLOW), + .FEATURE_CARRY_FLAG(FEATURE_CARRY_FLAG) + ) + mor1kx_ctrl_cappuccino + (/*AUTOINST*/ + // Outputs + .ctrl_epcr_o (ctrl_epcr_o[OPTION_OPERAND_WIDTH-1:0]), + .mfspr_dat_o (mfspr_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .ctrl_mfspr_ack_o (ctrl_mfspr_ack_o), + .ctrl_mtspr_ack_o (ctrl_mtspr_ack_o), + .ctrl_flag_o (ctrl_flag_o), + .ctrl_carry_o (ctrl_carry_o), + .ctrl_fpu_round_mode_o (ctrl_fpu_round_mode_o), + .ctrl_branch_exception_o (ctrl_branch_exception_o), + .ctrl_branch_except_pc_o (ctrl_branch_except_pc_o[OPTION_OPERAND_WIDTH-1:0]), + .pipeline_flush_o (pipeline_flush_o), + .doing_rfe_o (doing_rfe_o), + .padv_fetch_o (padv_fetch_o), + .padv_decode_o (padv_decode_o), + .padv_execute_o (padv_execute_o), + .padv_ctrl_o (padv_ctrl_o), + .du_dat_o (du_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .du_ack_o (du_ack_o), + .du_stall_o (du_stall_o), + .du_restart_pc_o (du_restart_pc_o[OPTION_OPERAND_WIDTH-1:0]), + .du_restart_o (du_restart_o), + .spr_bus_addr_o (spr_bus_addr_o[15:0]), + .spr_bus_we_o (spr_bus_we_o), + .spr_bus_stb_o (spr_bus_stb_o), + .spr_bus_dat_o (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .spr_sr_o (spr_sr_o[15:0]), + .ctrl_bubble_o (ctrl_bubble_o), + // Inputs + .clk (clk), + .rst (rst), + .ctrl_alu_result_i (ctrl_alu_result_o), // Templated + .ctrl_lsu_adr_i (ctrl_lsu_adr_o), // Templated + .ctrl_rfb_i (ctrl_rfb_o), // Templated + .ctrl_flag_set_i (ctrl_flag_set_o), // Templated + .ctrl_flag_clear_i (ctrl_flag_clear_o), // Templated + .atomic_flag_set_i (atomic_flag_set_o), // Templated + .atomic_flag_clear_i (atomic_flag_clear_o), // Templated + .pc_ctrl_i (pc_execute_to_ctrl), // Templated + .ctrl_op_mfspr_i (ctrl_op_mfspr_o), // Templated + .ctrl_op_mtspr_i (ctrl_op_mtspr_o), // Templated + .ctrl_op_rfe_i (ctrl_op_rfe_o), // 
Templated + .decode_branch_i (decode_branch_o), // Templated + .decode_branch_target_i (decode_branch_target_o), // Templated + .branch_mispredict_i (branch_mispredict_o), // Templated + .execute_mispredict_target_i (execute_mispredict_target_o), // Templated + .pc_execute_i (pc_decode_to_execute), // Templated + .execute_op_branch_i (execute_op_branch_o), // Templated + .except_ibus_err_i (ctrl_except_ibus_err_o), // Templated + .except_itlb_miss_i (ctrl_except_itlb_miss_o), // Templated + .except_ipagefault_i (ctrl_except_ipagefault_o), // Templated + .except_ibus_align_i (ctrl_except_ibus_align_o), // Templated + .except_illegal_i (ctrl_except_illegal_o), // Templated + .except_syscall_i (ctrl_except_syscall_o), // Templated + .except_dbus_i (ctrl_except_dbus_o), // Templated + .except_dtlb_miss_i (ctrl_except_dtlb_miss_o), // Templated + .except_dpagefault_i (ctrl_except_dpagefault_o), // Templated + .except_trap_i (ctrl_except_trap_o), // Templated + .except_align_i (ctrl_except_align_o), // Templated + .fetch_valid_i (fetch_valid_o), // Templated + .decode_valid_i (decode_valid_o), // Templated + .execute_valid_i (execute_valid_o), // Templated + .execute_op_lsu_load_i (execute_op_lsu_load_o), + .execute_op_lsu_store_i (execute_op_lsu_store_o), + .ctrl_valid_i (ctrl_valid_o), // Templated + .fetch_exception_taken_i (fetch_exception_taken_o), // Templated + .decode_bubble_i (decode_bubble_o), // Templated + .execute_bubble_i (execute_bubble_o), // Templated + .irq_i (irq_i[31:0]), + .store_buffer_epcr_i (store_buffer_epcr_o), // Templated + .store_buffer_err_i (store_buffer_err_o), // Templated + .ctrl_carry_set_i (ctrl_carry_set_o), // Templated + .ctrl_carry_clear_i (ctrl_carry_clear_o), // Templated + .ctrl_overflow_set_i (ctrl_overflow_set_o), // Templated + .ctrl_overflow_clear_i (ctrl_overflow_clear_o), // Templated + .ctrl_fpcsr_i (ctrl_fpcsr_o), + .ctrl_fpcsr_set_i (ctrl_fpcsr_set_o), + .icache_hit_i (icache_hit_o), + .dcache_hit_i (dcache_hit_o), + 
.du_addr_i (du_addr_i[15:0]), + .du_stb_i (du_stb_i), + .du_dat_i (du_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .du_we_i (du_we_i), + .du_stall_i (du_stall_i), + .spr_bus_dat_dc_i (spr_bus_dat_dc_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_dc_i (spr_bus_ack_dc_i), + .spr_bus_dat_ic_i (spr_bus_dat_ic_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_ic_i (spr_bus_ack_ic_i), + .spr_bus_dat_dmmu_i (spr_bus_dat_dmmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_dmmu_i (spr_bus_ack_dmmu_i), + .spr_bus_dat_immu_i (spr_bus_dat_immu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_immu_i (spr_bus_ack_immu_i), + .spr_bus_dat_mac_i (spr_bus_dat_mac_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_mac_i (spr_bus_ack_mac_i), + .spr_bus_dat_pmu_i (spr_bus_dat_pmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pmu_i (spr_bus_ack_pmu_i), + .spr_bus_dat_pcu_i (spr_bus_dat_pcu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pcu_i (spr_bus_ack_pcu_i), + .spr_bus_dat_fpu_i (spr_bus_dat_fpu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_fpu_i (spr_bus_ack_fpu_i), + .spr_gpr_dat_i (spr_gpr_dat_o), // Templated + .spr_gpr_ack_i (spr_gpr_ack_o), // Templated + .multicore_coreid_i (multicore_coreid_i[OPTION_OPERAND_WIDTH-1:0]), + .multicore_numcores_i (multicore_numcores_i[OPTION_OPERAND_WIDTH-1:0])); + + reg [`OR1K_INSN_WIDTH-1:0] traceport_stage_decode_insn; + reg [`OR1K_INSN_WIDTH-1:0] traceport_stage_exec_insn; + + reg traceport_jal_execute_to_ctrl; + reg traceport_jr_execute_to_ctrl; + reg [31:0] traceport_jbtarget_decode_to_execute; + reg [31:0] traceport_jbtarget_execute_to_ctrl; + + reg traceport_waitexec; + + always @(posedge clk) begin + if (FEATURE_TRACEPORT_EXEC != "NONE") begin + if (rst) begin + traceport_waitexec <= 0; + end else begin + if (padv_decode_o) begin + traceport_stage_decode_insn <= insn_fetch_to_decode; + traceport_jbtarget_decode_to_execute <= decode_branch_target_o; + end + + if (padv_execute_o) begin + traceport_stage_exec_insn <= traceport_stage_decode_insn; + 
traceport_jbtarget_execute_to_ctrl <= traceport_jbtarget_decode_to_execute; + traceport_jal_execute_to_ctrl <= execute_op_jal_o; + traceport_jr_execute_to_ctrl <= execute_op_jr_o & !execute_op_jal_o; + end + + if (padv_ctrl_o) begin + traceport_exec_jal_o <= traceport_jal_execute_to_ctrl; + traceport_exec_jr_o <= traceport_jr_execute_to_ctrl; + traceport_exec_insn_o <= traceport_stage_exec_insn; + traceport_exec_jbtarget_o <= traceport_jbtarget_execute_to_ctrl; + end + + traceport_exec_pc_o <= pc_execute_to_ctrl; + + if (!traceport_waitexec) begin + if (padv_ctrl_o & !ctrl_bubble_o) begin + if (execute_valid_o) begin + traceport_exec_valid_o <= 1'b1; + end else begin + traceport_exec_valid_o <= 1'b0; + traceport_waitexec <= 1'b1; + end + end else if (ctrl_op_rfe_o) begin + traceport_exec_valid_o <= 1'b1; + end else begin + traceport_exec_valid_o <= 1'b0; + end + end else begin + if (execute_valid_o) begin + traceport_exec_valid_o <= 1'b1; + traceport_waitexec <= 1'b0; + end else begin + traceport_exec_valid_o <= 1'b0; + end + end // else: !if(!traceport_waitexec) + end // else: !if(rst) + end else begin // if (FEATURE_TRACEPORT_EXEC != "NONE") + traceport_stage_decode_insn <= {`OR1K_INSN_WIDTH{1'b0}}; + traceport_stage_exec_insn <= {`OR1K_INSN_WIDTH{1'b0}}; + traceport_exec_insn_o <= {`OR1K_INSN_WIDTH{1'b0}}; + traceport_exec_pc_o <= 32'h0; + traceport_exec_valid_o <= 1'b0; + end + end + + generate + if (FEATURE_TRACEPORT_EXEC != "NONE") begin + assign traceport_exec_wbreg_o = wb_rfd_adr_o; + assign traceport_exec_wben_o = wb_rf_wb_o; + assign traceport_exec_wbdata_o = rf_result_o; + end else begin + assign traceport_exec_wbreg_o = {OPTION_RF_ADDR_WIDTH{1'b0}}; + assign traceport_exec_wben_o = 1'b0; + assign traceport_exec_wbdata_o = {OPTION_OPERAND_WIDTH{1'b0}}; + end + endgenerate + +endmodule // mor1kx_cpu_cappuccino diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu_espresso.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu_espresso.v new 
file mode 100644 index 0000000..8e9cfa8 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu_espresso.v @@ -0,0 +1,790 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: Espresso pipeline CPU module + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_cpu_espresso + #( + parameter OPTION_OPERAND_WIDTH = 32, + + parameter FEATURE_DATACACHE = "NONE", + parameter OPTION_DCACHE_BLOCK_WIDTH = 5, + parameter OPTION_DCACHE_SET_WIDTH = 9, + parameter OPTION_DCACHE_WAYS = 2, + parameter FEATURE_DMMU = "NONE", + parameter FEATURE_DMMU_HW_TLB_RELOAD = "NONE", + parameter FEATURE_INSTRUCTIONCACHE = "NONE", + parameter OPTION_ICACHE_BLOCK_WIDTH = 5, + parameter OPTION_ICACHE_SET_WIDTH = 9, + parameter OPTION_ICACHE_WAYS = 2, + parameter FEATURE_IMMU = "NONE", + parameter FEATURE_IMMU_HW_TLB_RELOAD = "NONE", + parameter FEATURE_TIMER = "ENABLED", + parameter FEATURE_DEBUGUNIT = "NONE", + parameter FEATURE_PERFCOUNTERS = "NONE", + parameter FEATURE_MAC = "NONE", + + parameter FEATURE_SYSCALL = "ENABLED", + parameter FEATURE_TRAP = "ENABLED", + parameter FEATURE_RANGE = "ENABLED", + + parameter FEATURE_PIC = "ENABLED", + parameter OPTION_PIC_TRIGGER = "LEVEL", + parameter OPTION_PIC_NMI_WIDTH = 0, + + parameter FEATURE_DSX = "NONE", + parameter FEATURE_FASTCONTEXTS = "NONE", + parameter FEATURE_OVERFLOW = "NONE", + parameter FEATURE_CARRY_FLAG = "ENABLED", + + parameter OPTION_RF_ADDR_WIDTH = 5, + parameter OPTION_RF_WORDS = 32, + + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}, + + parameter FEATURE_MULTIPLIER = 
"THREESTAGE", + parameter FEATURE_DIVIDER = "NONE", + + parameter FEATURE_ADDC = "NONE", + parameter FEATURE_SRA = "ENABLED", + parameter FEATURE_ROR = "NONE", + parameter FEATURE_EXT = "NONE", + parameter FEATURE_CMOV = "NONE", + parameter FEATURE_FFL1 = "NONE", + parameter FEATURE_MSYNC = "NONE", + parameter FEATURE_PSYNC = "NONE", + parameter FEATURE_CSYNC = "NONE", + + parameter FEATURE_CUST1 = "NONE", + parameter FEATURE_CUST2 = "NONE", + parameter FEATURE_CUST3 = "NONE", + parameter FEATURE_CUST4 = "NONE", + parameter FEATURE_CUST5 = "NONE", + parameter FEATURE_CUST6 = "NONE", + parameter FEATURE_CUST7 = "NONE", + parameter FEATURE_CUST8 = "NONE", + + parameter OPTION_SHIFTER = "BARREL", + + parameter FEATURE_MULTICORE = "NONE", + + parameter FEATURE_TRACEPORT_EXEC = "NONE" + ) + ( + input clk, + input rst, + + // Instruction bus + input ibus_err_i, + input ibus_ack_i, + input [`OR1K_INSN_WIDTH-1:0] ibus_dat_i, + output [OPTION_OPERAND_WIDTH-1:0] ibus_adr_o, + output ibus_req_o, + output ibus_burst_o, + + // Data bus + input dbus_err_i, + input dbus_ack_i, + input [OPTION_OPERAND_WIDTH-1:0] dbus_dat_i, + output [OPTION_OPERAND_WIDTH-1:0] dbus_adr_o, + output [OPTION_OPERAND_WIDTH-1:0] dbus_dat_o, + output dbus_req_o, + output [3:0] dbus_bsel_o, + output dbus_we_o, + output dbus_burst_o, + + // Interrupts + input [31:0] irq_i, + + // Debug interface + input [15:0] du_addr_i, + input du_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] du_dat_i, + input du_we_i, + output [OPTION_OPERAND_WIDTH-1:0] du_dat_o, + output du_ack_o, + // Stall control from debug interface + input du_stall_i, + output du_stall_o, + + // SPR accesses to external units (cache, mmu, etc.) 
+ output [15:0] spr_bus_addr_o, + output spr_bus_we_o, + output spr_bus_stb_o, + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dmmu_i, + input spr_bus_ack_dmmu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_immu_i, + input spr_bus_ack_immu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_mac_i, + input spr_bus_ack_mac_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pmu_i, + input spr_bus_ack_pmu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pcu_i, + input spr_bus_ack_pcu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_fpu_i, + input spr_bus_ack_fpu_i, + output [15:0] spr_sr_o, + + input [OPTION_OPERAND_WIDTH-1:0] multicore_coreid_i + ); + + wire [OPTION_OPERAND_WIDTH-1:0] pc_fetch_to_decode; + wire [`OR1K_INSN_WIDTH-1:0] insn_fetch_to_decode; + wire [OPTION_OPERAND_WIDTH-1:0] pc_decode_to_execute; + wire [OPTION_OPERAND_WIDTH-1:0] pc_execute_to_ctrl; + + /*AUTOWIRE*/ + // Beginning of automatic wires (for undeclared instantiated-module outputs) + wire [OPTION_OPERAND_WIDTH-1:0] adder_result_o;// From mor1kx_execute_alu of mor1kx_execute_alu.v + wire [OPTION_OPERAND_WIDTH-1:0] alu_result_o;// From mor1kx_execute_alu of mor1kx_execute_alu.v + wire alu_valid_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire carry_clear_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire carry_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire carry_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire ctrl_branch_occur_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_branch_target_o;// From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire ctrl_mfspr_we_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire decode_adder_do_carry_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_adder_do_sub_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_except_ibus_err_o;// From mor1kx_fetch_espresso of 
mor1kx_fetch_espresso.v + wire decode_except_illegal_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_except_syscall_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_except_trap_o; // From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_IMM_WIDTH-1:0] decode_imm16_o; // From mor1kx_decode of mor1kx_decode.v + wire [OPTION_OPERAND_WIDTH-1:0] decode_immediate_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_immediate_sel_o; // From mor1kx_decode of mor1kx_decode.v + wire [9:0] decode_immjbr_upper_o; // From mor1kx_decode of mor1kx_decode.v + wire [1:0] decode_lsu_length_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_lsu_zext_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_add_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_alu_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_bf_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_bnf_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_branch_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_brcond_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_div_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_div_signed_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_div_unsigned_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_op_ffl1_o; // From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_FPUOP_WIDTH-1:0] decode_op_fpu_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_op_jal_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_jbr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_jr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_lsu_load_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_lsu_store_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mfspr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_movhi_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_ext_o; // From mor1kx_decode of 
mor1kx_decode.v + wire decode_op_msync_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mtspr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mul_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mul_signed_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mul_unsigned_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_op_rfe_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_setflag_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_shift_o; // From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_o;// From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_secondary_o;// From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_OPCODE_WIDTH-1:0] decode_opc_insn_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_rf_wb_o; // From mor1kx_decode of mor1kx_decode.v + wire [OPTION_RF_ADDR_WIDTH-1:0] decode_rfa_adr_o;// From mor1kx_decode of mor1kx_decode.v + wire [OPTION_RF_ADDR_WIDTH-1:0] decode_rfb_adr_o;// From mor1kx_decode of mor1kx_decode.v + wire [OPTION_RF_ADDR_WIDTH-1:0] decode_rfd_adr_o;// From mor1kx_decode of mor1kx_decode.v + wire du_restart_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_o;// From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire exception_taken_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire execute_waiting_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire fetch_advancing_o; // From mor1kx_fetch_espresso of mor1kx_fetch_espresso.v + wire [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfa_adr_o;// From mor1kx_fetch_espresso of mor1kx_fetch_espresso.v + wire [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfb_adr_o;// From mor1kx_fetch_espresso of mor1kx_fetch_espresso.v + wire fetch_take_exception_branch_o;// From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire flag_clear_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire 
flag_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire flag_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire [`OR1K_FPCSR_WIDTH-1:0] fpcsr_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire fpcsr_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire lsu_except_align_o; // From mor1kx_lsu_espresso of mor1kx_lsu_espresso.v + wire lsu_except_dbus_o; // From mor1kx_lsu_espresso of mor1kx_lsu_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] lsu_result_o;// From mor1kx_lsu_espresso of mor1kx_lsu_espresso.v + wire lsu_valid_o; // From mor1kx_lsu_espresso of mor1kx_lsu_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] mfspr_dat_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] mul_result_o;// From mor1kx_execute_alu of mor1kx_execute_alu.v + wire next_fetch_done_o; // From mor1kx_fetch_espresso of mor1kx_fetch_espresso.v + wire overflow_clear_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire overflow_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire padv_decode_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire padv_execute_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire padv_fetch_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] pc_fetch_next_o;// From mor1kx_fetch_espresso of mor1kx_fetch_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] pc_fetch_o; // From mor1kx_fetch_espresso of mor1kx_fetch_espresso.v + wire pipeline_flush_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] rf_result_o; // From mor1kx_wb_mux_espresso of mor1kx_wb_mux_espresso.v + wire rf_we_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] rfa_o; // From mor1kx_rf_espresso of mor1kx_rf_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] rfb_o; // From mor1kx_rf_espresso of mor1kx_rf_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_npc_o; // From 
mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_ppc_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + wire stepping_o; // From mor1kx_ctrl_espresso of mor1kx_ctrl_espresso.v + // End of automatics + + /* mor1kx_fetch_espresso AUTO_TEMPLATE ( + .padv_i (padv_fetch_o), + .branch_occur_i (ctrl_branch_occur_o), + .branch_dest_i (ctrl_branch_target_o), + .pipeline_flush_i (pipeline_flush_o), + .pc_decode_o (pc_fetch_to_decode), + .decode_insn_o (insn_fetch_to_decode), + .du_restart_pc_i (du_restart_pc_o), + .du_restart_i (du_restart_o), + .fetch_take_exception_branch_i (fetch_take_exception_branch_o), + .execute_waiting_i (execute_waiting_o), + .stepping_i (stepping_o), + ); */ + mor1kx_fetch_espresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC) + ) + mor1kx_fetch_espresso + (/*AUTOINST*/ + // Outputs + .ibus_adr_o (ibus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .ibus_req_o (ibus_req_o), + .ibus_burst_o (ibus_burst_o), + .decode_insn_o (insn_fetch_to_decode), // Templated + .next_fetch_done_o (next_fetch_done_o), + .fetch_rfa_adr_o (fetch_rfa_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .fetch_rfb_adr_o (fetch_rfb_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .pc_fetch_o (pc_fetch_o[OPTION_OPERAND_WIDTH-1:0]), + .pc_fetch_next_o (pc_fetch_next_o[OPTION_OPERAND_WIDTH-1:0]), + .decode_except_ibus_err_o (decode_except_ibus_err_o), + .fetch_advancing_o (fetch_advancing_o), + // Inputs + .clk (clk), + .rst (rst), + .ibus_err_i (ibus_err_i), + .ibus_ack_i (ibus_ack_i), + .ibus_dat_i (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), + .padv_i (padv_fetch_o), // Templated + .branch_occur_i (ctrl_branch_occur_o), // Templated + .branch_dest_i (ctrl_branch_target_o), // Templated + .du_restart_i (du_restart_o), // Templated + .du_restart_pc_i (du_restart_pc_o), // Templated + .fetch_take_exception_branch_i (fetch_take_exception_branch_o), // Templated + .execute_waiting_i 
(execute_waiting_o), // Templated + .du_stall_i (du_stall_i), + .stepping_i (stepping_o)); // Templated + + /* mor1kx_decode AUTO_TEMPLATE ( + .decode_insn_i (insn_fetch_to_decode), + .decode_op_lsu_atomic_o (), + ); */ + mor1kx_decode + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + .FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + .FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_MSYNC(FEATURE_MSYNC), + .FEATURE_PSYNC(FEATURE_PSYNC), + .FEATURE_CSYNC(FEATURE_CSYNC), + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + .FEATURE_CUST8(FEATURE_CUST8) + ) + mor1kx_decode + (/*AUTOINST*/ + // Outputs + .decode_opc_alu_o (decode_opc_alu_o[`OR1K_ALU_OPC_WIDTH-1:0]), + .decode_opc_alu_secondary_o (decode_opc_alu_secondary_o[`OR1K_ALU_OPC_WIDTH-1:0]), + .decode_imm16_o (decode_imm16_o[`OR1K_IMM_WIDTH-1:0]), + .decode_immediate_o (decode_immediate_o[OPTION_OPERAND_WIDTH-1:0]), + .decode_immediate_sel_o (decode_immediate_sel_o), + .decode_immjbr_upper_o (decode_immjbr_upper_o[9:0]), + .decode_rfd_adr_o (decode_rfd_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .decode_rfa_adr_o (decode_rfa_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .decode_rfb_adr_o (decode_rfb_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .decode_rf_wb_o (decode_rf_wb_o), + .decode_op_jbr_o (decode_op_jbr_o), + .decode_op_jr_o (decode_op_jr_o), + .decode_op_jal_o (decode_op_jal_o), + .decode_op_bf_o (decode_op_bf_o), + .decode_op_bnf_o (decode_op_bnf_o), + .decode_op_brcond_o 
(decode_op_brcond_o), + .decode_op_branch_o (decode_op_branch_o), + .decode_op_alu_o (decode_op_alu_o), + .decode_op_lsu_load_o (decode_op_lsu_load_o), + .decode_op_lsu_store_o (decode_op_lsu_store_o), + .decode_op_lsu_atomic_o (), // Templated + .decode_lsu_length_o (decode_lsu_length_o[1:0]), + .decode_lsu_zext_o (decode_lsu_zext_o), + .decode_op_mfspr_o (decode_op_mfspr_o), + .decode_op_mtspr_o (decode_op_mtspr_o), + .decode_op_rfe_o (decode_op_rfe_o), + .decode_op_setflag_o (decode_op_setflag_o), + .decode_op_add_o (decode_op_add_o), + .decode_op_mul_o (decode_op_mul_o), + .decode_op_mul_signed_o (decode_op_mul_signed_o), + .decode_op_mul_unsigned_o (decode_op_mul_unsigned_o), + .decode_op_div_o (decode_op_div_o), + .decode_op_div_signed_o (decode_op_div_signed_o), + .decode_op_div_unsigned_o (decode_op_div_unsigned_o), + .decode_op_shift_o (decode_op_shift_o), + .decode_op_ffl1_o (decode_op_ffl1_o), + .decode_op_movhi_o (decode_op_movhi_o), + .decode_op_ext_o (decode_op_ext_o), + .decode_op_msync_o (decode_op_msync_o), + .decode_op_fpu_o (decode_op_fpu_o[`OR1K_FPUOP_WIDTH-1:0]), + .decode_adder_do_sub_o (decode_adder_do_sub_o), + .decode_adder_do_carry_o (decode_adder_do_carry_o), + .decode_except_illegal_o (decode_except_illegal_o), + .decode_except_syscall_o (decode_except_syscall_o), + .decode_except_trap_o (decode_except_trap_o), + .decode_opc_insn_o (decode_opc_insn_o[`OR1K_OPCODE_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .decode_insn_i (insn_fetch_to_decode)); // Templated + + /* mor1kx_execute_alu AUTO_TEMPLATE ( + .padv_decode_i (padv_decode_o), + .padv_execute_i (padv_execute_o), + .padv_ctrl_i (1'b1), + .pipeline_flush_i (pipeline_flush_o), + .opc_alu_i (decode_opc_alu_o), + .opc_alu_secondary_i (decode_opc_alu_secondary_o), + .imm16_i (decode_imm16_o), + .immediate_i (decode_immediate_o), + .immediate_sel_i (decode_immediate_sel_o), + .decode_valid_i (padv_decode_o), + .decode_immediate_i (decode_immediate_o), + .decode_immediate_sel_i 
(decode_immediate_sel_o), + .decode_op_mul_i (decode_op_mul_o), + .op_alu_i (decode_op_alu_o), + .op_add_i (decode_op_add_o), + .op_mul_i (decode_op_mul_o), + .op_mul_signed_i (decode_op_mul_signed_o), + .op_mul_unsigned_i (decode_op_mul_unsigned_o), + .op_div_i (decode_op_div_o), + .op_div_signed_i (decode_op_div_signed_o), + .op_div_unsigned_i (decode_op_div_unsigned_o), + .op_shift_i (decode_op_shift_o), + .op_ffl1_i (decode_op_ffl1_o), + .op_setflag_i (decode_op_setflag_o), + .op_mtspr_i (decode_op_mtspr_o), + .op_mfspr_i (decode_op_mfspr_o), + .op_movhi_i (decode_op_movhi_o), + .op_ext_i (decode_op_ext_o), + .op_jbr_i (decode_op_jbr_o), + .op_jr_i (decode_op_jr_o), + .op_fpu_i (decode_op_fpu_o), + .fpu_round_mode_i (2'b00), + .immjbr_upper_i (decode_immjbr_upper_o), + .pc_execute_i (spr_ppc_o), + .adder_do_sub_i (decode_adder_do_sub_o), + .adder_do_carry_i (decode_adder_do_carry_o), + .decode_rfa_i (rfa_o), + .decode_rfb_i (rfb_o), + .rfa_i (rfa_o), + .rfb_i (rfb_o), + .flag_i (flag_o), + .carry_i (carry_o), + ); */ + mor1kx_execute_alu + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + .FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + .FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + .FEATURE_CUST8(FEATURE_CUST8), + .OPTION_SHIFTER(OPTION_SHIFTER) + ) + mor1kx_execute_alu + (/*AUTOINST*/ + // Outputs + .flag_set_o (flag_set_o), + .flag_clear_o (flag_clear_o), + .carry_set_o (carry_set_o), + .carry_clear_o (carry_clear_o), + .overflow_set_o (overflow_set_o), + .overflow_clear_o (overflow_clear_o), + .fpcsr_o (fpcsr_o[`OR1K_FPCSR_WIDTH-1:0]), + .fpcsr_set_o (fpcsr_set_o), + 
.alu_result_o (alu_result_o[OPTION_OPERAND_WIDTH-1:0]), + .alu_valid_o (alu_valid_o), + .mul_result_o (mul_result_o[OPTION_OPERAND_WIDTH-1:0]), + .adder_result_o (adder_result_o[OPTION_OPERAND_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .padv_decode_i (padv_decode_o), // Templated + .padv_execute_i (padv_execute_o), // Templated + .padv_ctrl_i (1'b1), // Templated + .pipeline_flush_i (pipeline_flush_o), // Templated + .opc_alu_i (decode_opc_alu_o), // Templated + .opc_alu_secondary_i (decode_opc_alu_secondary_o), // Templated + .imm16_i (decode_imm16_o), // Templated + .immediate_i (decode_immediate_o), // Templated + .immediate_sel_i (decode_immediate_sel_o), // Templated + .decode_immediate_i (decode_immediate_o), // Templated + .decode_immediate_sel_i (decode_immediate_sel_o), // Templated + .decode_valid_i (padv_decode_o), // Templated + .decode_op_mul_i (decode_op_mul_o), // Templated + .op_alu_i (decode_op_alu_o), // Templated + .op_add_i (decode_op_add_o), // Templated + .op_mul_i (decode_op_mul_o), // Templated + .op_mul_signed_i (decode_op_mul_signed_o), // Templated + .op_mul_unsigned_i (decode_op_mul_unsigned_o), // Templated + .op_div_i (decode_op_div_o), // Templated + .op_div_signed_i (decode_op_div_signed_o), // Templated + .op_div_unsigned_i (decode_op_div_unsigned_o), // Templated + .op_shift_i (decode_op_shift_o), // Templated + .op_ffl1_i (decode_op_ffl1_o), // Templated + .op_setflag_i (decode_op_setflag_o), // Templated + .op_mtspr_i (decode_op_mtspr_o), // Templated + .op_mfspr_i (decode_op_mfspr_o), // Templated + .op_movhi_i (decode_op_movhi_o), // Templated + .op_ext_i (decode_op_ext_o), // Templated + .op_fpu_i (decode_op_fpu_o), // Templated + .fpu_round_mode_i (2'b00), // Templated + .op_jbr_i (decode_op_jbr_o), // Templated + .op_jr_i (decode_op_jr_o), // Templated + .immjbr_upper_i (decode_immjbr_upper_o), // Templated + .pc_execute_i (spr_ppc_o), // Templated + .adder_do_sub_i (decode_adder_do_sub_o), // Templated + 
.adder_do_carry_i (decode_adder_do_carry_o), // Templated + .decode_rfa_i (rfa_o), // Templated + .decode_rfb_i (rfb_o), // Templated + .rfa_i (rfa_o), // Templated + .rfb_i (rfb_o), // Templated + .flag_i (flag_o), // Templated + .carry_i (carry_o)); // Templated + + + /* mor1kx_lsu_espresso AUTO_TEMPLATE ( + .padv_fetch_i (padv_fetch_o), + .lsu_adr_i (adder_result_o), + .rfb_i (rfb_o), + .op_lsu_load_i (decode_op_lsu_load_o), + .op_lsu_store_i (decode_op_lsu_store_o), + .lsu_length_i (decode_lsu_length_o), + .lsu_zext_i (decode_lsu_zext_o), + .exception_taken_i (exception_taken_o), + .du_restart_i (du_restart_o), + .stepping_i (stepping_o), + .next_fetch_done_i (next_fetch_done_o), + ); */ + mor1kx_lsu_espresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH) + ) + mor1kx_lsu_espresso + (/*AUTOINST*/ + // Outputs + .lsu_result_o (lsu_result_o[OPTION_OPERAND_WIDTH-1:0]), + .lsu_valid_o (lsu_valid_o), + .lsu_except_dbus_o (lsu_except_dbus_o), + .lsu_except_align_o (lsu_except_align_o), + .dbus_adr_o (dbus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_req_o (dbus_req_o), + .dbus_dat_o (dbus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_bsel_o (dbus_bsel_o[3:0]), + .dbus_we_o (dbus_we_o), + .dbus_burst_o (dbus_burst_o), + // Inputs + .clk (clk), + .rst (rst), + .padv_fetch_i (padv_fetch_o), // Templated + .lsu_adr_i (adder_result_o), // Templated + .rfb_i (rfb_o), // Templated + .op_lsu_load_i (decode_op_lsu_load_o), // Templated + .op_lsu_store_i (decode_op_lsu_store_o), // Templated + .lsu_length_i (decode_lsu_length_o), // Templated + .lsu_zext_i (decode_lsu_zext_o), // Templated + .exception_taken_i (exception_taken_o), // Templated + .du_restart_i (du_restart_o), // Templated + .stepping_i (stepping_o), // Templated + .next_fetch_done_i (next_fetch_done_o), // Templated + .dbus_err_i (dbus_err_i), + .dbus_ack_i (dbus_ack_i), + .dbus_dat_i (dbus_dat_i[OPTION_OPERAND_WIDTH-1:0])); + + + /* mor1kx_wb_mux_espresso AUTO_TEMPLATE ( + .alu_result_i (alu_result_o), + 
.lsu_result_i (lsu_result_o), + .spr_i (mfspr_dat_o), + .op_jal_i (decode_op_jal_o), + .op_lsu_load_i (decode_op_lsu_load_o), + .ppc_i (spr_ppc_o), + .op_mfspr_i (decode_op_mfspr_o), + .pc_fetch_next_i (pc_fetch_next_o), + ); */ + mor1kx_wb_mux_espresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH) + ) + mor1kx_wb_mux_espresso + (/*AUTOINST*/ + // Outputs + .rf_result_o (rf_result_o[OPTION_OPERAND_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .alu_result_i (alu_result_o), // Templated + .lsu_result_i (lsu_result_o), // Templated + .ppc_i (spr_ppc_o), // Templated + .pc_fetch_next_i (pc_fetch_next_o), // Templated + .spr_i (mfspr_dat_o), // Templated + .op_jal_i (decode_op_jal_o), // Templated + .op_lsu_load_i (decode_op_lsu_load_o), // Templated + .op_mfspr_i (decode_op_mfspr_o)); // Templated + + /* mor1kx_rf_espresso AUTO_TEMPLATE ( + .rf_we_i (rf_we_o), + .rf_re_i (fetch_advancing_o), + .rfd_adr_i (decode_rfd_adr_o), + .rfa_adr_i (fetch_rfa_adr_o), + .rfb_adr_i (fetch_rfb_adr_o), + .result_i (rf_result_o), + ); */ + mor1kx_rf_espresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .OPTION_RF_WORDS(OPTION_RF_WORDS) + ) + mor1kx_rf_espresso + (/*AUTOINST*/ + // Outputs + .rfa_o (rfa_o[OPTION_OPERAND_WIDTH-1:0]), + .rfb_o (rfb_o[OPTION_OPERAND_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .rfd_adr_i (decode_rfd_adr_o), // Templated + .rfa_adr_i (fetch_rfa_adr_o), // Templated + .rfb_adr_i (fetch_rfb_adr_o), // Templated + .rf_we_i (rf_we_o), // Templated + .rf_re_i (fetch_advancing_o), // Templated + .result_i (rf_result_o)); // Templated + + + /* Debug accessors required for the debug monitor: get_gpr returns the current value of GPR gpr_num; set_gpr (non-synthesis builds only) forces GPR gpr_num to gpr_value in both the rfa and rfb register-file memories. */ + function [OPTION_OPERAND_WIDTH-1:0] get_gpr; + // verilator public + input [4:0] gpr_num; + begin + // If gpr_num itself is being written right now (rf_we_o set and destination + // decode_rfd_adr_o matches), the memory is stale: snoop the in-flight result. 
+ if (rf_we_o && (gpr_num == decode_rfd_adr_o)) + get_gpr = rf_result_o; + else + get_gpr = mor1kx_rf_espresso.rfa.mem[gpr_num]; + end + endfunction + +`ifndef SYNTHESIS +// synthesis translate_off + task set_gpr; + // verilator public + input [4:0] gpr_num; + input [OPTION_OPERAND_WIDTH-1:0] gpr_value; + begin + mor1kx_rf_espresso.rfa.mem[gpr_num] = gpr_value; + mor1kx_rf_espresso.rfb.mem[gpr_num] = gpr_value; + end + endtask +// synthesis translate_on +`endif + + /* mor1kx_ctrl_espresso AUTO_TEMPLATE ( + .ctrl_alu_result_i (alu_result_o), + .ctrl_rfb_i (rfb_o), + .ctrl_flag_set_i (flag_set_o), + .ctrl_flag_clear_i (flag_clear_o), + .pc_ctrl_i (), + .pc_fetch_i (pc_fetch_o), + .ctrl_opc_insn_i (decode_opc_insn_o), + .ctrl_branch_target_i (ctrl_branch_target_o), + .op_lsu_load_i (decode_op_lsu_load_o), + .op_lsu_store_i (decode_op_lsu_store_o), + .alu_valid_i (alu_valid_o), + .lsu_valid_i (lsu_valid_o), + .op_jr_i (decode_op_jr_o), + .op_jbr_i (decode_op_jbr_o), + .except_ibus_err_i (decode_except_ibus_err_o), + .except_illegal_i (decode_except_illegal_o), + .except_syscall_i (decode_except_syscall_o), + .except_dbus_i (lsu_except_dbus_o), + .except_trap_i (decode_except_trap_o), + .except_align_i (lsu_except_align_o), + .next_fetch_done_i (next_fetch_done_o), + .execute_valid_i (execute_valid_o), + .execute_waiting_i (execute_waiting_o), + .fetch_branch_taken_i (fetch_branch_taken_o), + .rf_wb_i (decode_rf_wb_o), + .fetch_advancing_i (fetch_advancing_o), + .carry_set_i (carry_set_o), + .carry_clear_i (carry_clear_o), + .overflow_set_i (overflow_set_o), + .overflow_clear_i (overflow_clear_o), + .spr_bus_dat_dc_i (), + .spr_bus_ack_dc_i (), + .spr_bus_dat_ic_i (), + .spr_bus_ack_ic_i (), + ); */ + mor1kx_ctrl_espresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .FEATURE_PIC(FEATURE_PIC), + .FEATURE_TIMER(FEATURE_TIMER), + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH), + .FEATURE_DSX(FEATURE_DSX), + 
.FEATURE_FASTCONTEXTS(FEATURE_FASTCONTEXTS), + .FEATURE_OVERFLOW(FEATURE_OVERFLOW), + .FEATURE_DATACACHE(FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH(OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH(OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS(OPTION_DCACHE_WAYS), + .FEATURE_DMMU(FEATURE_DMMU), + .FEATURE_INSTRUCTIONCACHE(FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH(OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS(OPTION_ICACHE_WAYS), + .FEATURE_IMMU(FEATURE_IMMU), + .FEATURE_DEBUGUNIT(FEATURE_DEBUGUNIT), + .FEATURE_PERFCOUNTERS(FEATURE_PERFCOUNTERS), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_MULTICORE(FEATURE_MULTICORE), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE) + ) + mor1kx_ctrl_espresso + (/*AUTOINST*/ + // Outputs + .flag_o (flag_o), + .spr_npc_o (spr_npc_o[OPTION_OPERAND_WIDTH-1:0]), + .spr_ppc_o (spr_ppc_o[OPTION_OPERAND_WIDTH-1:0]), + .mfspr_dat_o (mfspr_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .ctrl_mfspr_we_o (ctrl_mfspr_we_o), + .carry_o (carry_o), + .pipeline_flush_o (pipeline_flush_o), + .padv_fetch_o (padv_fetch_o), + .padv_decode_o (padv_decode_o), + .padv_execute_o (padv_execute_o), + .fetch_take_exception_branch_o (fetch_take_exception_branch_o), + .exception_taken_o (exception_taken_o), + .execute_waiting_o (execute_waiting_o), + .stepping_o (stepping_o), + .du_dat_o (du_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .du_ack_o (du_ack_o), + .du_stall_o (du_stall_o), + .du_restart_pc_o (du_restart_pc_o[OPTION_OPERAND_WIDTH-1:0]), + .du_restart_o (du_restart_o), + .spr_bus_addr_o (spr_bus_addr_o[15:0]), + .spr_bus_we_o (spr_bus_we_o), + .spr_bus_stb_o (spr_bus_stb_o), + .spr_bus_dat_o (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .spr_sr_o (spr_sr_o[15:0]), + .ctrl_branch_target_o (ctrl_branch_target_o[OPTION_OPERAND_WIDTH-1:0]), + .ctrl_branch_occur_o (ctrl_branch_occur_o), + .rf_we_o (rf_we_o), + // Inputs + .clk (clk), + .rst (rst), 
+ .ctrl_alu_result_i (alu_result_o), // Templated + .ctrl_rfb_i (rfb_o), // Templated + .ctrl_flag_set_i (flag_set_o), // Templated + .ctrl_flag_clear_i (flag_clear_o), // Templated + .ctrl_opc_insn_i (decode_opc_insn_o), // Templated + .pc_fetch_i (pc_fetch_o), // Templated + .fetch_advancing_i (fetch_advancing_o), // Templated + .except_ibus_err_i (decode_except_ibus_err_o), // Templated + .except_illegal_i (decode_except_illegal_o), // Templated + .except_syscall_i (decode_except_syscall_o), // Templated + .except_dbus_i (lsu_except_dbus_o), // Templated + .except_trap_i (decode_except_trap_o), // Templated + .except_align_i (lsu_except_align_o), // Templated + .next_fetch_done_i (next_fetch_done_o), // Templated + .alu_valid_i (alu_valid_o), // Templated + .lsu_valid_i (lsu_valid_o), // Templated + .op_lsu_load_i (decode_op_lsu_load_o), // Templated + .op_lsu_store_i (decode_op_lsu_store_o), // Templated + .op_jr_i (decode_op_jr_o), // Templated + .op_jbr_i (decode_op_jbr_o), // Templated + .irq_i (irq_i[31:0]), + .carry_set_i (carry_set_o), // Templated + .carry_clear_i (carry_clear_o), // Templated + .overflow_set_i (overflow_set_o), // Templated + .overflow_clear_i (overflow_clear_o), // Templated + .du_addr_i (du_addr_i[15:0]), + .du_stb_i (du_stb_i), + .du_dat_i (du_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .du_we_i (du_we_i), + .du_stall_i (du_stall_i), + .spr_bus_dat_dc_i (), // Templated + .spr_bus_ack_dc_i (), // Templated + .spr_bus_dat_ic_i (), // Templated + .spr_bus_ack_ic_i (), // Templated + .spr_bus_dat_dmmu_i (spr_bus_dat_dmmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_dmmu_i (spr_bus_ack_dmmu_i), + .spr_bus_dat_immu_i (spr_bus_dat_immu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_immu_i (spr_bus_ack_immu_i), + .spr_bus_dat_mac_i (spr_bus_dat_mac_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_mac_i (spr_bus_ack_mac_i), + .spr_bus_dat_pmu_i (spr_bus_dat_pmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pmu_i (spr_bus_ack_pmu_i), + .spr_bus_dat_pcu_i 
(spr_bus_dat_pcu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pcu_i (spr_bus_ack_pcu_i), + .spr_bus_dat_fpu_i (spr_bus_dat_fpu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_fpu_i (spr_bus_ack_fpu_i), + .multicore_coreid_i (multicore_coreid_i[OPTION_OPERAND_WIDTH-1:0]), + .rf_wb_i (decode_rf_wb_o)); // Templated + +endmodule // mor1kx_cpu_espresso diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu_prontoespresso.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu_prontoespresso.v new file mode 100644 index 0000000..d95afe0 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_cpu_prontoespresso.v @@ -0,0 +1,896 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: "Pronto espresso" pipeline CPU module + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_cpu_prontoespresso + #( + parameter OPTION_OPERAND_WIDTH = 32, + + parameter FEATURE_DATACACHE = "NONE", + parameter OPTION_DCACHE_BLOCK_WIDTH = 5, + parameter OPTION_DCACHE_SET_WIDTH = 9, + parameter OPTION_DCACHE_WAYS = 2, + parameter FEATURE_DMMU = "NONE", + parameter FEATURE_DMMU_HW_TLB_RELOAD = "NONE", + parameter FEATURE_INSTRUCTIONCACHE = "NONE", + parameter OPTION_ICACHE_BLOCK_WIDTH = 5, + parameter OPTION_ICACHE_SET_WIDTH = 9, + parameter OPTION_ICACHE_WAYS = 2, + parameter FEATURE_IMMU = "NONE", + parameter FEATURE_IMMU_HW_TLB_RELOAD = "NONE", + parameter FEATURE_TIMER = "ENABLED", + parameter FEATURE_DEBUGUNIT = "NONE", + parameter FEATURE_PERFCOUNTERS = "NONE", + parameter FEATURE_MAC = "NONE", + + parameter FEATURE_SYSCALL = "ENABLED", + parameter FEATURE_TRAP = 
"ENABLED", + parameter FEATURE_RANGE = "ENABLED", + + parameter FEATURE_PIC = "ENABLED", + parameter OPTION_PIC_TRIGGER = "LEVEL", + parameter OPTION_PIC_NMI_WIDTH = 0, + + parameter FEATURE_DSX = "NONE", + parameter FEATURE_FASTCONTEXTS = "NONE", + parameter FEATURE_OVERFLOW = "NONE", + parameter FEATURE_CARRY_FLAG = "ENABLED", + + parameter OPTION_RF_ADDR_WIDTH = 5, + parameter OPTION_RF_WORDS = 32, + + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}, + + parameter OPTION_TCM_FETCHER = "DISABLED", + + parameter FEATURE_MULTIPLIER = "THREESTAGE", + parameter FEATURE_DIVIDER = "NONE", + + parameter FEATURE_ADDC = "NONE", + parameter FEATURE_SRA = "ENABLED", + parameter FEATURE_ROR = "NONE", + parameter FEATURE_EXT = "NONE", + parameter FEATURE_CMOV = "NONE", + parameter FEATURE_FFL1 = "NONE", + parameter FEATURE_MSYNC = "NONE", + parameter FEATURE_PSYNC = "NONE", + parameter FEATURE_CSYNC = "NONE", + + parameter FEATURE_CUST1 = "NONE", + parameter FEATURE_CUST2 = "NONE", + parameter FEATURE_CUST3 = "NONE", + parameter FEATURE_CUST4 = "NONE", + parameter FEATURE_CUST5 = "NONE", + parameter FEATURE_CUST6 = "NONE", + parameter FEATURE_CUST7 = "NONE", + parameter FEATURE_CUST8 = "NONE", + + parameter OPTION_SHIFTER = "BARREL", + + parameter FEATURE_MULTICORE = "NONE", + + parameter FEATURE_TRACEPORT_EXEC = "NONE" + ) + ( + input clk, + input rst, + + // Instruction bus + input ibus_err_i, + input ibus_ack_i, + input [`OR1K_INSN_WIDTH-1:0] ibus_dat_i, + output [OPTION_OPERAND_WIDTH-1:0] ibus_adr_o, + output ibus_req_o, + output ibus_burst_o, + + // Data bus + input dbus_err_i, + input dbus_ack_i, + input [OPTION_OPERAND_WIDTH-1:0] dbus_dat_i, + output [OPTION_OPERAND_WIDTH-1:0] dbus_adr_o, + output [OPTION_OPERAND_WIDTH-1:0] dbus_dat_o, + output dbus_req_o, + output [3:0] dbus_bsel_o, + output dbus_we_o, + output dbus_burst_o, + + // Interrupts + input [31:0] irq_i, + + // Debug interface + input [15:0] du_addr_i, + input 
du_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] du_dat_i, + input du_we_i, + output [OPTION_OPERAND_WIDTH-1:0] du_dat_o, + output du_ack_o, + // Stall control from debug interface + input du_stall_i, + output du_stall_o, + + // SPR accesses to external units (cache, mmu, etc.) + output [15:0] spr_bus_addr_o, + output spr_bus_we_o, + output spr_bus_stb_o, + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dmmu_i, + input spr_bus_ack_dmmu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_immu_i, + input spr_bus_ack_immu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_mac_i, + input spr_bus_ack_mac_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pmu_i, + input spr_bus_ack_pmu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pcu_i, + input spr_bus_ack_pcu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_fpu_i, + input spr_bus_ack_fpu_i, + output [15:0] spr_sr_o, + + // The multicore core identifier + input [OPTION_OPERAND_WIDTH-1:0] multicore_coreid_i + ); + + wire [OPTION_OPERAND_WIDTH-1:0] pc_fetch_to_decode; + wire [`OR1K_INSN_WIDTH-1:0] insn_fetch_to_decode; + wire [OPTION_OPERAND_WIDTH-1:0] pc_decode_to_execute; + wire [OPTION_OPERAND_WIDTH-1:0] pc_execute_to_ctrl; + + /*AUTOWIRE*/ + // Beginning of automatic wires (for undeclared instantiated-module outputs) + wire [OPTION_OPERAND_WIDTH-1:0] adder_result_o;// From mor1kx_execute_alu of mor1kx_execute_alu.v + wire [OPTION_OPERAND_WIDTH-1:0] alu_result_o;// From mor1kx_execute_alu of mor1kx_execute_alu.v + wire alu_valid_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire carry_clear_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire carry_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire carry_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire ctrl_branch_occur_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_branch_target_o;// From 
mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire ctrl_insn_done_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire ctrl_mfspr_we_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire decode_adder_do_carry_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_adder_do_sub_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_except_ibus_err_o;// From mor1kx_fetch_prontoespresso of mor1kx_fetch_tcm_prontoespresso.v, ... + wire decode_except_illegal_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_except_syscall_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_except_trap_o; // From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_IMM_WIDTH-1:0] decode_imm16_o; // From mor1kx_decode of mor1kx_decode.v + wire [OPTION_OPERAND_WIDTH-1:0] decode_immediate_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_immediate_sel_o; // From mor1kx_decode of mor1kx_decode.v + wire [9:0] decode_immjbr_upper_o; // From mor1kx_decode of mor1kx_decode.v + wire [1:0] decode_lsu_length_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_lsu_zext_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_add_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_alu_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_bf_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_bnf_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_branch_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_brcond_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_div_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_div_signed_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_div_unsigned_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_op_ffl1_o; // From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_FPUOP_WIDTH-1:0] decode_op_fpu_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_op_jal_o; // From mor1kx_decode of 
mor1kx_decode.v + wire decode_op_jbr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_jr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_lsu_load_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_lsu_store_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mfspr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_movhi_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_ext_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_msync_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mtspr_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mul_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mul_signed_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_mul_unsigned_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_op_rfe_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_setflag_o; // From mor1kx_decode of mor1kx_decode.v + wire decode_op_shift_o; // From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_o;// From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_secondary_o;// From mor1kx_decode of mor1kx_decode.v + wire [`OR1K_OPCODE_WIDTH-1:0] decode_opc_insn_o;// From mor1kx_decode of mor1kx_decode.v + wire decode_rf_wb_o; // From mor1kx_decode of mor1kx_decode.v + wire [OPTION_RF_ADDR_WIDTH-1:0] decode_rfa_adr_o;// From mor1kx_decode of mor1kx_decode.v + wire [OPTION_RF_ADDR_WIDTH-1:0] decode_rfb_adr_o;// From mor1kx_decode of mor1kx_decode.v + wire [OPTION_RF_ADDR_WIDTH-1:0] decode_rfd_adr_o;// From mor1kx_decode of mor1kx_decode.v + wire du_restart_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_o;// From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire exception_taken_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire execute_waiting_o; // From 
mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire fetch_quick_branch_o; // From mor1kx_fetch_prontoespresso of mor1kx_fetch_prontoespresso.v + wire fetch_ready_o; // From mor1kx_fetch_prontoespresso of mor1kx_fetch_tcm_prontoespresso.v, ... + wire fetch_rf_re_o; // From mor1kx_fetch_prontoespresso of mor1kx_fetch_tcm_prontoespresso.v, ... + wire [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfa_adr_o;// From mor1kx_fetch_prontoespresso of mor1kx_fetch_tcm_prontoespresso.v, ... + wire [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfb_adr_o;// From mor1kx_fetch_prontoespresso of mor1kx_fetch_tcm_prontoespresso.v, ... + wire fetch_sleep_o; // From mor1kx_fetch_prontoespresso of mor1kx_fetch_tcm_prontoespresso.v, ... + wire fetch_take_exception_branch_o;// From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] fetched_pc_o;// From mor1kx_fetch_prontoespresso of mor1kx_fetch_tcm_prontoespresso.v, ... + wire flag_clear_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire flag_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire flag_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire [`OR1K_FPCSR_WIDTH-1:0] fpcsr_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire fpcsr_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire [OPTION_OPERAND_WIDTH-1:0] link_addr_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire lsu_except_align_o; // From mor1kx_lsu_espresso of mor1kx_lsu_espresso.v + wire lsu_except_dbus_o; // From mor1kx_lsu_espresso of mor1kx_lsu_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] lsu_result_o;// From mor1kx_lsu_espresso of mor1kx_lsu_espresso.v + wire lsu_valid_o; // From mor1kx_lsu_espresso of mor1kx_lsu_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] mfspr_dat_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] mul_result_o;// From mor1kx_execute_alu of mor1kx_execute_alu.v + wire 
overflow_clear_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire overflow_set_o; // From mor1kx_execute_alu of mor1kx_execute_alu.v + wire padv_decode_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire padv_execute_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire padv_fetch_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] pc_fetch_next_o;// From mor1kx_fetch_prontoespresso of mor1kx_fetch_tcm_prontoespresso.v, ... + wire pipeline_flush_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] rf_result_o; // From mor1kx_wb_mux_espresso of mor1kx_wb_mux_espresso.v + wire rf_we_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] rfa_o; // From mor1kx_rf_espresso of mor1kx_rf_espresso.v + wire [OPTION_OPERAND_WIDTH-1:0] rfb_o; // From mor1kx_rf_espresso of mor1kx_rf_espresso.v + wire spr_bus_ack_ic_i; // From mor1kx_fetch_prontoespresso of mor1kx_fetch_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_ic_i;// From mor1kx_fetch_prontoespresso of mor1kx_fetch_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_npc_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire [OPTION_OPERAND_WIDTH-1:0] spr_ppc_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + wire stepping_o; // From mor1kx_ctrl_prontoespresso of mor1kx_ctrl_prontoespresso.v + // End of automatics + + generate + if (OPTION_TCM_FETCHER=="ENABLED") + begin : fetch_tcm + + /* mor1kx_fetch_tcm_prontoespresso AUTO_TEMPLATE ( + .padv_i (padv_fetch_o), + .branch_occur_i (ctrl_branch_occur_o), + .branch_dest_i (ctrl_branch_target_o), + .pipeline_flush_i (pipeline_flush_o), + .pc_decode_o (pc_fetch_to_decode), + .decode_insn_o (insn_fetch_to_decode), + .du_restart_pc_i (du_restart_pc_o), + .du_restart_i (du_restart_o), + 
.fetch_take_exception_branch_i (fetch_take_exception_branch_o), + .execute_waiting_i (execute_waiting_o), + .stepping_i (stepping_o), + .flag_i (flag_o), + .flag_clear_i (flag_clear_o), + .flag_set_i (flag_set_o), + ); */ + mor1kx_fetch_tcm_prontoespresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC) + ) + mor1kx_fetch_prontoespresso + (/*AUTOINST*/ + // Outputs + .ibus_adr_o (ibus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .ibus_req_o (ibus_req_o), + .decode_insn_o (insn_fetch_to_decode), // Templated + .fetched_pc_o (fetched_pc_o[OPTION_OPERAND_WIDTH-1:0]), + .fetch_ready_o (fetch_ready_o), + .fetch_rfa_adr_o (fetch_rfa_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .fetch_rfb_adr_o (fetch_rfb_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .fetch_rf_re_o (fetch_rf_re_o), + .pc_fetch_next_o (pc_fetch_next_o[OPTION_OPERAND_WIDTH-1:0]), + .decode_except_ibus_err_o (decode_except_ibus_err_o), + .fetch_sleep_o (fetch_sleep_o), + // Inputs + .clk (clk), + .rst (rst), + .ibus_err_i (ibus_err_i), + .ibus_ack_i (ibus_ack_i), + .ibus_dat_i (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), + .padv_i (padv_fetch_o), // Templated + .branch_occur_i (ctrl_branch_occur_o), // Templated + .branch_dest_i (ctrl_branch_target_o), // Templated + .du_restart_i (du_restart_o), // Templated + .du_restart_pc_i (du_restart_pc_o), // Templated + .fetch_take_exception_branch_i(fetch_take_exception_branch_o), // Templated + .execute_waiting_i (execute_waiting_o), // Templated + .du_stall_i (du_stall_i), + .stepping_i (stepping_o), // Templated + .flag_i (flag_o), // Templated + .flag_clear_i (flag_clear_o), // Templated + .flag_set_i (flag_set_o)); // Templated + + end + else + begin : fetch + + /* mor1kx_fetch_prontoespresso AUTO_TEMPLATE ( + .padv_i (padv_fetch_o), + .branch_occur_i (ctrl_branch_occur_o), + .branch_dest_i (ctrl_branch_target_o), + .ctrl_insn_done_i (ctrl_insn_done_o), + .pipeline_flush_i (pipeline_flush_o), + .pc_decode_o 
(pc_fetch_to_decode), + .decode_insn_o (insn_fetch_to_decode), + .du_restart_pc_i (du_restart_pc_o), + .du_restart_i (du_restart_o), + .fetch_take_exception_branch_i (fetch_take_exception_branch_o), + .execute_waiting_i (execute_waiting_o), + .stepping_i (stepping_o), + .flag_i (flag_o), + .flag_clear_i (flag_clear_o), + .flag_set_i (flag_set_o), + .spr_bus_dat_ic_o (spr_bus_dat_ic_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_ic_o (spr_bus_ack_ic_i), + .spr_bus_addr_i (spr_bus_addr_o[15:0]), + .spr_bus_we_i (spr_bus_we_o), + .spr_bus_stb_i (spr_bus_stb_o), + .spr_bus_dat_i (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .ic_enable (spr_sr_o[`OR1K_SPR_SR_ICE]), + ); */ + mor1kx_fetch_prontoespresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .FEATURE_INSTRUCTIONCACHE(FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH) + ) + mor1kx_fetch_prontoespresso + (/*AUTOINST*/ + // Outputs + .ibus_adr_o (ibus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .ibus_req_o (ibus_req_o), + .ibus_burst_o (ibus_burst_o), + .decode_insn_o (insn_fetch_to_decode), // Templated + .fetched_pc_o (fetched_pc_o[OPTION_OPERAND_WIDTH-1:0]), + .fetch_ready_o (fetch_ready_o), + .fetch_rfa_adr_o (fetch_rfa_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .fetch_rfb_adr_o (fetch_rfb_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .fetch_rf_re_o (fetch_rf_re_o), + .pc_fetch_next_o (pc_fetch_next_o[OPTION_OPERAND_WIDTH-1:0]), + .decode_except_ibus_err_o (decode_except_ibus_err_o), + .fetch_sleep_o (fetch_sleep_o), + .fetch_quick_branch_o (fetch_quick_branch_o), + .spr_bus_dat_ic_o (spr_bus_dat_ic_i[OPTION_OPERAND_WIDTH-1:0]), // Templated + .spr_bus_ack_ic_o (spr_bus_ack_ic_i), // Templated + // Inputs + .clk (clk), + .rst (rst), + .ibus_err_i (ibus_err_i), + .ibus_ack_i (ibus_ack_i), + .ibus_dat_i (ibus_dat_i[`OR1K_INSN_WIDTH-1:0]), + .ic_enable (spr_sr_o[`OR1K_SPR_SR_ICE]), // Templated + .padv_i (padv_fetch_o), 
// Templated + .branch_occur_i (ctrl_branch_occur_o), // Templated + .branch_dest_i (ctrl_branch_target_o), // Templated + .ctrl_insn_done_i (ctrl_insn_done_o), // Templated + .du_restart_i (du_restart_o), // Templated + .du_restart_pc_i (du_restart_pc_o), // Templated + .fetch_take_exception_branch_i(fetch_take_exception_branch_o), // Templated + .execute_waiting_i (execute_waiting_o), // Templated + .du_stall_i (du_stall_i), + .stepping_i (stepping_o), // Templated + .flag_i (flag_o), // Templated + .flag_clear_i (flag_clear_o), // Templated + .flag_set_i (flag_set_o), // Templated + .spr_bus_addr_i (spr_bus_addr_o[15:0]), // Templated + .spr_bus_we_i (spr_bus_we_o), // Templated + .spr_bus_stb_i (spr_bus_stb_o), // Templated + .spr_bus_dat_i (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0])); // Templated + end // else: !if(OPTION_TCM_FETCHER=="ENABLED") + endgenerate + + /* mor1kx_decode AUTO_TEMPLATE ( + .decode_insn_i (insn_fetch_to_decode), + .decode_op_lsu_atomic_o (), + ); */ + mor1kx_decode + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + .FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + .FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_MSYNC(FEATURE_MSYNC), + .FEATURE_PSYNC(FEATURE_PSYNC), + .FEATURE_CSYNC(FEATURE_CSYNC), + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + .FEATURE_CUST8(FEATURE_CUST8) + ) + mor1kx_decode + (/*AUTOINST*/ + // Outputs + .decode_opc_alu_o 
(decode_opc_alu_o[`OR1K_ALU_OPC_WIDTH-1:0]), + .decode_opc_alu_secondary_o (decode_opc_alu_secondary_o[`OR1K_ALU_OPC_WIDTH-1:0]), + .decode_imm16_o (decode_imm16_o[`OR1K_IMM_WIDTH-1:0]), + .decode_immediate_o (decode_immediate_o[OPTION_OPERAND_WIDTH-1:0]), + .decode_immediate_sel_o (decode_immediate_sel_o), + .decode_immjbr_upper_o (decode_immjbr_upper_o[9:0]), + .decode_rfd_adr_o (decode_rfd_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .decode_rfa_adr_o (decode_rfa_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .decode_rfb_adr_o (decode_rfb_adr_o[OPTION_RF_ADDR_WIDTH-1:0]), + .decode_rf_wb_o (decode_rf_wb_o), + .decode_op_jbr_o (decode_op_jbr_o), + .decode_op_jr_o (decode_op_jr_o), + .decode_op_jal_o (decode_op_jal_o), + .decode_op_bf_o (decode_op_bf_o), + .decode_op_bnf_o (decode_op_bnf_o), + .decode_op_brcond_o (decode_op_brcond_o), + .decode_op_branch_o (decode_op_branch_o), + .decode_op_alu_o (decode_op_alu_o), + .decode_op_lsu_load_o (decode_op_lsu_load_o), + .decode_op_lsu_store_o (decode_op_lsu_store_o), + .decode_op_lsu_atomic_o (), // Templated + .decode_lsu_length_o (decode_lsu_length_o[1:0]), + .decode_lsu_zext_o (decode_lsu_zext_o), + .decode_op_mfspr_o (decode_op_mfspr_o), + .decode_op_mtspr_o (decode_op_mtspr_o), + .decode_op_rfe_o (decode_op_rfe_o), + .decode_op_setflag_o (decode_op_setflag_o), + .decode_op_add_o (decode_op_add_o), + .decode_op_mul_o (decode_op_mul_o), + .decode_op_mul_signed_o (decode_op_mul_signed_o), + .decode_op_mul_unsigned_o (decode_op_mul_unsigned_o), + .decode_op_div_o (decode_op_div_o), + .decode_op_div_signed_o (decode_op_div_signed_o), + .decode_op_div_unsigned_o (decode_op_div_unsigned_o), + .decode_op_shift_o (decode_op_shift_o), + .decode_op_ffl1_o (decode_op_ffl1_o), + .decode_op_movhi_o (decode_op_movhi_o), + .decode_op_ext_o (decode_op_ext_o), + .decode_op_msync_o (decode_op_msync_o), + .decode_op_fpu_o (decode_op_fpu_o[`OR1K_FPUOP_WIDTH-1:0]), + .decode_adder_do_sub_o (decode_adder_do_sub_o), + .decode_adder_do_carry_o 
(decode_adder_do_carry_o), + .decode_except_illegal_o (decode_except_illegal_o), + .decode_except_syscall_o (decode_except_syscall_o), + .decode_except_trap_o (decode_except_trap_o), + .decode_opc_insn_o (decode_opc_insn_o[`OR1K_OPCODE_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .decode_insn_i (insn_fetch_to_decode)); // Templated + + /* mor1kx_execute_alu AUTO_TEMPLATE ( + .padv_decode_i (padv_decode_o), + .padv_execute_i (padv_execute_o), + .padv_ctrl_i (1'b1), + .pipeline_flush_i (pipeline_flush_o), + .opc_alu_i (decode_opc_alu_o), + .opc_alu_secondary_i (decode_opc_alu_secondary_o), + .imm16_i (decode_imm16_o), + .immediate_i (decode_immediate_o), + .immediate_sel_i (decode_immediate_sel_o), + .decode_valid_i (padv_decode_o), + .decode_immediate_i (decode_immediate_o), + .decode_immediate_sel_i (decode_immediate_sel_o), + .decode_op_mul_i (decode_op_mul_o), + .op_alu_i (decode_op_alu_o), + .op_add_i (decode_op_add_o), + .op_mul_i (decode_op_mul_o), + .op_mul_signed_i (decode_op_mul_signed_o), + .op_mul_unsigned_i (decode_op_mul_unsigned_o), + .op_div_i (decode_op_div_o), + .op_div_signed_i (decode_op_div_signed_o), + .op_div_unsigned_i (decode_op_div_unsigned_o), + .op_shift_i (decode_op_shift_o), + .op_ffl1_i (decode_op_ffl1_o), + .op_setflag_i (decode_op_setflag_o), + .op_mtspr_i (decode_op_mtspr_o), + .op_mfspr_i (decode_op_mfspr_o), + .op_movhi_i (decode_op_movhi_o), + .op_ext_i (decode_op_ext_o), + .op_jbr_i (decode_op_jbr_o), + .op_jr_i (decode_op_jr_o), + .op_fpu_i (decode_op_fpu_o), + .fpu_round_mode_i (2'b00), + .immjbr_upper_i (decode_immjbr_upper_o), + .pc_execute_i (spr_ppc_o), + .adder_do_sub_i (decode_adder_do_sub_o), + .adder_do_carry_i (decode_adder_do_carry_o), + .decode_rfa_i (rfa_o), + .decode_rfb_i (rfb_o), + .rfa_i (rfa_o), + .rfb_i (rfb_o), + .flag_i (flag_o), + .carry_i (carry_o), + ); */ + mor1kx_execute_alu + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .FEATURE_MULTIPLIER(FEATURE_MULTIPLIER), + 
.FEATURE_DIVIDER(FEATURE_DIVIDER), + .FEATURE_ADDC(FEATURE_ADDC), + .FEATURE_SRA(FEATURE_SRA), + .FEATURE_ROR(FEATURE_ROR), + .FEATURE_EXT(FEATURE_EXT), + .FEATURE_CMOV(FEATURE_CMOV), + .FEATURE_FFL1(FEATURE_FFL1), + .FEATURE_CUST1(FEATURE_CUST1), + .FEATURE_CUST2(FEATURE_CUST2), + .FEATURE_CUST3(FEATURE_CUST3), + .FEATURE_CUST4(FEATURE_CUST4), + .FEATURE_CUST5(FEATURE_CUST5), + .FEATURE_CUST6(FEATURE_CUST6), + .FEATURE_CUST7(FEATURE_CUST7), + .FEATURE_CUST8(FEATURE_CUST8), + .OPTION_SHIFTER(OPTION_SHIFTER) + ) + mor1kx_execute_alu + (/*AUTOINST*/ + // Outputs + .flag_set_o (flag_set_o), + .flag_clear_o (flag_clear_o), + .carry_set_o (carry_set_o), + .carry_clear_o (carry_clear_o), + .overflow_set_o (overflow_set_o), + .overflow_clear_o (overflow_clear_o), + .fpcsr_o (fpcsr_o[`OR1K_FPCSR_WIDTH-1:0]), + .fpcsr_set_o (fpcsr_set_o), + .alu_result_o (alu_result_o[OPTION_OPERAND_WIDTH-1:0]), + .alu_valid_o (alu_valid_o), + .mul_result_o (mul_result_o[OPTION_OPERAND_WIDTH-1:0]), + .adder_result_o (adder_result_o[OPTION_OPERAND_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .padv_decode_i (padv_decode_o), // Templated + .padv_execute_i (padv_execute_o), // Templated + .padv_ctrl_i (1'b1), // Templated + .pipeline_flush_i (pipeline_flush_o), // Templated + .opc_alu_i (decode_opc_alu_o), // Templated + .opc_alu_secondary_i (decode_opc_alu_secondary_o), // Templated + .imm16_i (decode_imm16_o), // Templated + .immediate_i (decode_immediate_o), // Templated + .immediate_sel_i (decode_immediate_sel_o), // Templated + .decode_immediate_i (decode_immediate_o), // Templated + .decode_immediate_sel_i (decode_immediate_sel_o), // Templated + .decode_valid_i (padv_decode_o), // Templated + .decode_op_mul_i (decode_op_mul_o), // Templated + .op_alu_i (decode_op_alu_o), // Templated + .op_add_i (decode_op_add_o), // Templated + .op_mul_i (decode_op_mul_o), // Templated + .op_mul_signed_i (decode_op_mul_signed_o), // Templated + .op_mul_unsigned_i (decode_op_mul_unsigned_o), // 
Templated + .op_div_i (decode_op_div_o), // Templated + .op_div_signed_i (decode_op_div_signed_o), // Templated + .op_div_unsigned_i (decode_op_div_unsigned_o), // Templated + .op_shift_i (decode_op_shift_o), // Templated + .op_ffl1_i (decode_op_ffl1_o), // Templated + .op_setflag_i (decode_op_setflag_o), // Templated + .op_mtspr_i (decode_op_mtspr_o), // Templated + .op_mfspr_i (decode_op_mfspr_o), // Templated + .op_movhi_i (decode_op_movhi_o), // Templated + .op_ext_i (decode_op_ext_o), // Templated + .op_fpu_i (decode_op_fpu_o), // Templated + .fpu_round_mode_i (2'b00), // Templated + .op_jbr_i (decode_op_jbr_o), // Templated + .op_jr_i (decode_op_jr_o), // Templated + .immjbr_upper_i (decode_immjbr_upper_o), // Templated + .pc_execute_i (spr_ppc_o), // Templated + .adder_do_sub_i (decode_adder_do_sub_o), // Templated + .adder_do_carry_i (decode_adder_do_carry_o), // Templated + .decode_rfa_i (rfa_o), // Templated + .decode_rfb_i (rfb_o), // Templated + .rfa_i (rfa_o), // Templated + .rfb_i (rfb_o), // Templated + .flag_i (flag_o), // Templated + .carry_i (carry_o)); // Templated + + + /* mor1kx_lsu_espresso AUTO_TEMPLATE ( + .padv_fetch_i (padv_fetch_o), + .lsu_adr_i (adder_result_o), + .rfb_i (rfb_o), + .op_lsu_load_i (decode_op_lsu_load_o), + .op_lsu_store_i (decode_op_lsu_store_o), + .lsu_length_i (decode_lsu_length_o), + .lsu_zext_i (decode_lsu_zext_o), + .exception_taken_i (exception_taken_o), + .du_restart_i (du_restart_o), + .stepping_i (stepping_o), + .next_fetch_done_i (fetch_ready_o), + ); */ + mor1kx_lsu_espresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH) + ) + mor1kx_lsu_espresso + (/*AUTOINST*/ + // Outputs + .lsu_result_o (lsu_result_o[OPTION_OPERAND_WIDTH-1:0]), + .lsu_valid_o (lsu_valid_o), + .lsu_except_dbus_o (lsu_except_dbus_o), + .lsu_except_align_o (lsu_except_align_o), + .dbus_adr_o (dbus_adr_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_req_o (dbus_req_o), + .dbus_dat_o (dbus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .dbus_bsel_o 
(dbus_bsel_o[3:0]), + .dbus_we_o (dbus_we_o), + .dbus_burst_o (dbus_burst_o), + // Inputs + .clk (clk), + .rst (rst), + .padv_fetch_i (padv_fetch_o), // Templated + .lsu_adr_i (adder_result_o), // Templated + .rfb_i (rfb_o), // Templated + .op_lsu_load_i (decode_op_lsu_load_o), // Templated + .op_lsu_store_i (decode_op_lsu_store_o), // Templated + .lsu_length_i (decode_lsu_length_o), // Templated + .lsu_zext_i (decode_lsu_zext_o), // Templated + .exception_taken_i (exception_taken_o), // Templated + .du_restart_i (du_restart_o), // Templated + .stepping_i (stepping_o), // Templated + .next_fetch_done_i (fetch_ready_o), // Templated + .dbus_err_i (dbus_err_i), + .dbus_ack_i (dbus_ack_i), + .dbus_dat_i (dbus_dat_i[OPTION_OPERAND_WIDTH-1:0])); + + + /* mor1kx_wb_mux_espresso AUTO_TEMPLATE ( + .alu_result_i (alu_result_o), + .lsu_result_i (lsu_result_o), + .spr_i (mfspr_dat_o), + .op_jal_i (decode_op_jal_o), + .op_lsu_load_i (decode_op_lsu_load_o), + .ppc_i (spr_ppc_o), + .op_mfspr_i (decode_op_mfspr_o), + .pc_fetch_next_i (link_addr_o), + ); */ + mor1kx_wb_mux_espresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH) + ) + mor1kx_wb_mux_espresso + (/*AUTOINST*/ + // Outputs + .rf_result_o (rf_result_o[OPTION_OPERAND_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .alu_result_i (alu_result_o), // Templated + .lsu_result_i (lsu_result_o), // Templated + .ppc_i (spr_ppc_o), // Templated + .pc_fetch_next_i (link_addr_o), // Templated + .spr_i (mfspr_dat_o), // Templated + .op_jal_i (decode_op_jal_o), // Templated + .op_lsu_load_i (decode_op_lsu_load_o), // Templated + .op_mfspr_i (decode_op_mfspr_o)); // Templated + + + /* mor1kx_rf_espresso AUTO_TEMPLATE ( + .rf_we_i (rf_we_o), + .rf_re_i (fetch_rf_re_o), + .rfd_adr_i (decode_rfd_adr_o), + .rfa_adr_i (fetch_rfa_adr_o), + .rfb_adr_i (fetch_rfb_adr_o), + .result_i (rf_result_o), + ); */ + mor1kx_rf_espresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RF_ADDR_WIDTH(OPTION_RF_ADDR_WIDTH), + 
.OPTION_RF_WORDS(OPTION_RF_WORDS) + ) + mor1kx_rf_espresso + (/*AUTOINST*/ + // Outputs + .rfa_o (rfa_o[OPTION_OPERAND_WIDTH-1:0]), + .rfb_o (rfb_o[OPTION_OPERAND_WIDTH-1:0]), + // Inputs + .clk (clk), + .rst (rst), + .rfd_adr_i (decode_rfd_adr_o), // Templated + .rfa_adr_i (fetch_rfa_adr_o), // Templated + .rfb_adr_i (fetch_rfb_adr_o), // Templated + .rf_we_i (rf_we_o), // Templated + .rf_re_i (fetch_rf_re_o), // Templated + .result_i (rf_result_o)); // Templated + + + /* Debug signals required for the debug monitor: get_gpr returns the value of general-purpose register gpr_num */ + function [OPTION_OPERAND_WIDTH-1:0] get_gpr; + // verilator public + input [4:0] gpr_num; + begin + // If we're writing, the value won't be in the GPR yet, so snoop + // it off the result in line - but only when the register being written (decode_rfd_adr_o, the RF write address) is the one requested; otherwise read the stored value. + if (rf_we_o && (decode_rfd_adr_o == gpr_num)) + get_gpr = rf_result_o; + else + get_gpr = mor1kx_rf_espresso.rfa.mem[gpr_num]; + end + endfunction // get_gpr + + +`ifndef SYNTHESIS +// synthesis translate_off + task set_gpr; // simulation-only: stores gpr_value at index gpr_num in both register-file memories (rfa and rfb) + // verilator public + input [4:0] gpr_num; + input [OPTION_OPERAND_WIDTH-1:0] gpr_value; + begin + mor1kx_rf_espresso.rfa.mem[gpr_num] = gpr_value; + mor1kx_rf_espresso.rfb.mem[gpr_num] = gpr_value; + end + endtask // set_gpr +// synthesis translate_on +`endif + + /* mor1kx_ctrl_prontoespresso AUTO_TEMPLATE ( + .ctrl_alu_result_i (alu_result_o), + .ctrl_rfb_i (rfb_o), + .ctrl_flag_set_i (flag_set_o), + .ctrl_flag_clear_i (flag_clear_o), + .pc_ctrl_i (), + .pc_fetch_next_i (pc_fetch_next_o), + .ctrl_opc_insn_i (decode_opc_insn_o), + .ctrl_branch_target_i (ctrl_branch_target_o), + .op_lsu_load_i (decode_op_lsu_load_o), + .op_lsu_store_i (decode_op_lsu_store_o), + .alu_valid_i (alu_valid_o), + .lsu_valid_i (lsu_valid_o), + .op_jr_i (decode_op_jr_o), + .op_jbr_i (decode_op_jbr_o), + .except_ibus_err_i (decode_except_ibus_err_o), + .except_illegal_i (decode_except_illegal_o), + .except_syscall_i (decode_except_syscall_o), + .except_dbus_i (lsu_except_dbus_o), + .except_trap_i (decode_except_trap_o), + .except_align_i (lsu_except_align_o), + .fetch_ready_i (fetch_ready_o), +
.execute_valid_i (execute_valid_o), + .execute_waiting_i (execute_waiting_o), + .fetch_branch_taken_i (fetch_branch_taken_o), + .fetch_ppc_i (fetched_pc_o), + .fetch_sleep_i (fetch_sleep_o), + .fetch_quick_branch_i (fetch_quick_branch_o), + .rf_wb_i (decode_rf_wb_o), + .spr_bus_dat_dc_i (), + .spr_bus_ack_dc_i (), + .carry_set_i (carry_set_o), + .carry_clear_i (carry_clear_o), + .overflow_set_i (overflow_set_o), + .overflow_clear_i (overflow_clear_o), + ); */ + mor1kx_ctrl_prontoespresso + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_RESET_PC(OPTION_RESET_PC), + .FEATURE_PIC(FEATURE_PIC), + .FEATURE_TIMER(FEATURE_TIMER), + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH), + .FEATURE_DSX(FEATURE_DSX), + .FEATURE_FASTCONTEXTS(FEATURE_FASTCONTEXTS), + .FEATURE_OVERFLOW(FEATURE_OVERFLOW), + .FEATURE_DATACACHE(FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH(OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH(OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS(OPTION_DCACHE_WAYS), + .FEATURE_DMMU(FEATURE_DMMU), + .FEATURE_INSTRUCTIONCACHE(FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH(OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS(OPTION_ICACHE_WAYS), + .FEATURE_IMMU(FEATURE_IMMU), + .FEATURE_DEBUGUNIT(FEATURE_DEBUGUNIT), + .FEATURE_PERFCOUNTERS(FEATURE_PERFCOUNTERS), + .FEATURE_MAC(FEATURE_MAC), + .FEATURE_MULTICORE(FEATURE_MULTICORE), + .FEATURE_SYSCALL(FEATURE_SYSCALL), + .FEATURE_TRAP(FEATURE_TRAP), + .FEATURE_RANGE(FEATURE_RANGE) + ) + mor1kx_ctrl_prontoespresso + (/*AUTOINST*/ + // Outputs + .spr_npc_o (spr_npc_o[OPTION_OPERAND_WIDTH-1:0]), + .spr_ppc_o (spr_ppc_o[OPTION_OPERAND_WIDTH-1:0]), + .link_addr_o (link_addr_o[OPTION_OPERAND_WIDTH-1:0]), + .mfspr_dat_o (mfspr_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .ctrl_mfspr_we_o (ctrl_mfspr_we_o), + .flag_o (flag_o), + .carry_o (carry_o), + .pipeline_flush_o (pipeline_flush_o), + .padv_fetch_o 
(padv_fetch_o), + .padv_decode_o (padv_decode_o), + .padv_execute_o (padv_execute_o), + .fetch_take_exception_branch_o (fetch_take_exception_branch_o), + .exception_taken_o (exception_taken_o), + .execute_waiting_o (execute_waiting_o), + .stepping_o (stepping_o), + .du_dat_o (du_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .du_ack_o (du_ack_o), + .du_stall_o (du_stall_o), + .du_restart_pc_o (du_restart_pc_o[OPTION_OPERAND_WIDTH-1:0]), + .du_restart_o (du_restart_o), + .spr_bus_addr_o (spr_bus_addr_o[15:0]), + .spr_bus_we_o (spr_bus_we_o), + .spr_bus_stb_o (spr_bus_stb_o), + .spr_bus_dat_o (spr_bus_dat_o[OPTION_OPERAND_WIDTH-1:0]), + .spr_sr_o (spr_sr_o[15:0]), + .ctrl_branch_target_o (ctrl_branch_target_o[OPTION_OPERAND_WIDTH-1:0]), + .ctrl_insn_done_o (ctrl_insn_done_o), + .ctrl_branch_occur_o (ctrl_branch_occur_o), + .rf_we_o (rf_we_o), + // Inputs + .clk (clk), + .rst (rst), + .ctrl_alu_result_i (alu_result_o), // Templated + .ctrl_rfb_i (rfb_o), // Templated + .ctrl_flag_set_i (flag_set_o), // Templated + .ctrl_flag_clear_i (flag_clear_o), // Templated + .ctrl_opc_insn_i (decode_opc_insn_o), // Templated + .fetch_ppc_i (fetched_pc_o), // Templated + .pc_fetch_next_i (pc_fetch_next_o), // Templated + .fetch_sleep_i (fetch_sleep_o), // Templated + .except_ibus_err_i (decode_except_ibus_err_o), // Templated + .except_illegal_i (decode_except_illegal_o), // Templated + .except_syscall_i (decode_except_syscall_o), // Templated + .except_dbus_i (lsu_except_dbus_o), // Templated + .except_trap_i (decode_except_trap_o), // Templated + .except_align_i (lsu_except_align_o), // Templated + .fetch_ready_i (fetch_ready_o), // Templated + .fetch_quick_branch_i (fetch_quick_branch_o), // Templated + .alu_valid_i (alu_valid_o), // Templated + .lsu_valid_i (lsu_valid_o), // Templated + .op_lsu_load_i (decode_op_lsu_load_o), // Templated + .op_lsu_store_i (decode_op_lsu_store_o), // Templated + .op_jr_i (decode_op_jr_o), // Templated + .op_jbr_i (decode_op_jbr_o), // Templated + .irq_i 
(irq_i[31:0]), + .carry_set_i (carry_set_o), // Templated + .carry_clear_i (carry_clear_o), // Templated + .overflow_set_i (overflow_set_o), // Templated + .overflow_clear_i (overflow_clear_o), // Templated + .du_addr_i (du_addr_i[15:0]), + .du_stb_i (du_stb_i), + .du_dat_i (du_dat_i[OPTION_OPERAND_WIDTH-1:0]), + .du_we_i (du_we_i), + .du_stall_i (du_stall_i), + .spr_bus_dat_dc_i (), // Templated + .spr_bus_ack_dc_i (), // Templated + .spr_bus_dat_ic_i (spr_bus_dat_ic_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_ic_i (spr_bus_ack_ic_i), + .spr_bus_dat_dmmu_i (spr_bus_dat_dmmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_dmmu_i (spr_bus_ack_dmmu_i), + .spr_bus_dat_immu_i (spr_bus_dat_immu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_immu_i (spr_bus_ack_immu_i), + .spr_bus_dat_mac_i (spr_bus_dat_mac_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_mac_i (spr_bus_ack_mac_i), + .spr_bus_dat_pmu_i (spr_bus_dat_pmu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pmu_i (spr_bus_ack_pmu_i), + .spr_bus_dat_pcu_i (spr_bus_dat_pcu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_pcu_i (spr_bus_ack_pcu_i), + .spr_bus_dat_fpu_i (spr_bus_dat_fpu_i[OPTION_OPERAND_WIDTH-1:0]), + .spr_bus_ack_fpu_i (spr_bus_ack_fpu_i), + .multicore_coreid_i (multicore_coreid_i[OPTION_OPERAND_WIDTH-1:0]), + .rf_wb_i (decode_rf_wb_o)); // Templated + +endmodule // mor1kx_cpu_prontoespresso diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ctrl_cappuccino.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ctrl_cappuccino.v new file mode 100644 index 0000000..147bc33 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ctrl_cappuccino.v @@ -0,0 +1,1593 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx control unit + + inputs from execute stage + + generate pipeline controls + + manage SPRs + + issue addresses for exceptions to fetch stage + control branches going to fetch stage + + contains tick timer + + contains PIC logic + + Copyright (C) 2012 Julius Baxter + Copyright (C) 2012-2013 Stefan Kristiansson + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_ctrl_cappuccino + #( + parameter OPTION_OPERAND_WIDTH = 32, + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}, + + parameter FEATURE_SYSCALL = "ENABLED", + parameter FEATURE_TRAP = "ENABLED", + parameter FEATURE_RANGE = "ENABLED", + + parameter FEATURE_DATACACHE = "NONE", + parameter OPTION_DCACHE_BLOCK_WIDTH = 5, + parameter OPTION_DCACHE_SET_WIDTH = 9, + parameter OPTION_DCACHE_WAYS = 2, + parameter FEATURE_DMMU = "NONE", + parameter OPTION_DMMU_SET_WIDTH = 6, + parameter OPTION_DMMU_WAYS = 1, + parameter FEATURE_INSTRUCTIONCACHE = "NONE", + parameter OPTION_ICACHE_BLOCK_WIDTH = 5, + parameter OPTION_ICACHE_SET_WIDTH = 9, + parameter OPTION_ICACHE_WAYS = 2, + parameter FEATURE_IMMU = "NONE", + parameter OPTION_IMMU_SET_WIDTH = 6, + parameter OPTION_IMMU_WAYS = 1, + parameter FEATURE_TIMER = "ENABLED", + parameter FEATURE_DEBUGUNIT = "NONE", + parameter FEATURE_PERFCOUNTERS = "NONE", + parameter OPTION_PERFCOUNTERS_NUM = 0, + parameter FEATURE_PMU = "NONE", + parameter FEATURE_MAC = "NONE", + parameter FEATURE_FPU = "NONE", + parameter FEATURE_MULTICORE = "NONE", + + parameter FEATURE_PIC = "ENABLED", + parameter OPTION_PIC_TRIGGER = "LEVEL", + parameter OPTION_PIC_NMI_WIDTH = 0, + + parameter FEATURE_DSX ="NONE", + parameter FEATURE_FASTCONTEXTS = "NONE", + parameter OPTION_RF_NUM_SHADOW_GPR = 0, + parameter FEATURE_OVERFLOW = "NONE", + 
parameter FEATURE_CARRY_FLAG = "ENABLED", + + parameter SPR_SR_WIDTH = 16, + parameter SPR_SR_RESET_VALUE = 16'h8001 + ) + ( + input clk, + input rst, + + // ALU result - either jump target, SPR address + input [OPTION_OPERAND_WIDTH-1:0] ctrl_alu_result_i, + + // LSU address, needed for effective address + input [OPTION_OPERAND_WIDTH-1:0] ctrl_lsu_adr_i, + + // Operand B from RF might be jump address, might be value for SPR + input [OPTION_OPERAND_WIDTH-1:0] ctrl_rfb_i, + + input ctrl_flag_set_i, + input ctrl_flag_clear_i, + input atomic_flag_set_i, + input atomic_flag_clear_i, + + input [OPTION_OPERAND_WIDTH-1:0] pc_ctrl_i, + + input ctrl_op_mfspr_i, + input ctrl_op_mtspr_i, + input ctrl_op_rfe_i, + + // Indicate if branch will be taken based on instruction currently in + // decode stage. + input decode_branch_i, + input [OPTION_OPERAND_WIDTH-1:0] decode_branch_target_i, + + input branch_mispredict_i, + input [OPTION_OPERAND_WIDTH-1:0] execute_mispredict_target_i, + + // PC of execute stage (NPC) + input [OPTION_OPERAND_WIDTH-1:0] pc_execute_i, + + input execute_op_branch_i, + + // Exception inputs, registered on output of execute stage + input except_ibus_err_i, + input except_itlb_miss_i, + input except_ipagefault_i, + input except_ibus_align_i, + input except_illegal_i, + input except_syscall_i, + input except_dbus_i, + input except_dtlb_miss_i, + input except_dpagefault_i, + input except_trap_i, + input except_align_i, + + // Inputs from two units that can stall proceedings + input fetch_valid_i, + input decode_valid_i, + input execute_valid_i, + input ctrl_valid_i, + + input fetch_exception_taken_i, + + input decode_bubble_i, + input execute_bubble_i, + + // Inputs from decode-exec stage to PCU + input execute_op_lsu_load_i, + input execute_op_lsu_store_i, + + // Inputs from icache and dcache + input icache_hit_i, + input dcache_hit_i, + + // External IRQ lines in + input [31:0] irq_i, + + // Exception PC output, used in the lsu to properly signal dbus errors 
that + // has went through the store buffer + output [OPTION_OPERAND_WIDTH-1:0] ctrl_epcr_o, + // Exception PC input coming from the store buffer + input [OPTION_OPERAND_WIDTH-1:0] store_buffer_epcr_i, + + input store_buffer_err_i, + + // SPR data out + output [OPTION_OPERAND_WIDTH-1:0] mfspr_dat_o, + + // WE to RF for l.mfspr + output ctrl_mfspr_ack_o, + output ctrl_mtspr_ack_o, + + // Flag out to branch control, combinatorial + output ctrl_flag_o, + + // Arithmetic flags to and from ALU + output ctrl_carry_o, + input ctrl_carry_set_i, + input ctrl_carry_clear_i, + input ctrl_overflow_set_i, + input ctrl_overflow_clear_i, + + // FPU Status flags to and from ALU + output [`OR1K_FPCSR_RM_SIZE-1:0] ctrl_fpu_round_mode_o, + input [`OR1K_FPCSR_WIDTH-1:0] ctrl_fpcsr_i, + input ctrl_fpcsr_set_i, + + // Branch indicator from control unit (l.rfe/exception) + output ctrl_branch_exception_o, + // PC out to fetch stage for l.rfe, exceptions + output [OPTION_OPERAND_WIDTH-1:0] ctrl_branch_except_pc_o, + + // Clear instructions from decode and fetch stage + output pipeline_flush_o, + + // Indicate that a rfe is going on + output doing_rfe_o, + + output padv_fetch_o, + output padv_decode_o, + output padv_execute_o, + output padv_ctrl_o, + + // Debug bus + input [15:0] du_addr_i, + input du_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] du_dat_i, + input du_we_i, + output [OPTION_OPERAND_WIDTH-1:0] du_dat_o, + output du_ack_o, + // Stall control from debug interface + input du_stall_i, + output du_stall_o, + output [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_o, + output du_restart_o, + + // SPR accesses to external units (cache, mmu, etc.) 
+ output [15:0] spr_bus_addr_o, + output spr_bus_we_o, + output spr_bus_stb_o, + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dc_i, + input spr_bus_ack_dc_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_ic_i, + input spr_bus_ack_ic_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dmmu_i, + input spr_bus_ack_dmmu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_immu_i, + input spr_bus_ack_immu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_mac_i, + input spr_bus_ack_mac_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pmu_i, + input spr_bus_ack_pmu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pcu_i, + input spr_bus_ack_pcu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_fpu_i, + input spr_bus_ack_fpu_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_gpr_dat_i, + input spr_gpr_ack_i, + output [15:0] spr_sr_o, + + output reg ctrl_bubble_o, + + input [OPTION_OPERAND_WIDTH-1:0] multicore_coreid_i, + input [OPTION_OPERAND_WIDTH-1:0] multicore_numcores_i + ); + + // Internal signals + reg [SPR_SR_WIDTH-1:0] spr_sr; + reg [SPR_SR_WIDTH-1:0] spr_esr; + reg [OPTION_OPERAND_WIDTH-1:0] spr_epcr; + reg [OPTION_OPERAND_WIDTH-1:0] spr_eear; + reg [OPTION_OPERAND_WIDTH-1:0] spr_evbar; + + // Programmable Interrupt Control SPRs + wire [31:0] spr_picmr; + wire [31:0] spr_picsr; + + // Tick Timer SPRs + wire [31:0] spr_ttmr; + wire [31:0] spr_ttcr; + + // FPU Control & Status Register + // and related exception signals + reg [`OR1K_FPCSR_WIDTH-1:0] spr_fpcsr; + wire except_fpu; + + reg [OPTION_OPERAND_WIDTH-1:0] spr_ppc; + reg [OPTION_OPERAND_WIDTH-1:0] spr_npc; + reg execute_delay_slot; + reg ctrl_delay_slot; + + wire execute_waiting; + reg execute_waiting_r; + + reg decode_execute_halt; + + reg exception_taken; + + reg [OPTION_OPERAND_WIDTH-1:0] last_branch_insn_pc; + reg [OPTION_OPERAND_WIDTH-1:0] last_branch_target_pc; + reg padv_ctrl; + + reg exception_r; + + reg [OPTION_OPERAND_WIDTH-1:0] exception_pc_addr; + + reg 
waiting_for_fetch; + + reg doing_rfe_r; + wire doing_rfe; + wire deassert_doing_rfe; + + wire exception, exception_pending; + + reg ctrl_stage_exceptions; + + wire exception_re; + + wire except_ticktimer; + wire except_pic; + + wire except_range; + + wire [15:0] spr_addr; + + wire [OPTION_OPERAND_WIDTH-1:0] b; + + wire deassert_decode_execute_halt; + + /* Debug SPRs */ + reg [31:0] spr_dmr1; + reg [31:0] spr_dmr2; + reg [31:0] spr_dsr; + reg [31:0] spr_drr; + + /* DU internal control signals */ + wire du_access; + reg cpu_stall; + wire du_restart_from_stall; + reg [5:0] pstep; + wire stepping; + wire stepped_into_delay_slot; + reg stepped_into_exception; + reg stepped_into_rfe; + wire du_npc_write; + reg du_npc_written; + wire stall_on_trap; + + /* Wires for SPR management */ + wire spr_access_valid; + wire spr_we; + wire spr_read; + wire spr_ack; + wire [OPTION_OPERAND_WIDTH-1:0] spr_write_dat; + reg [11:0] spr_access; + wire [11:0] spr_access_ack; + wire [31:0] spr_internal_read_dat [0:11]; + wire spr_read_access; + wire spr_write_access; + wire spr_bus_access; + reg [OPTION_OPERAND_WIDTH-1:0] spr_sys_group_read; + wire [3:0] spr_group; + + /* Wires from mor1kx_cfgrs module */ + wire [31:0] spr_vr; + wire [31:0] spr_vr2; + wire [31:0] spr_avr; + wire [31:0] spr_upr; + wire [31:0] spr_cpucfgr; + wire [31:0] spr_dmmucfgr; + wire [31:0] spr_immucfgr; + wire [31:0] spr_dccfgr; + wire [31:0] spr_iccfgr; + wire [31:0] spr_dcfgr; + wire [31:0] spr_pccfgr; + wire [31:0] spr_isr [0:7]; + + assign b = ctrl_rfb_i; + + assign ctrl_branch_exception_o = (exception_r | ctrl_op_rfe_i | doing_rfe) & + !exception_taken; + assign exception_pending = (except_ibus_err_i | except_ibus_align_i | + except_illegal_i | except_syscall_i | + except_dbus_i | except_align_i | + except_ticktimer | except_range | except_fpu | + except_pic | except_trap_i | + except_itlb_miss_i | except_ipagefault_i | + except_dtlb_miss_i | except_dpagefault_i); + + assign exception = exception_pending & + 
(padv_ctrl & !ctrl_bubble_o | ctrl_stage_exceptions); + + assign exception_re = exception & !exception_r & !exception_taken; + + assign except_range = (FEATURE_RANGE!="NONE") ? spr_sr[`OR1K_SPR_SR_OVE] && + (spr_sr[`OR1K_SPR_SR_OV] | ctrl_overflow_set_i) & + !doing_rfe : 0; + + assign deassert_decode_execute_halt = fetch_exception_taken_i & + decode_execute_halt; + + assign ctrl_branch_except_pc_o = (ctrl_op_rfe_i | doing_rfe) ? spr_epcr : + exception_pc_addr; + + assign ctrl_epcr_o = ctrl_delay_slot ? pc_ctrl_i - 4 : pc_ctrl_i; + + always @(posedge clk) + ctrl_stage_exceptions <= except_align_i | except_dbus_i | except_range | + except_fpu | + except_dtlb_miss_i | except_dpagefault_i; + + always @(posedge clk) + if (exception & !exception_r) + casez( + { + except_itlb_miss_i, + except_ipagefault_i, + except_ibus_err_i, + except_illegal_i, + except_align_i, + except_ibus_align_i, + except_syscall_i, + except_dtlb_miss_i, + except_dpagefault_i, + except_trap_i, + except_dbus_i, + except_range, + except_fpu, + except_pic, + except_ticktimer + } + ) + 15'b1??????????????: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_ITLB_VECTOR,8'd0}; + 15'b01?????????????: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_IPF_VECTOR,8'd0}; + 15'b001????????????: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_BERR_VECTOR,8'd0}; + 15'b0001???????????: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_ILLEGAL_VECTOR,8'd0}; + 15'b00001??????????, + 15'b000001?????????: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_ALIGN_VECTOR,8'd0}; + 15'b0000001????????: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_SYSCALL_VECTOR,8'd0}; + 15'b00000001???????: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_DTLB_VECTOR,8'd0}; + 15'b000000001??????: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_DPF_VECTOR,8'd0}; + 15'b0000000001?????: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_TRAP_VECTOR,8'd0}; + 15'b00000000001????: + exception_pc_addr <= spr_evbar | + 
{19'd0,`OR1K_BERR_VECTOR,8'd0}; + 15'b000000000001???: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_RANGE_VECTOR,8'd0}; + 15'b0000000000001??: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_FP_VECTOR,8'd0}; + 15'b00000000000001?: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_INT_VECTOR,8'd0}; + //15'b000000000000001: + default: + exception_pc_addr <= spr_evbar | + {19'd0,`OR1K_TT_VECTOR,8'd0}; + endcase // casez (... + + assign execute_waiting = !execute_valid_i; + + assign padv_fetch_o = !execute_waiting & !cpu_stall & !decode_bubble_i + & (!stepping | (stepping & pstep[0] & !fetch_valid_i)); + + assign padv_decode_o = fetch_valid_i & !execute_waiting & + !decode_execute_halt & !cpu_stall + & (!stepping | (stepping & pstep[1])); + + assign padv_execute_o = ((decode_valid_i & !execute_waiting & + /* Stop fetch before exception branch continuing */ + !(exception_r & fetch_exception_taken_i)) | + (!execute_waiting & execute_waiting_r & + fetch_valid_i) | + // Case where execute became ready before fetch + // after delay in execute stage + (waiting_for_fetch & fetch_valid_i)) & + // No exceptions occurring + !decode_execute_halt & !exception_re & !ctrl_op_rfe_i + & !cpu_stall & (!stepping | (stepping & pstep[2])); + + assign padv_ctrl_o = padv_ctrl; + + assign spr_addr = du_access ? 
du_addr_i : ctrl_alu_result_i[15:0]; + assign ctrl_mfspr_ack_o = spr_ack; + assign ctrl_mtspr_ack_o = spr_ack; + + // Pipeline flush + assign pipeline_flush_o = (padv_ctrl & ctrl_op_rfe_i) | + (exception_re) | + cpu_stall; + + // Flag output + wire ctrl_flag_clear = ctrl_flag_clear_i | atomic_flag_clear_i; + wire ctrl_flag_set = ctrl_flag_set_i | atomic_flag_set_i; + + assign ctrl_flag_o = (!ctrl_flag_clear & spr_sr[`OR1K_SPR_SR_F]) | + ctrl_flag_set; + + // Carry output + assign ctrl_carry_o = FEATURE_CARRY_FLAG!="NONE" & + (!ctrl_carry_clear_i & spr_sr[`OR1K_SPR_SR_CY] | + ctrl_carry_set_i); + + // Ctrl stage pipeline advance signal is one cycle behind execute stage's + always @(posedge clk `OR_ASYNC_RST) + if (rst) + padv_ctrl <= 0; + else + padv_ctrl <= padv_execute_o; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_waiting_r <= 0; + else if (!execute_waiting) + execute_waiting_r <= 0; + else if (decode_valid_i & execute_waiting) + execute_waiting_r <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + decode_execute_halt <= 0; + else if (du_restart_from_stall) + decode_execute_halt <= 0; + else if (decode_execute_halt & deassert_decode_execute_halt) + decode_execute_halt <= 0; + else if ((ctrl_op_rfe_i | exception) & !decode_execute_halt & + !exception_taken) + decode_execute_halt <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + exception_r <= 0; + else if (exception_taken | du_restart_from_stall) + exception_r <= 0; + else if (exception & !exception_r) + exception_r <= 1; + + // Signal to indicate that the incoming exception or l.rfe has been taken + // and we're waiting for it to propagate through the pipeline. 
+ always @(posedge clk `OR_ASYNC_RST) + if (rst) + exception_taken <= 0; + else if (exception_taken) + exception_taken <= 0; + else if (exception_r & fetch_exception_taken_i) + exception_taken <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + last_branch_insn_pc <= 0; + else if (padv_execute_o & execute_op_branch_i) + last_branch_insn_pc <= pc_execute_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + last_branch_target_pc <= 0; + else if (padv_execute_o & branch_mispredict_i) + last_branch_target_pc <= execute_mispredict_target_i; + else if (padv_decode_o & decode_branch_i) + last_branch_target_pc <= decode_branch_target_i; + + // Used to gate execute stage's advance signal in the case where a LSU op has + // finished before the next instruction has been fetched. Typically this + // occurs when not using icache and doing lots of memory accesses. + always @(posedge clk `OR_ASYNC_RST) + if (rst) + waiting_for_fetch <= 0; + else if (fetch_valid_i) + waiting_for_fetch <= 0; + else if (!execute_waiting & execute_waiting_r & !fetch_valid_i) + waiting_for_fetch <= 1; + + + assign doing_rfe = ((padv_ctrl & ctrl_op_rfe_i) | doing_rfe_r) & + !deassert_doing_rfe; + + assign doing_rfe_o = doing_rfe; + + assign deassert_doing_rfe = fetch_exception_taken_i & doing_rfe_r; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + doing_rfe_r <= 0; + else if (deassert_doing_rfe) + doing_rfe_r <= 0; + else if (padv_ctrl) + doing_rfe_r <= ctrl_op_rfe_i; + + assign spr_sr_o = spr_sr; + + + // FPU related: FPCSR and exception + generate + `ifdef OR1K_FPCSR_MASK_FLAGS + reg [`OR1K_FPCSR_ALLF_SIZE-1:0] spr_fpcsr_mf; // mask for FPU flags + `endif + + /* verilator lint_off WIDTH */ + if (FEATURE_FPU != "NONE") begin : fpu_csr_ena + /* verilator lint_on WIDTH */ + assign ctrl_fpu_round_mode_o = spr_fpcsr[`OR1K_FPCSR_RM]; + + // select all flags + `ifdef OR1K_FPCSR_MASK_FLAGS + wire [`OR1K_FPCSR_ALLF_SIZE-1:0] masked_fpres_flags = + ctrl_fpcsr_i[`OR1K_FPCSR_ALLF] & spr_fpcsr_mf; + 
+ wire [`OR1K_FPCSR_ALLF_SIZE-1:0] masked_fpcsr_flags = + spr_fpcsr[`OR1K_FPCSR_ALLF] & spr_fpcsr_mf; + + + wire [`OR1K_FPCSR_ALLF_SIZE-1:0] fpu_allf = + ctrl_fpcsr_set_i ? masked_fpres_flags : + masked_fpcsr_flags; + `else + wire [`OR1K_FPCSR_ALLF_SIZE-1:0] fpu_allf = + ctrl_fpcsr_set_i ? ctrl_fpcsr_i[`OR1K_FPCSR_ALLF] : + spr_fpcsr[`OR1K_FPCSR_ALLF]; + `endif + + assign except_fpu = (~doing_rfe) & + spr_fpcsr[`OR1K_FPCSR_FPEE] & + (|fpu_allf); + + // FPU Control & status register + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) begin + spr_fpcsr <= `OR1K_FPCSR_RESET_VALUE; + `ifdef OR1K_FPCSR_MASK_FLAGS + spr_fpcsr_mf <= `OR1K_FPCSR_MASK_RESET_VALUE; + `endif + end + else if (exception_re) begin + spr_fpcsr[`OR1K_FPCSR_ALLF] <= fpu_allf; + spr_fpcsr[`OR1K_FPCSR_RM] <= spr_fpcsr[`OR1K_FPCSR_RM]; + spr_fpcsr[`OR1K_FPCSR_FPEE] <= 1'b0; + end + else if ((spr_we & spr_access[`OR1K_SPR_SYS_BASE] & + (spr_sr[`OR1K_SPR_SR_SM] & padv_ctrl | du_access)) && + `SPR_OFFSET(spr_addr)==`SPR_OFFSET(`OR1K_SPR_FPCSR_ADDR)) begin + spr_fpcsr <= spr_write_dat[`OR1K_FPCSR_WIDTH-1:0]; // update all fields + `ifdef OR1K_FPCSR_MASK_FLAGS + spr_fpcsr_mf <= spr_write_dat[`OR1K_FPCSR_MASK_ALL]; + `endif + end + else if (padv_ctrl & ctrl_fpcsr_set_i) begin + spr_fpcsr[`OR1K_FPCSR_ALLF] <= fpu_allf; + spr_fpcsr[`OR1K_FPCSR_RM] <= spr_fpcsr[`OR1K_FPCSR_RM]; + spr_fpcsr[`OR1K_FPCSR_FPEE] <= spr_fpcsr[`OR1K_FPCSR_FPEE]; + end + end // FPCSR reg's always(@posedge clk) + end + else begin : fpu_csr_none + assign ctrl_fpu_round_mode_o = {`OR1K_FPCSR_RM_SIZE{1'b0}}; + assign except_fpu = 0; + // FPU Control & status register + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) begin + spr_fpcsr <= {`OR1K_FPCSR_WIDTH{1'b0}}; + `ifdef OR1K_FPCSR_MASK_FLAGS + spr_fpcsr_mf <= {`OR1K_FPCSR_ALLF_SIZE{1'b0}}; + `endif + end + end // FPCSR reg's always(@posedge clk) + end + endgenerate // FPU related: FPCSR and exception + + + // Supervision register + always @(posedge clk `OR_ASYNC_RST) + if (rst) 
+ spr_sr <= SPR_SR_RESET_VALUE; + else if (exception_re) + begin + // Go into supervisor mode, disable interrupts, MMUs + spr_sr[`OR1K_SPR_SR_SM ] <= 1'b1; + if (FEATURE_TIMER!="NONE") + spr_sr[`OR1K_SPR_SR_TEE ] <= 1'b0; + if (FEATURE_PIC!="NONE") + spr_sr[`OR1K_SPR_SR_IEE ] <= 1'b0; + if (FEATURE_DMMU!="NONE") + spr_sr[`OR1K_SPR_SR_DME ] <= 1'b0; + if (FEATURE_IMMU!="NONE") + spr_sr[`OR1K_SPR_SR_IME ] <= 1'b0; + if (FEATURE_DSX!="NONE") + spr_sr[`OR1K_SPR_SR_DSX ] <= ctrl_delay_slot; + if (FEATURE_OVERFLOW!="NONE") + spr_sr[`OR1K_SPR_SR_OVE ] <= 1'b0; + end + else if ((spr_we & spr_access[`OR1K_SPR_SYS_BASE] & + (spr_sr[`OR1K_SPR_SR_SM] & padv_ctrl | du_access)) && + `SPR_OFFSET(spr_addr)==`SPR_OFFSET(`OR1K_SPR_SR_ADDR)) + begin + spr_sr[`OR1K_SPR_SR_SM ] <= spr_write_dat[`OR1K_SPR_SR_SM ]; + + spr_sr[`OR1K_SPR_SR_F ] <= spr_write_dat[`OR1K_SPR_SR_F ]; + + if (FEATURE_TIMER!="NONE") + spr_sr[`OR1K_SPR_SR_TEE ] <= spr_write_dat[`OR1K_SPR_SR_TEE ]; + + if (FEATURE_PIC!="NONE") + spr_sr[`OR1K_SPR_SR_IEE ] <= spr_write_dat[`OR1K_SPR_SR_IEE ]; + + if (FEATURE_DATACACHE!="NONE") + spr_sr[`OR1K_SPR_SR_DCE ] <= spr_write_dat[`OR1K_SPR_SR_DCE ]; + + if (FEATURE_INSTRUCTIONCACHE!="NONE") + spr_sr[`OR1K_SPR_SR_ICE ] <= spr_write_dat[`OR1K_SPR_SR_ICE ]; + + if (FEATURE_DMMU!="NONE") + spr_sr[`OR1K_SPR_SR_DME ] <= spr_write_dat[`OR1K_SPR_SR_DME ]; + + if (FEATURE_IMMU!="NONE") + spr_sr[`OR1K_SPR_SR_IME ] <= spr_write_dat[`OR1K_SPR_SR_IME ]; + + if (FEATURE_FASTCONTEXTS!="NONE") + spr_sr[`OR1K_SPR_SR_CE ] <= spr_write_dat[`OR1K_SPR_SR_CE ]; + + if (FEATURE_CARRY_FLAG!="NONE") + spr_sr[`OR1K_SPR_SR_CY] <= spr_write_dat[`OR1K_SPR_SR_CY]; + + if (FEATURE_OVERFLOW!="NONE") begin + spr_sr[`OR1K_SPR_SR_OV ] <= spr_write_dat[`OR1K_SPR_SR_OV ]; + spr_sr[`OR1K_SPR_SR_OVE ] <= spr_write_dat[`OR1K_SPR_SR_OVE ]; + end + + if (FEATURE_DSX!="NONE") + spr_sr[`OR1K_SPR_SR_DSX ] <= spr_write_dat[`OR1K_SPR_SR_DSX ]; + + spr_sr[`OR1K_SPR_SR_EPH ] <= spr_write_dat[`OR1K_SPR_SR_EPH ]; + end + else 
if (padv_ctrl) + begin + spr_sr[`OR1K_SPR_SR_F ] <= ctrl_flag_set ? 1 : + ctrl_flag_clear ? 0 : + spr_sr[`OR1K_SPR_SR_F ]; + + if (FEATURE_CARRY_FLAG!="NONE") + spr_sr[`OR1K_SPR_SR_CY] <= ctrl_carry_set_i ? 1 : + ctrl_carry_clear_i ? 0 : + spr_sr[`OR1K_SPR_SR_CY]; + if (FEATURE_OVERFLOW!="NONE") + spr_sr[`OR1K_SPR_SR_OV ] <= ctrl_overflow_set_i ? 1 : + ctrl_overflow_clear_i ? 0 : + spr_sr[`OR1K_SPR_SR_OV ]; + // Skip FO. TODO: make this even more selective. + if (ctrl_op_rfe_i) + spr_sr[14:0] <= spr_esr[14:0]; + end + + + // Exception SR + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_esr <= SPR_SR_RESET_VALUE; + else if (exception_re) + begin + spr_esr <= spr_sr; + if (FEATURE_OVERFLOW!="NONE") + begin + if (ctrl_overflow_set_i) + spr_esr[`OR1K_SPR_SR_OV] <= 1'b1; + else if (ctrl_overflow_clear_i) + spr_esr[`OR1K_SPR_SR_OV] <= 1'b0; + end + if (FEATURE_CARRY_FLAG!="NONE") begin + if (ctrl_carry_set_i) + spr_esr[`OR1K_SPR_SR_CY] <= 1'b1; + else if (ctrl_carry_clear_i) + spr_esr[`OR1K_SPR_SR_CY] <= 1'b0; + end + end + else if (spr_we && spr_access[`OR1K_SPR_SYS_BASE] && + `SPR_OFFSET(spr_addr)==`SPR_OFFSET(`OR1K_SPR_ESR0_ADDR)) + begin + spr_esr[`OR1K_SPR_SR_SM ] <= spr_write_dat[`OR1K_SPR_SR_SM ]; + + spr_esr[`OR1K_SPR_SR_F ] <= spr_write_dat[`OR1K_SPR_SR_F ]; + + if (FEATURE_TIMER!="NONE") + spr_esr[`OR1K_SPR_SR_TEE ] <= spr_write_dat[`OR1K_SPR_SR_TEE ]; + + if (FEATURE_PIC!="NONE") + spr_esr[`OR1K_SPR_SR_IEE ] <= spr_write_dat[`OR1K_SPR_SR_IEE ]; + + if (FEATURE_DATACACHE!="NONE") + spr_esr[`OR1K_SPR_SR_DCE ] <= spr_write_dat[`OR1K_SPR_SR_DCE ]; + + if (FEATURE_INSTRUCTIONCACHE!="NONE") + spr_esr[`OR1K_SPR_SR_ICE ] <= spr_write_dat[`OR1K_SPR_SR_ICE ]; + + if (FEATURE_DMMU!="NONE") + spr_esr[`OR1K_SPR_SR_DME ] <= spr_write_dat[`OR1K_SPR_SR_DME ]; + + if (FEATURE_IMMU!="NONE") + spr_esr[`OR1K_SPR_SR_IME ] <= spr_write_dat[`OR1K_SPR_SR_IME ]; + + if (FEATURE_FASTCONTEXTS!="NONE") + spr_esr[`OR1K_SPR_SR_CE ] <= spr_write_dat[`OR1K_SPR_SR_CE ]; + + if 
(FEATURE_CARRY_FLAG!="NONE") + spr_esr[`OR1K_SPR_SR_CY] <= spr_write_dat[`OR1K_SPR_SR_CY]; + + if (FEATURE_OVERFLOW!="NONE") begin + spr_esr[`OR1K_SPR_SR_OV ] <= spr_write_dat[`OR1K_SPR_SR_OV ]; + spr_esr[`OR1K_SPR_SR_OVE ] <= spr_write_dat[`OR1K_SPR_SR_OVE ]; + end + + if (FEATURE_DSX!="NONE") + spr_esr[`OR1K_SPR_SR_DSX ] <= spr_write_dat[`OR1K_SPR_SR_DSX ]; + + spr_esr[`OR1K_SPR_SR_EPH ] <= spr_write_dat[`OR1K_SPR_SR_EPH ]; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + ctrl_bubble_o <= 0; + else if (padv_execute_o) + ctrl_bubble_o <= execute_bubble_i; + + // Exception PC + always @(posedge clk) + if (exception_re) begin + if (except_ibus_err_i) + spr_epcr <= last_branch_insn_pc; + // Syscall is a special case, we return back to the instruction _after_ + // the syscall instruction, unless the syscall was in a delay slot + else if (except_syscall_i) + spr_epcr <= ctrl_delay_slot ? ctrl_epcr_o : pc_ctrl_i + 4; + else if (store_buffer_err_i) + spr_epcr <= store_buffer_epcr_i; + // Update EPCR unless we are handing over to the debug unit hardware + // i.e. single stepping. 
+ else if (!(except_trap_i & stall_on_trap)) + spr_epcr <= ctrl_epcr_o; + end else if (spr_we && spr_access[`OR1K_SPR_SYS_BASE] && + `SPR_OFFSET(spr_addr)==`SPR_OFFSET(`OR1K_SPR_EPCR0_ADDR)) begin + spr_epcr <= spr_write_dat; + end + + // Exception Effective Address + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_eear <= {OPTION_OPERAND_WIDTH{1'b0}}; + else if (/*padv_ctrl & exception*/ exception_re) + begin + if (except_ibus_err_i | except_itlb_miss_i | except_ipagefault_i) + spr_eear <= pc_ctrl_i; + else + spr_eear <= ctrl_lsu_adr_i; + end + + // Track the PC + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_ppc <= OPTION_RESET_PC; + else if (padv_ctrl) + spr_ppc <= pc_ctrl_i; + + // Generate the NPC for SPR accesses + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_npc <= OPTION_RESET_PC; + else if (du_npc_write) + spr_npc <= du_dat_i; + else if (du_npc_written) + spr_npc <= spr_npc; + else if (stepping) begin + if (stepped_into_rfe) + spr_npc <= spr_epcr; + else if (stepped_into_delay_slot) + spr_npc <= last_branch_target_pc; + else if (stepped_into_exception) + spr_npc <= exception_pc_addr; + else + spr_npc <= pc_ctrl_i + 4; + end else if (stall_on_trap & padv_ctrl & except_trap_i) + spr_npc <= pc_ctrl_i; + else if (cpu_stall & padv_ctrl) + spr_npc <= ctrl_delay_slot ? pc_ctrl_i - 4 : pc_ctrl_i; + else if (!cpu_stall) + spr_npc <= pc_execute_i; + + // Exception Vector Address + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_evbar <= {OPTION_OPERAND_WIDTH{1'b0}}; + else if (spr_we && spr_access[`OR1K_SPR_SYS_BASE] && + `SPR_OFFSET(spr_addr)==`SPR_OFFSET(`OR1K_SPR_EVBAR_ADDR)) + spr_evbar <= {spr_write_dat[OPTION_OPERAND_WIDTH-1:13], 13'd0}; + + // Remember when we're in a delay slot in execute stage. 
+ always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_delay_slot <= 0; + else if (padv_execute_o) + execute_delay_slot <= execute_op_branch_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + ctrl_delay_slot <= 0; + else if (padv_execute_o) + ctrl_delay_slot <= execute_delay_slot; + + mor1kx_cfgrs + #(.FEATURE_PIC (FEATURE_PIC), + .FEATURE_TIMER (FEATURE_TIMER), + .OPTION_PIC_TRIGGER (OPTION_PIC_TRIGGER), + .FEATURE_DSX (FEATURE_DSX), + .FEATURE_FASTCONTEXTS (FEATURE_FASTCONTEXTS), + .OPTION_RF_NUM_SHADOW_GPR (OPTION_RF_NUM_SHADOW_GPR), + .FEATURE_OVERFLOW (FEATURE_OVERFLOW), + .FEATURE_DATACACHE (FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH (OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH (OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS (OPTION_DCACHE_WAYS), + .FEATURE_DMMU (FEATURE_DMMU), + .OPTION_DMMU_SET_WIDTH (OPTION_DMMU_SET_WIDTH), + .OPTION_DMMU_WAYS (OPTION_DMMU_WAYS), + .FEATURE_INSTRUCTIONCACHE (FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH (OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH (OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS (OPTION_ICACHE_WAYS), + .FEATURE_IMMU (FEATURE_IMMU), + .OPTION_IMMU_SET_WIDTH (OPTION_IMMU_SET_WIDTH), + .OPTION_IMMU_WAYS (OPTION_IMMU_WAYS), + .FEATURE_DEBUGUNIT (FEATURE_DEBUGUNIT), + .FEATURE_PERFCOUNTERS (FEATURE_PERFCOUNTERS), + .OPTION_PERFCOUNTERS_NUM (OPTION_PERFCOUNTERS_NUM), + .FEATURE_MAC (FEATURE_MAC), + .FEATURE_FPU (FEATURE_FPU), // mor1kx_cfgrs instance + .FEATURE_SYSCALL (FEATURE_SYSCALL), + .FEATURE_TRAP (FEATURE_TRAP), + .FEATURE_RANGE (FEATURE_RANGE), + .FEATURE_DELAYSLOT ("ENABLED"), + .FEATURE_EVBAR ("ENABLED") + ) + mor1kx_cfgrs + (/*AUTOINST*/ + // Outputs + .spr_vr (spr_vr[31:0]), + .spr_vr2 (spr_vr2[31:0]), + .spr_upr (spr_upr[31:0]), + .spr_cpucfgr (spr_cpucfgr[31:0]), + .spr_dmmucfgr (spr_dmmucfgr[31:0]), + .spr_immucfgr (spr_immucfgr[31:0]), + .spr_dccfgr (spr_dccfgr[31:0]), + .spr_iccfgr (spr_iccfgr[31:0]), + .spr_dcfgr (spr_dcfgr[31:0]), + .spr_pccfgr 
(spr_pccfgr[31:0]), + .spr_avr (spr_avr[31:0])); + + /* Implementation-specific registers */ + assign spr_isr[0] = 0; + assign spr_isr[1] = 0; + assign spr_isr[2] = 0; + assign spr_isr[3] = 0; + assign spr_isr[4] = 0; + assign spr_isr[5] = 0; + assign spr_isr[6] = 0; + assign spr_isr[7] = 0; + + // System group (0) SPR data out + always @* begin + spr_sys_group_read = 0; + if (spr_access[`OR1K_SPR_SYS_BASE]) + case(`SPR_OFFSET(spr_addr)) + `SPR_OFFSET(`OR1K_SPR_VR_ADDR): + spr_sys_group_read = spr_vr; + `SPR_OFFSET(`OR1K_SPR_VR2_ADDR): + spr_sys_group_read = {spr_vr2[31:8], `MOR1KX_PIPEID_CAPPUCCINO}; + `SPR_OFFSET(`OR1K_SPR_AVR_ADDR): + spr_sys_group_read = spr_avr; + `SPR_OFFSET(`OR1K_SPR_UPR_ADDR): + spr_sys_group_read = spr_upr; + `SPR_OFFSET(`OR1K_SPR_CPUCFGR_ADDR): + spr_sys_group_read = spr_cpucfgr; + `SPR_OFFSET(`OR1K_SPR_DMMUCFGR_ADDR): + spr_sys_group_read = spr_dmmucfgr; + `SPR_OFFSET(`OR1K_SPR_IMMUCFGR_ADDR): + spr_sys_group_read = spr_immucfgr; + `SPR_OFFSET(`OR1K_SPR_DCCFGR_ADDR): + spr_sys_group_read = spr_dccfgr; + `SPR_OFFSET(`OR1K_SPR_ICCFGR_ADDR): + spr_sys_group_read = spr_iccfgr; + `SPR_OFFSET(`OR1K_SPR_DCFGR_ADDR): + spr_sys_group_read = spr_dcfgr; + `SPR_OFFSET(`OR1K_SPR_PCCFGR_ADDR): + spr_sys_group_read = spr_pccfgr; + `SPR_OFFSET(`OR1K_SPR_NPC_ADDR): + spr_sys_group_read = spr_npc; + `SPR_OFFSET(`OR1K_SPR_SR_ADDR): + spr_sys_group_read = {{(OPTION_OPERAND_WIDTH-SPR_SR_WIDTH){1'b0}}, + spr_sr}; + + `SPR_OFFSET(`OR1K_SPR_PPC_ADDR): + spr_sys_group_read = spr_ppc; + `ifdef OR1K_FPCSR_MASK_FLAGS + `SPR_OFFSET(`OR1K_SPR_FPCSR_ADDR): + spr_sys_group_read = + {{(OPTION_OPERAND_WIDTH-`OR1K_FPCSR_WIDTH-`OR1K_FPCSR_ALLF_SIZE){1'b0}}, + spr_fpcsr_mf,spr_fpcsr}; + `else + `SPR_OFFSET(`OR1K_SPR_FPCSR_ADDR): + spr_sys_group_read = {{(OPTION_OPERAND_WIDTH-`OR1K_FPCSR_WIDTH){1'b0}}, + spr_fpcsr}; + `endif + `SPR_OFFSET(`OR1K_SPR_EPCR0_ADDR): + spr_sys_group_read = spr_epcr; + `SPR_OFFSET(`OR1K_SPR_EEAR0_ADDR): + spr_sys_group_read = spr_eear; + 
`SPR_OFFSET(`OR1K_SPR_ESR0_ADDR): + spr_sys_group_read = {{(OPTION_OPERAND_WIDTH-SPR_SR_WIDTH){1'b0}}, + spr_esr}; + `SPR_OFFSET(`OR1K_SPR_EVBAR_ADDR): + spr_sys_group_read = spr_evbar; + `SPR_OFFSET(`OR1K_SPR_ISR0_ADDR): + spr_sys_group_read = spr_isr[0]; + `SPR_OFFSET(`OR1K_SPR_ISR0_ADDR) +1: + spr_sys_group_read = spr_isr[1]; + `SPR_OFFSET(`OR1K_SPR_ISR0_ADDR) +2: + spr_sys_group_read = spr_isr[2]; + `SPR_OFFSET(`OR1K_SPR_ISR0_ADDR) +3: + spr_sys_group_read = spr_isr[3]; + `SPR_OFFSET(`OR1K_SPR_ISR0_ADDR) +4: + spr_sys_group_read = spr_isr[4]; + `SPR_OFFSET(`OR1K_SPR_ISR0_ADDR) +5: + spr_sys_group_read = spr_isr[5]; + `SPR_OFFSET(`OR1K_SPR_ISR0_ADDR) +6: + spr_sys_group_read = spr_isr[6]; + `SPR_OFFSET(`OR1K_SPR_ISR0_ADDR) +7: + spr_sys_group_read = spr_isr[7]; + + `SPR_OFFSET(`OR1K_SPR_COREID_ADDR): + // If the multicore feature is activated this address returns the + // core identifier, 0 otherwise + spr_sys_group_read = (FEATURE_MULTICORE!="NONE") ? + multicore_coreid_i : 0; + `SPR_OFFSET(`OR1K_SPR_NUMCORES_ADDR): + // If the multicore feature is activated this address returns the + // number of cores, 0 otherwise + spr_sys_group_read = (FEATURE_MULTICORE!="NONE") ? + multicore_numcores_i : 0; + + default: + // GPR read + if (spr_addr[10:9] == 2'h2) + spr_sys_group_read = spr_gpr_dat_i; // Register file + endcase + end + + /* System group read data MUX in */ + assign spr_internal_read_dat[`OR1K_SPR_SYS_BASE] = spr_sys_group_read; + /* System group ack generation */ + + assign spr_access_ack[`OR1K_SPR_SYS_BASE] = spr_access[`OR1K_SPR_SYS_BASE] & + ((spr_addr[10:9] == 2'h2) ? + spr_gpr_ack_i : 1); + + // + // Generate data to the register file for mfspr operations + // Read data values are simply ORed, since each is set to 0 when not + // concerned by the SPR access.
+ // + assign mfspr_dat_o = spr_internal_read_dat[`OR1K_SPR_SYS_BASE] | + spr_internal_read_dat[`OR1K_SPR_DMMU_BASE] | + spr_internal_read_dat[`OR1K_SPR_IMMU_BASE] | + spr_internal_read_dat[`OR1K_SPR_DC_BASE] | + spr_internal_read_dat[`OR1K_SPR_IC_BASE] | + spr_internal_read_dat[`OR1K_SPR_MAC_BASE] | + spr_internal_read_dat[`OR1K_SPR_DU_BASE] | + spr_internal_read_dat[`OR1K_SPR_PC_BASE] | + spr_internal_read_dat[`OR1K_SPR_PM_BASE] | + spr_internal_read_dat[`OR1K_SPR_PIC_BASE] | + spr_internal_read_dat[`OR1K_SPR_TT_BASE] | + spr_internal_read_dat[`OR1K_SPR_FPU_BASE]; + + // PIC SPR control + generate + + if (FEATURE_PIC !="NONE") begin : pic + + /* mor1kx_pic AUTO_TEMPLATE ( + .spr_picsr_o (spr_picsr), + .spr_picmr_o (spr_picmr), + .spr_bus_ack (spr_access_ack[`OR1K_SPR_PIC_BASE]), + .spr_dat_o (spr_internal_read_dat[`OR1K_SPR_PIC_BASE]), + // Inputs + .spr_we_i (spr_we), + .spr_access_i (spr_access[`OR1K_SPR_PIC_BASE]) + .spr_addr_i (spr_addr), + .spr_dat_i (spr_write_dat), + );*/ + mor1kx_pic + #( + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH) + ) + mor1kx_pic + (/*AUTOINST*/ + // Outputs + .spr_picmr_o (spr_picmr), // Templated + .spr_picsr_o (spr_picsr), // Templated + .spr_bus_ack (spr_access_ack[`OR1K_SPR_PIC_BASE]), // Templated + .spr_dat_o (spr_internal_read_dat[`OR1K_SPR_PIC_BASE]), // Templated + // Inputs + .clk (clk), + .rst (rst), + .irq_i (irq_i[31:0]), + .spr_access_i (spr_access[`OR1K_SPR_PIC_BASE]), // Templated + .spr_we_i (spr_we), // Templated + .spr_addr_i (spr_addr), // Templated + .spr_dat_i (spr_write_dat)); // Templated + + + assign except_pic = (|spr_picsr) & spr_sr[`OR1K_SPR_SR_IEE] & + !ctrl_op_mtspr_i & !doing_rfe; + end + else begin + assign except_pic = 0; + assign spr_picsr = 0; + assign spr_picmr = 0; + assign spr_access_ack[`OR1K_SPR_PIC_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_PIC_BASE] = 0; + end // else: !if(FEATURE_PIC !="NONE") + endgenerate + + // PCU SPR control + wire 
dchache_miss = !dcache_hit_i & ((execute_op_lsu_load_i | execute_op_lsu_store_i) & padv_execute_o); + generate + if (FEATURE_PERFCOUNTERS !="NONE") begin : pcu + + /* mor1kx_pcu AUTO_TEMPLATE ( + .spr_bus_ack (spr_access_ack[`OR1K_SPR_PC_BASE]), + .spr_dat_o (spr_internal_read_dat[`OR1K_SPR_PC_BASE]), + // Inputs + .spr_we_i (spr_we), + .spr_re_i (spr_read), + .spr_access_i (spr_access[`OR1K_SPR_PC_BASE]) + .spr_addr_i (spr_addr), + .spr_dat_i (spr_write_dat), + );*/ + mor1kx_pcu + #( + .OPTION_PERFCOUNTERS_NUM(OPTION_PERFCOUNTERS_NUM) + ) + mor1kx_pcu + (/*AUTOINST*/ + // Outputs + .spr_bus_ack (spr_access_ack[`OR1K_SPR_PC_BASE]), // Templated + .spr_dat_o (spr_internal_read_dat[`OR1K_SPR_PC_BASE]), // Templated + // Inputs + .clk (clk), + .rst (rst), + .spr_access_i (spr_access[`OR1K_SPR_PC_BASE]), // Templated + .spr_we_i (spr_we), // Templated + .spr_re_i (spr_read), // Templated + .spr_addr_i (spr_addr), // Templated + .spr_dat_i (spr_write_dat), // Templated + .spr_sys_mode_i (spr_sr[`OR1K_SPR_SR_SM]), + .pcu_event_load_i (execute_op_lsu_load_i & padv_execute_o), + .pcu_event_store_i (execute_op_lsu_store_i & padv_execute_o), + .pcu_event_ifetch_i (fetch_valid_i), + .pcu_event_dcache_miss_i(dchache_miss), + .pcu_event_icache_miss_i(!icache_hit_i & !waiting_for_fetch), + .pcu_event_ifetch_stall_i(!padv_fetch_o), + .pcu_event_lsu_stall_i(!ctrl_valid_i), + .pcu_event_brn_stall_i(branch_mispredict_i & padv_decode_o), + .pcu_event_dtlb_miss_i(except_dtlb_miss_i), + .pcu_event_itlb_miss_i(except_itlb_miss_i), + .pcu_event_datadep_stall_i(execute_waiting) + ); + end + else begin + assign spr_access_ack[`OR1K_SPR_PC_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_PC_BASE] = 0; + end // else: !if(FEATURE_PERFCOUNTERS !="NONE") + endgenerate + + generate + if (FEATURE_TIMER!="NONE") begin : tt + + /* mor1kx_ticktimer AUTO_TEMPLATE ( + .spr_ttmr_o (spr_ttmr), + .spr_ttcr_o (spr_ttcr), + .spr_bus_ack (spr_access_ack[`OR1K_SPR_TT_BASE]), + .spr_dat_o 
(spr_internal_read_dat[`OR1K_SPR_TT_BASE]), + // Inputs + .spr_access_i (spr_access[`OR1K_SPR_TT_BASE]), + .spr_we_i (spr_we), + .spr_addr_i (spr_addr), + .spr_dat_i (spr_write_dat), + );*/ + mor1kx_ticktimer mor1kx_ticktimer + (/*AUTOINST*/ + // Outputs + .spr_ttmr_o (spr_ttmr), // Templated + .spr_ttcr_o (spr_ttcr), // Templated + .spr_bus_ack (spr_access_ack[`OR1K_SPR_TT_BASE]), // Templated + .spr_dat_o (spr_internal_read_dat[`OR1K_SPR_TT_BASE]), // Templated + // Inputs + .clk (clk), + .rst (rst), + .spr_access_i (spr_access[`OR1K_SPR_TT_BASE]), // Templated + .spr_we_i (spr_we), // Templated + .spr_addr_i (spr_addr), // Templated + .spr_dat_i (spr_write_dat)); // Templated + + assign except_ticktimer = spr_ttmr[28] & spr_sr[`OR1K_SPR_SR_TEE] & + !ctrl_op_mtspr_i & !doing_rfe; + + end // if (FEATURE_TIMER!="NONE") + else begin + assign except_ticktimer = 0; + assign spr_ttmr = 0; + assign spr_ttcr = 0; + assign spr_access_ack[`OR1K_SPR_TT_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_TT_BASE] = 0; + end // else: !if(FEATURE_TIMER!="NONE") + endgenerate + + /* SPR access control - allow accesses from either the instructions or from + the debug interface */ + assign spr_read_access = (ctrl_op_mfspr_i | (du_access & !du_we_i)); + assign spr_write_access = (ctrl_op_mtspr_i | (du_access & du_we_i)); + + assign spr_write_dat = du_access ? 
du_dat_i : b; + assign spr_we = spr_write_access & spr_access_valid; + assign spr_read = spr_read_access & spr_access_valid; + + /* A bus out to other units that live outside of the control unit */ + assign spr_bus_addr_o = spr_addr; + assign spr_bus_we_o = spr_write_access & spr_access_valid & spr_bus_access; + assign spr_bus_stb_o = (spr_read_access | spr_write_access) & + spr_access_valid & spr_bus_access; + assign spr_bus_dat_o = spr_write_dat; + + assign spr_group = spr_addr[14:11]; + + /* Select spr */ + always @(*) begin + spr_access = 0; + case(spr_group) + // System group + `OR1K_SPR_SYS_BASE: + spr_access[`OR1K_SPR_SYS_BASE] = 1'b1; + // DMMU + `OR1K_SPR_DMMU_BASE: + spr_access[`OR1K_SPR_DMMU_BASE] = (FEATURE_DMMU!="NONE"); + // IMMU + `OR1K_SPR_IMMU_BASE: + spr_access[`OR1K_SPR_IMMU_BASE] = (FEATURE_IMMU!="NONE"); + // Data cache + `OR1K_SPR_DC_BASE: + spr_access[`OR1K_SPR_DC_BASE] = (FEATURE_DATACACHE!="NONE"); + // Instruction cache + `OR1K_SPR_IC_BASE: + spr_access[`OR1K_SPR_IC_BASE] = (FEATURE_INSTRUCTIONCACHE!= "NONE"); + // MAC unit + `OR1K_SPR_MAC_BASE: + spr_access[`OR1K_SPR_MAC_BASE] = (FEATURE_MAC!="NONE"); + // Debug unit + `OR1K_SPR_DU_BASE: + spr_access[`OR1K_SPR_DU_BASE] = (FEATURE_DEBUGUNIT!="NONE"); + // Performance counters + `OR1K_SPR_PC_BASE: + spr_access[`OR1K_SPR_PC_BASE] = (FEATURE_PERFCOUNTERS!="NONE"); + // Power Management + `OR1K_SPR_PM_BASE: + spr_access[`OR1K_SPR_PM_BASE] = (FEATURE_PMU!="NONE"); + // PIC + `OR1K_SPR_PIC_BASE: + spr_access[`OR1K_SPR_PIC_BASE] = (FEATURE_PIC!="NONE"); + // Tick timer + `OR1K_SPR_TT_BASE: + spr_access[`OR1K_SPR_TT_BASE] = (FEATURE_TIMER!="NONE"); + // FPU + `OR1K_SPR_FPU_BASE: + spr_access[`OR1K_SPR_FPU_BASE] = (FEATURE_FPU!="NONE"); + /* generate invalid if the group is not present in the design */ + default: + spr_access = 0; + endcase + end + + // Is the SPR in the design? 
+ assign spr_access_valid = |spr_access; + + assign spr_ack = (|spr_access_ack) | !spr_access_valid; + + /* Is a SPR bus access needed, or is the requested SPR in this file? */ + assign spr_bus_access = /* Any of the units we don't have in this file */ + /* System group */ + !(spr_access[`OR1K_SPR_SYS_BASE] || + /* Debug Group */ + spr_access[`OR1K_SPR_DU_BASE] || + /* PIC Group */ + spr_access[`OR1K_SPR_PIC_BASE] || + /* Tick Group */ + spr_access[`OR1K_SPR_TT_BASE]) || + // GPR + (spr_access[`OR1K_SPR_SYS_BASE] && + spr_addr[10:9]==2'h2); + + generate + if (FEATURE_DEBUGUNIT!="NONE") begin : du + + reg [OPTION_OPERAND_WIDTH-1:0] du_read_dat; + + reg du_ack; + reg du_stall_r; + reg [1:0] branch_step; + + assign du_access = du_stb_i; + + // Generate ack back to the debug interface bus + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_ack <= 0; + else if (du_ack) + du_ack <= 0; + else if (du_stb_i) begin + du_ack <= spr_ack; + end + + assign du_ack_o = du_ack; + + /* Data back to the debug bus */ + always @(posedge clk) + du_read_dat <= mfspr_dat_o; + + assign du_dat_o = du_read_dat; + + always @(posedge clk) + if (rst) + cpu_stall <= 0; + else if (!du_stall_i) + cpu_stall <= 0; + else if (padv_execute_o & !execute_bubble_i & du_stall_i | + du_stall_o) + cpu_stall <= 1; + + /* goes out to the debug interface and comes back 1 cycle later + via du_stall_i */ + assign du_stall_o = stepping & pstep[4] | + (stall_on_trap & padv_ctrl & except_trap_i); + + /* Pulse to indicate we're restarting after a stall */ + assign du_restart_from_stall = du_stall_r & !du_stall_i; + + /* NPC debug control logic */ + assign du_npc_write = (du_we_i && du_addr_i==`OR1K_SPR_NPC_ADDR && + du_ack_o); + + /* Pick the traps-cause-stall bit out of the DSR */ + assign stall_on_trap = spr_dsr[`OR1K_SPR_DSR_TE]; + + /* record if NPC was written while we were stalled. 
+ If so, we will use this value for restarting */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_npc_written <= 0; + else if (du_restart_from_stall) + du_npc_written <= 0; + else if (du_npc_write) + du_npc_written <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + stepped_into_exception <= 0; + else if (du_restart_from_stall) + stepped_into_exception <= 0; + else if (exception & stepping & (padv_ctrl | ctrl_stage_exceptions)) + stepped_into_exception <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + stepped_into_rfe <= 0; + else if (du_restart_from_stall) + stepped_into_rfe <= 0; + else if (stepping & padv_ctrl) + stepped_into_rfe <= ctrl_op_rfe_i; + + assign du_restart_pc_o = spr_npc; + + assign du_restart_o = du_restart_from_stall; + + /* Indicate when we're stepping */ + assign stepping = spr_dmr1[`OR1K_SPR_DMR1_ST] & + spr_dsr[`OR1K_SPR_DSR_TE]; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + pstep <= 0; + else if (du_restart_from_stall & stepping) + pstep <= 6'h1; + else if ((pstep[0] & fetch_valid_i) | + /* decode is always single cycle */ + (pstep[1] & padv_decode_o) | + /* execute stage */ + (pstep[2] & (execute_valid_i | ctrl_stage_exceptions)) | + /* ctrl stage */ + (pstep[3] & (ctrl_valid_i | ctrl_stage_exceptions)) | + pstep[4]) + pstep <= {pstep[4:0],1'b0}; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + branch_step <= 0; + else if (du_npc_written) + branch_step <= 0; + else if (stepping & pstep[2]) + branch_step <= {branch_step[0], decode_branch_i}; + else if (!stepping & padv_ctrl) + branch_step <= {branch_step[0], ctrl_delay_slot}; + + assign stepped_into_delay_slot = branch_step[1] & stepping; + + /* Signals for waveform debugging */ + wire [31:0] spr_read_data_group_0; + assign spr_read_data_group_0 = spr_internal_read_dat[0]; + wire [31:0] spr_read_data_group_1; + assign spr_read_data_group_1 = spr_internal_read_dat[1]; + wire [31:0] spr_read_data_group_2; + assign spr_read_data_group_2 = spr_internal_read_dat[2];
+ wire [31:0] spr_read_data_group_3; + assign spr_read_data_group_3 = spr_internal_read_dat[3]; + wire [31:0] spr_read_data_group_4; + assign spr_read_data_group_4 = spr_internal_read_dat[4]; + wire [31:0] spr_read_data_group_5; + assign spr_read_data_group_5 = spr_internal_read_dat[5]; + wire [31:0] spr_read_data_group_6; + assign spr_read_data_group_6 = spr_internal_read_dat[6]; + wire [31:0] spr_read_data_group_7; + assign spr_read_data_group_7 = spr_internal_read_dat[7]; + wire [31:0] spr_read_data_group_8; + assign spr_read_data_group_8 = spr_internal_read_dat[8]; + wire [31:0] spr_read_data_group_9; + assign spr_read_data_group_9 = spr_internal_read_dat[9]; + + + /* always single cycle access */ + assign spr_access_ack[`OR1K_SPR_DU_BASE] = spr_access[`OR1K_SPR_DU_BASE]; + assign spr_internal_read_dat[`OR1K_SPR_DU_BASE] = + (spr_addr==`OR1K_SPR_DMR1_ADDR) ? + spr_dmr1 : + (spr_addr==`OR1K_SPR_DMR2_ADDR) ? + spr_dmr2 : + (spr_addr==`OR1K_SPR_DSR_ADDR) ? + spr_dsr : + (spr_addr==`OR1K_SPR_DRR_ADDR) ? 
+ spr_drr : 0; + + /* Put the incoming stall signal through a register to detect FE */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_stall_r <= 0; + else + du_stall_r <= du_stall_i; + + /* DMR1 */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_dmr1 <= 0; + else if (spr_we && spr_addr==`OR1K_SPR_DMR1_ADDR) + spr_dmr1[23:0] <= spr_write_dat[23:0]; + + /* DMR2 */ + always @(posedge clk) + spr_dmr2 <= 0; + + /* DSR */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_dsr <= 0; + else if (spr_we && spr_addr==`OR1K_SPR_DSR_ADDR) + spr_dsr[13:0] <= spr_write_dat[13:0]; + + /* DRR */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_drr <= 0; + else if (spr_we && spr_addr==`OR1K_SPR_DRR_ADDR) + spr_drr[13:0] <= spr_write_dat[13:0]; + else if (stall_on_trap & padv_ctrl & except_trap_i) + spr_drr[`OR1K_SPR_DRR_TE] <= 1; + + end // block: du + else + begin : no_du + assign du_access = 0; + assign du_stall_o = 0; + assign du_ack_o = 0; + assign du_restart_o = 0; + assign du_restart_pc_o = 0; + assign stepping = 0; + assign du_npc_write = 0; + assign stepped_into_delay_slot = 0; + assign du_dat_o = 0; + assign du_restart_from_stall = 0; + assign spr_access_ack[`OR1K_SPR_DU_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_DU_BASE] = 0; + assign stall_on_trap = 0; + always @(posedge clk) + begin + spr_dmr1 <= 0; + spr_dmr2 <= 0; + spr_dsr <= 0; + spr_drr <= 0; + du_npc_written <= 0; + cpu_stall <= 0; + end + end + endgenerate + +// Controls to generate ACKs from units that are external to this module +generate +if (FEATURE_DMMU!="NONE") begin : dmmu_ctrl + assign spr_access_ack[`OR1K_SPR_DMMU_BASE] = spr_bus_ack_dmmu_i & + spr_access[`OR1K_SPR_DMMU_BASE]; + assign spr_internal_read_dat[`OR1K_SPR_DMMU_BASE] = + spr_bus_dat_dmmu_i & + {OPTION_OPERAND_WIDTH{spr_access[`OR1K_SPR_DMMU_BASE]}}; +end else begin + assign spr_access_ack[`OR1K_SPR_DMMU_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_DMMU_BASE] = 0; +end +endgenerate + +generate +if 
(FEATURE_IMMU!="NONE") begin : immu_ctrl + assign spr_access_ack[`OR1K_SPR_IMMU_BASE] = spr_bus_ack_immu_i & + spr_access[`OR1K_SPR_IMMU_BASE]; + assign spr_internal_read_dat[`OR1K_SPR_IMMU_BASE] = + spr_bus_dat_immu_i & + {OPTION_OPERAND_WIDTH{spr_access[`OR1K_SPR_IMMU_BASE]}}; +end else begin + assign spr_access_ack[`OR1K_SPR_IMMU_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_IMMU_BASE] = 0; +end +endgenerate + +generate +if (FEATURE_DATACACHE!="NONE") begin : datacache_ctrl + assign spr_access_ack[`OR1K_SPR_DC_BASE] = spr_bus_ack_dc_i & + spr_access[`OR1K_SPR_DC_BASE]; + assign spr_internal_read_dat[`OR1K_SPR_DC_BASE] = + spr_bus_dat_dc_i & {OPTION_OPERAND_WIDTH{spr_access[`OR1K_SPR_DC_BASE]}}; +end else begin + assign spr_access_ack[`OR1K_SPR_DC_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_DC_BASE] = 0; +end +endgenerate + +generate +if (FEATURE_INSTRUCTIONCACHE!="NONE") begin : instructioncache_ctrl + assign spr_access_ack[`OR1K_SPR_IC_BASE] = spr_bus_ack_ic_i & + spr_access[`OR1K_SPR_IC_BASE]; + assign spr_internal_read_dat[`OR1K_SPR_IC_BASE] = + spr_bus_dat_ic_i & {OPTION_OPERAND_WIDTH{spr_access[`OR1K_SPR_IC_BASE]}}; +end else begin + assign spr_access_ack[`OR1K_SPR_IC_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_IC_BASE] = 0; +end +endgenerate + +generate +if (FEATURE_MAC!="NONE") begin : mac_ctrl + assign spr_access_ack[`OR1K_SPR_MAC_BASE] = spr_bus_ack_mac_i & + spr_access[`OR1K_SPR_MAC_BASE]; + assign spr_internal_read_dat[`OR1K_SPR_MAC_BASE] = + spr_bus_dat_mac_i & + {OPTION_OPERAND_WIDTH{spr_access[`OR1K_SPR_MAC_BASE]}}; +end else begin + assign spr_access_ack[`OR1K_SPR_MAC_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_MAC_BASE] = 0; +end +endgenerate + +generate +if (FEATURE_PMU!="NONE") begin : pmu_ctrl + assign spr_access_ack[`OR1K_SPR_PM_BASE] = spr_bus_ack_pmu_i & + spr_access[`OR1K_SPR_PM_BASE]; + assign spr_internal_read_dat[`OR1K_SPR_PM_BASE] = + spr_bus_dat_pmu_i & 
{OPTION_OPERAND_WIDTH{spr_access[`OR1K_SPR_PM_BASE]}}; +end else begin + assign spr_access_ack[`OR1K_SPR_PM_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_PM_BASE] = 0; +end +endgenerate + +generate +if (FEATURE_FPU!="NONE") begin : fpu_ctrl + assign spr_access_ack[`OR1K_SPR_FPU_BASE] = spr_bus_ack_fpu_i; + assign spr_internal_read_dat[`OR1K_SPR_FPU_BASE] = + spr_bus_dat_fpu_i & + {OPTION_OPERAND_WIDTH{spr_access[`OR1K_SPR_FPU_BASE]}}; +end else begin + assign spr_access_ack[`OR1K_SPR_FPU_BASE] = 0; + assign spr_internal_read_dat[`OR1K_SPR_FPU_BASE] = 0; +end +endgenerate + +endmodule // mor1kx_ctrl_cappuccino diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ctrl_espresso.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ctrl_espresso.v new file mode 100644 index 0000000..36bb296 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ctrl_espresso.v @@ -0,0 +1,1453 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx espresso pipeline control unit + + inputs from execute stage + + generate pipeline controls + + manage SPRs + + issue addresses for exceptions to fetch stage + control branches going to fetch stage + + contains tick timer + + contains PIC logic + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_ctrl_espresso + (/*AUTOARG*/ + // Outputs + flag_o, spr_npc_o, spr_ppc_o, mfspr_dat_o, ctrl_mfspr_we_o, + carry_o, pipeline_flush_o, padv_fetch_o, padv_decode_o, + padv_execute_o, fetch_take_exception_branch_o, exception_taken_o, + execute_waiting_o, stepping_o, du_dat_o, du_ack_o, du_stall_o, + du_restart_pc_o, du_restart_o, spr_bus_addr_o, spr_bus_we_o, + spr_bus_stb_o, spr_bus_dat_o, spr_sr_o, ctrl_branch_target_o, + ctrl_branch_occur_o, rf_we_o, + // Inputs + clk, rst, ctrl_alu_result_i, ctrl_rfb_i, ctrl_flag_set_i, + ctrl_flag_clear_i, ctrl_opc_insn_i, pc_fetch_i, fetch_advancing_i, + except_ibus_err_i, except_illegal_i, except_syscall_i, + except_dbus_i, except_trap_i, except_align_i, next_fetch_done_i, + alu_valid_i, lsu_valid_i, op_lsu_load_i, op_lsu_store_i, op_jr_i, + op_jbr_i, irq_i, carry_set_i, carry_clear_i, overflow_set_i, + overflow_clear_i, du_addr_i, du_stb_i, du_dat_i, du_we_i, + du_stall_i, spr_bus_dat_dc_i, spr_bus_ack_dc_i, spr_bus_dat_ic_i, + spr_bus_ack_ic_i, spr_bus_dat_dmmu_i, spr_bus_ack_dmmu_i, + spr_bus_dat_immu_i, spr_bus_ack_immu_i, spr_bus_dat_mac_i, + spr_bus_ack_mac_i, spr_bus_dat_pmu_i, spr_bus_ack_pmu_i, + spr_bus_dat_pcu_i, spr_bus_ack_pcu_i, spr_bus_dat_fpu_i, + spr_bus_ack_fpu_i, multicore_coreid_i, rf_wb_i + ); + + parameter OPTION_OPERAND_WIDTH = 32; + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}; + + 
parameter FEATURE_SYSCALL = "ENABLED"; + parameter FEATURE_TRAP = "ENABLED"; + parameter FEATURE_RANGE = "ENABLED"; + + parameter FEATURE_DATACACHE = "NONE"; + parameter OPTION_DCACHE_BLOCK_WIDTH = 5; + parameter OPTION_DCACHE_SET_WIDTH = 9; + parameter OPTION_DCACHE_WAYS = 2; + parameter FEATURE_DMMU = "NONE"; + parameter FEATURE_INSTRUCTIONCACHE = "NONE"; + parameter OPTION_ICACHE_BLOCK_WIDTH = 5; + parameter OPTION_ICACHE_SET_WIDTH = 9; + parameter OPTION_ICACHE_WAYS = 2; + parameter FEATURE_IMMU = "NONE"; + parameter FEATURE_TIMER = "ENABLED"; + parameter FEATURE_DEBUGUNIT = "NONE"; + parameter FEATURE_PERFCOUNTERS = "NONE"; + parameter FEATURE_PMU = "NONE"; + parameter FEATURE_MAC = "NONE"; + parameter FEATURE_FPU = "NONE"; + + parameter FEATURE_MULTICORE = "NONE"; + + parameter FEATURE_PIC = "ENABLED"; + parameter OPTION_PIC_TRIGGER = "LEVEL"; + parameter OPTION_PIC_NMI_WIDTH = 0; + + parameter FEATURE_DSX = "NONE"; + parameter FEATURE_FASTCONTEXTS = "NONE"; + parameter FEATURE_OVERFLOW = "NONE"; + + parameter SPR_SR_WIDTH = 16; + parameter SPR_SR_RESET_VALUE = 16'h8001; + + input clk, rst; + + // ALU result - either jump target, SPR address + input [OPTION_OPERAND_WIDTH-1:0] ctrl_alu_result_i; + + // Operand B from RF might be jump address, might be value for SPR + input [OPTION_OPERAND_WIDTH-1:0] ctrl_rfb_i; + + input ctrl_flag_set_i, ctrl_flag_clear_i; + output flag_o; + + output [OPTION_OPERAND_WIDTH-1:0] spr_npc_o; + output [OPTION_OPERAND_WIDTH-1:0] spr_ppc_o; + + input [`OR1K_OPCODE_WIDTH-1:0] ctrl_opc_insn_i; + + // PC of execute stage (NPC) + input [OPTION_OPERAND_WIDTH-1:0] pc_fetch_i; + input fetch_advancing_i; + + + // Exception inputs, registered on output of execute stage + input except_ibus_err_i, + except_illegal_i, + except_syscall_i, except_dbus_i, + except_trap_i, except_align_i; + + // Inputs from two units that can stall proceedings + input next_fetch_done_i; + + input alu_valid_i, lsu_valid_i; + + input op_lsu_load_i, op_lsu_store_i; + 
input op_jr_i, op_jbr_i; + + // External IRQ lines in + input [31:0] irq_i; + + // SPR data out + output [OPTION_OPERAND_WIDTH-1:0] mfspr_dat_o; + + // WE to RF for l.mfspr + output ctrl_mfspr_we_o; + + // Flag out to branch control, combinatorial + reg flag; + + // Arithmetic flags to and from ALU + output carry_o; + input carry_set_i; + input carry_clear_i; + input overflow_set_i; + input overflow_clear_i; + + // Branch indicator from control unit (l.rfe/exception) + wire ctrl_branch_exception; + // PC out to fetch stage for l.rfe, exceptions + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_branch_except_pc; + + // Clear instructions from decode and fetch stage + output pipeline_flush_o; + + output padv_fetch_o; + output padv_decode_o; + output padv_execute_o; + + // This indicates to the fetch unit only that it should basically interrupt + // whatever it's doing and start fetching the exception + output fetch_take_exception_branch_o; + // This indicates to other parts of the CPU that we've handled an exception + // so can be used to clear exception indication registers + output exception_taken_o; + + output execute_waiting_o; + output stepping_o; + + // Debug bus + input [15:0] du_addr_i; + input du_stb_i; + input [OPTION_OPERAND_WIDTH-1:0] du_dat_i; + input du_we_i; + output [OPTION_OPERAND_WIDTH-1:0] du_dat_o; + output du_ack_o; + // Stall control from debug interface + input du_stall_i; + output du_stall_o; + output [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_o; + output du_restart_o; + + // SPR accesses to external units (cache, mmu, etc.)
+ output [15:0] spr_bus_addr_o; + output spr_bus_we_o; + output spr_bus_stb_o; + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dc_i; + input spr_bus_ack_dc_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_ic_i; + input spr_bus_ack_ic_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dmmu_i; + input spr_bus_ack_dmmu_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_immu_i; + input spr_bus_ack_immu_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_mac_i; + input spr_bus_ack_mac_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pmu_i; + input spr_bus_ack_pmu_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pcu_i; + input spr_bus_ack_pcu_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_fpu_i; + input spr_bus_ack_fpu_i; + output [15:0] spr_sr_o; + + // The multicore core identifier + input [OPTION_OPERAND_WIDTH-1:0] multicore_coreid_i; + + // Internal signals + reg [SPR_SR_WIDTH-1:0] spr_sr; + reg [SPR_SR_WIDTH-1:0] spr_esr; + reg [OPTION_OPERAND_WIDTH-1:0] spr_epcr; + reg [OPTION_OPERAND_WIDTH-1:0] spr_eear; + + // Programmable Interrupt Control SPRs + wire [31:0] spr_picmr; + wire [31:0] spr_picsr; + + // Tick Timer SPRs + wire [31:0] spr_ttmr; + wire [31:0] spr_ttcr; + + reg [OPTION_OPERAND_WIDTH-1:0] spr_ppc; + reg [OPTION_OPERAND_WIDTH-1:0] spr_npc; + reg execute_delay_slot; + reg delay_slot_rf_we_done; + + output [OPTION_OPERAND_WIDTH-1:0] ctrl_branch_target_o; + + reg execute_go; + wire execute_done; + + reg execute_waiting_r; + + reg decode_execute_halt; + + reg exception_taken; + + reg [OPTION_OPERAND_WIDTH-1:0] last_branch_insn_pc; + reg [OPTION_OPERAND_WIDTH-1:0] last_branch_target_pc; + + reg take_exception; + reg exception_r; + + reg [OPTION_OPERAND_WIDTH-1:0] exception_pc_addr; + + reg waiting_for_fetch; + reg branched_and_waiting_for_fetch; + + reg doing_rfe_r; + wire doing_rfe; + wire deassert_doing_rfe; + + wire exception, exception_pending; + + wire execute_stage_exceptions; + wire 
decode_stage_exceptions; + + wire exception_re; + + wire except_ticktimer; + wire except_pic; + + wire except_ticktimer_nonsrmasked; + wire except_pic_nonsrmasked; + + wire except_range; + + wire [15:0] spr_addr; + + wire op_mtspr; + wire op_mfspr; + wire op_rfe; + + wire [OPTION_OPERAND_WIDTH-1:0] b; + + wire execute_waiting; + + wire execute_valid; + + wire deassert_decode_execute_halt; + + wire ctrl_branch_occur; + wire new_branch; + output ctrl_branch_occur_o; + output rf_we_o; + input rf_wb_i; + wire except_ibus_align; + wire fetch_advance; + wire rfete; + wire stall_on_trap; + + /* Debug SPRs */ + reg [31:0] spr_dmr1; + reg [31:0] spr_dmr2; + reg [31:0] spr_dsr; + reg [31:0] spr_drr; + + /* DU internal control signals */ + wire du_access; + wire cpu_stall; + wire du_restart_from_stall; + wire [1:0] pstep; + wire stepping; + wire stepped_into_delay_slot; + wire du_npc_write; + reg du_npc_written; + reg [OPTION_OPERAND_WIDTH-1:0] du_spr_npc; + + /* Wires for SPR management */ + wire spr_group_present; + wire [3:0] spr_group; + wire spr_we; + wire spr_read; + wire [OPTION_OPERAND_WIDTH-1:0] spr_write_dat; + wire [11:0] spr_access_ack; + wire [31:0] spr_internal_read_dat [0:12]; + wire spr_read_access; + wire spr_write_access; + wire spr_bus_access; + reg [OPTION_OPERAND_WIDTH-1:0] spr_sys_group_read; + + /* Wires from mor1kx_cfgrs module */ + wire [31:0] spr_vr; + wire [31:0] spr_vr2; + wire [31:0] spr_avr; + wire [31:0] spr_upr; + wire [31:0] spr_cpucfgr; + wire [31:0] spr_dmmucfgr; + wire [31:0] spr_immucfgr; + wire [31:0] spr_dccfgr; + wire [31:0] spr_iccfgr; + wire [31:0] spr_dcfgr; + wire [31:0] spr_pccfgr; + wire [31:0] spr_fpcsr = 0; + wire [31:0] spr_isr [0:7]; + + assign b = ctrl_rfb_i; + + assign ctrl_branch_exception = (exception_r | (op_rfe | doing_rfe)) & + !exception_taken; + assign exception_pending = (except_ibus_err_i | except_ibus_align | + except_illegal_i | except_syscall_i | + except_dbus_i | except_align_i | + except_ticktimer | 
except_range | + except_pic | except_trap_i ); + + assign exception = exception_pending; + + assign fetch_take_exception_branch_o = (take_exception | op_rfe) & + !stepping; + + assign execute_stage_exceptions = except_dbus_i | except_align_i | + except_range; + assign decode_stage_exceptions = except_trap_i | except_illegal_i; + + assign exception_re = exception & !exception_r & !exception_taken; + + assign deassert_decode_execute_halt = ctrl_branch_occur & + decode_execute_halt; + + assign ctrl_branch_except_pc = (op_rfe | doing_rfe) & !rfete ? spr_epcr : + exception_pc_addr; + + // Exceptions take precedence + assign ctrl_branch_occur = // instruction is branch, and flag is right + (op_jbr_i & + // is l.j or l.jal + (!(|ctrl_opc_insn_i[2:1]) | + // is l.bf/bnf and flag is right + (ctrl_opc_insn_i[2]==flag))) | + (op_jr_i & !(except_ibus_align)); + + assign ctrl_branch_occur_o = // Usual branch signaling + ((ctrl_branch_occur | ctrl_branch_exception) & + fetch_advance) | + // Need to tell the fetch stage to branch + // when it gets the next instruction because + // there was fetch stalls between the branch + // and the delay slot insn + (execute_delay_slot); + + assign ctrl_branch_target_o = ctrl_branch_exception ? + ctrl_branch_except_pc : + // jump or branch? + op_jbr_i ? ctrl_alu_result_i : + ctrl_rfb_i; + + // Do writeback when we register our output to the next stage, or if + // we're doing mfspr + assign rf_we_o = (execute_done & !delay_slot_rf_we_done) & + ((rf_wb_i & !op_mfspr + & !((op_lsu_load_i | op_lsu_store_i) & + except_dbus_i | except_align_i)) | + (op_mfspr)); + + assign except_range = (FEATURE_RANGE!="NONE") ? 
spr_sr[`OR1K_SPR_SR_OVE] && + (spr_sr[`OR1K_SPR_SR_OV] | overflow_set_i & + execute_done) & !doing_rfe : 0; + + // Check for unaligned jump address from register + assign except_ibus_align = op_jr_i & (|ctrl_rfb_i[1:0]); + + // Return from exception to exception (if pending tick or PIC ints) + assign rfete = (spr_esr[`OR1K_SPR_SR_IEE] & except_pic_nonsrmasked) | + (spr_esr[`OR1K_SPR_SR_TEE] & except_ticktimer_nonsrmasked); + + always @(posedge clk) + if (rst) + exception_pc_addr <= OPTION_RESET_PC; + else if (exception_re | (rfete & execute_done)) + casez( + {except_ibus_err_i, + except_illegal_i, + except_align_i, + except_ibus_align, + except_syscall_i, + except_trap_i, + except_dbus_i, + except_range, + except_pic_nonsrmasked, + except_ticktimer_nonsrmasked + } + ) + 10'b1?????????: + exception_pc_addr <= {19'd0,`OR1K_BERR_VECTOR,8'd0}; + 10'b01????????: + exception_pc_addr <= {19'd0,`OR1K_ILLEGAL_VECTOR,8'd0}; + 10'b001???????, + 10'b0001??????: + exception_pc_addr <= {19'd0,`OR1K_ALIGN_VECTOR,8'd0}; + 10'b00001?????: + exception_pc_addr <= {19'd0,`OR1K_SYSCALL_VECTOR,8'd0}; + 10'b000001????: + exception_pc_addr <= {19'd0,`OR1K_TRAP_VECTOR,8'd0}; + 10'b0000001???: + exception_pc_addr <= {19'd0,`OR1K_BERR_VECTOR,8'd0}; + 10'b00000001??: + exception_pc_addr <= {19'd0,`OR1K_RANGE_VECTOR,8'd0}; + 10'b000000001?: + exception_pc_addr <= {19'd0,`OR1K_INT_VECTOR,8'd0}; + //10'b0000000001: + default: + exception_pc_addr <= {19'd0,`OR1K_TT_VECTOR,8'd0}; + endcase // casex (... 
+ + assign op_mtspr = ctrl_opc_insn_i==`OR1K_OPCODE_MTSPR; + assign op_mfspr = ctrl_opc_insn_i==`OR1K_OPCODE_MFSPR; + assign op_rfe = ctrl_opc_insn_i==`OR1K_OPCODE_RFE; + + reg waiting_for_except_fetch; + always @(posedge clk `OR_ASYNC_RST) + if (rst) + waiting_for_except_fetch <= 0; + else if (waiting_for_except_fetch & next_fetch_done_i) + waiting_for_except_fetch <= 0; + else if (fetch_take_exception_branch_o) + waiting_for_except_fetch <= 1; + + assign fetch_advance = (next_fetch_done_i | except_ibus_err_i) & + !execute_waiting & !cpu_stall & + (!stepping | + (stepping & pstep[0] & !next_fetch_done_i)); + + assign padv_fetch_o = fetch_advance & !exception_pending & !doing_rfe_r & + !cpu_stall; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + take_exception <= 0; + else + take_exception <= (exception_pending | exception_r) & + (fetch_advance | + // Cause exception to always be 'taken' if stepping + (stepping & execute_done) + ) & + // Would like this as only a single pulse + !take_exception; + + reg padv_decode_r; + // Some bits of the pipeline (execute_alu for instance) require a falling + // edge of the decode signal to start work on multi-cycle ops. + always @(posedge clk `OR_ASYNC_RST) + if (rst) + padv_decode_r <= 0; + else + padv_decode_r <= padv_fetch_o; + + assign padv_decode_o = padv_decode_r; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_go <= 0; + else + execute_go <= padv_fetch_o | execute_waiting | + (stepping & next_fetch_done_i); + + assign execute_done = execute_go & !execute_waiting; + + // ALU or LSU stall execution, nothing else can + assign execute_valid = !((op_lsu_load_i | op_lsu_store_i) & !lsu_valid_i | + !alu_valid_i); + + assign execute_waiting = !execute_valid & !waiting_for_fetch; + assign execute_waiting_o = execute_waiting; + + + assign padv_execute_o = execute_done; + + assign spr_addr = du_access ? 
du_addr_i : ctrl_alu_result_i[15:0]; + assign ctrl_mfspr_we_o = op_mfspr & execute_go; + + // Pipeline flush + assign pipeline_flush_o = (execute_done & op_rfe) | + (exception_re) | + cpu_stall; + + // Flag + always @(posedge clk `OR_ASYNC_RST) + if (rst) + flag <= 0; + else if (execute_done) + flag <= ctrl_flag_clear_i ? 0 : + ctrl_flag_set_i ? 1 : flag; + + assign flag_o = flag; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_waiting_r <= 0; + else if (!execute_waiting) + execute_waiting_r <= 0; + else if (execute_waiting) + execute_waiting_r <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + decode_execute_halt <= 0; + else if (du_restart_from_stall) + decode_execute_halt <= 0; + else if (decode_execute_halt & deassert_decode_execute_halt) + decode_execute_halt <= 0; + else if ((op_rfe | exception) & !decode_execute_halt & !exception_taken) + decode_execute_halt <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + exception_r <= 0; + else if (exception_taken | du_restart_from_stall) + exception_r <= 0; + else if (exception & !exception_r) + exception_r <= 1; + + // Signal to indicate that the incoming exception or l.rfe has been taken + // and we're waiting for it to propagate through the pipeline. + always @(posedge clk `OR_ASYNC_RST) + if (rst) + exception_taken <= 0; + else if (exception_taken) + exception_taken <= 0; + else if (exception_r & take_exception) + exception_taken <= 1; + + assign exception_taken_o = exception_taken; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + last_branch_insn_pc <= 0; + else if (fetch_advance & ctrl_branch_occur) + last_branch_insn_pc <= spr_ppc; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + last_branch_target_pc <= 0; + else if (execute_done & ctrl_branch_occur & stepping) + last_branch_target_pc <= ctrl_branch_target_o; + + // Used to gate execute stage's advance signal in the case where a LSU op has + // finished before the next instruction has been fetched. 
Typically this + // occurs when not using icache and doing lots of memory accesses. + always @(posedge clk `OR_ASYNC_RST) + if (rst) + waiting_for_fetch <= 0; + else if (next_fetch_done_i) + waiting_for_fetch <= 0; + else if (!execute_waiting & execute_waiting_r & !next_fetch_done_i) + waiting_for_fetch <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + branched_and_waiting_for_fetch <= 0; + else if (exception_re) + branched_and_waiting_for_fetch <= 0; + else if (padv_fetch_o & ctrl_branch_occur_o) + branched_and_waiting_for_fetch <= 1; + else if (branched_and_waiting_for_fetch) + branched_and_waiting_for_fetch <= !next_fetch_done_i; + + + + assign doing_rfe = ((execute_done & op_rfe) | doing_rfe_r) & + !deassert_doing_rfe; + + // Basically, the fetch stage should always take the rfe immediately + assign deassert_doing_rfe = doing_rfe_r; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + doing_rfe_r <= 0; + else if (deassert_doing_rfe) + doing_rfe_r <= 0; + else if (execute_done) + doing_rfe_r <= op_rfe; + + assign spr_sr_o = spr_sr; + + // Supervision register + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_sr <= SPR_SR_RESET_VALUE; + else if (fetch_take_exception_branch_o) + begin + if (op_rfe & !rfete) + begin + spr_sr <= spr_esr; + end + else + begin + // Go into supervisor mode, disable interrupts, MMUs + spr_sr[`OR1K_SPR_SR_SM ] <= 1'b1; + if (FEATURE_TIMER!="NONE") + spr_sr[`OR1K_SPR_SR_TEE ] <= 1'b0; + if (FEATURE_PIC!="NONE") + spr_sr[`OR1K_SPR_SR_IEE ] <= 1'b0; + if (FEATURE_DMMU!="NONE") + spr_sr[`OR1K_SPR_SR_DME ] <= 1'b0; + if (FEATURE_IMMU!="NONE") + spr_sr[`OR1K_SPR_SR_IME ] <= 1'b0; + if (FEATURE_OVERFLOW!="NONE") + spr_sr[`OR1K_SPR_SR_OVE ] <= 1'b0; + end + end + else if (execute_done) + begin + spr_sr[`OR1K_SPR_SR_F ] <= ctrl_flag_set_i ? 1 : + ctrl_flag_clear_i ? 0 : + spr_sr[`OR1K_SPR_SR_F ]; + spr_sr[`OR1K_SPR_SR_CY ] <= carry_set_i ? 1 : + carry_clear_i ? 
0 : + spr_sr[`OR1K_SPR_SR_CY ]; + + if (FEATURE_OVERFLOW!="NONE") + spr_sr[`OR1K_SPR_SR_OV ] <= overflow_set_i ? 1 : + overflow_clear_i ? 0 : + spr_sr[`OR1K_SPR_SR_OV ]; + + if ((spr_we & (spr_sr[`OR1K_SPR_SR_SM] | du_access)) && + spr_addr==`OR1K_SPR_SR_ADDR) + begin + spr_sr[`OR1K_SPR_SR_SM ] <= spr_write_dat[`OR1K_SPR_SR_SM ]; + + spr_sr[`OR1K_SPR_SR_F ] <= spr_write_dat[`OR1K_SPR_SR_F ]; + + if (FEATURE_TIMER!="NONE") + spr_sr[`OR1K_SPR_SR_TEE ] <= spr_write_dat[`OR1K_SPR_SR_TEE ]; + + if (FEATURE_PIC!="NONE") + spr_sr[`OR1K_SPR_SR_IEE ] <= spr_write_dat[`OR1K_SPR_SR_IEE ]; + + if (FEATURE_DATACACHE!="NONE") + spr_sr[`OR1K_SPR_SR_DCE ] <= spr_write_dat[`OR1K_SPR_SR_DCE ]; + + if (FEATURE_INSTRUCTIONCACHE!="NONE") + spr_sr[`OR1K_SPR_SR_ICE ] <= spr_write_dat[`OR1K_SPR_SR_ICE ]; + + if (FEATURE_DMMU!="NONE") + spr_sr[`OR1K_SPR_SR_DME ] <= spr_write_dat[`OR1K_SPR_SR_DME ]; + + if (FEATURE_IMMU!="NONE") + spr_sr[`OR1K_SPR_SR_IME ] <= spr_write_dat[`OR1K_SPR_SR_IME ]; + + if (FEATURE_FASTCONTEXTS!="NONE") + spr_sr[`OR1K_SPR_SR_CE ] <= spr_write_dat[`OR1K_SPR_SR_CE ]; + + spr_sr[`OR1K_SPR_SR_CY ] <= spr_write_dat[`OR1K_SPR_SR_CY ]; + + if (FEATURE_OVERFLOW!="NONE") begin + spr_sr[`OR1K_SPR_SR_OV ] <= spr_write_dat[`OR1K_SPR_SR_OV ]; + spr_sr[`OR1K_SPR_SR_OVE ] <= spr_write_dat[`OR1K_SPR_SR_OVE ]; + end + + if (FEATURE_DSX!="NONE") + spr_sr[`OR1K_SPR_SR_DSX ] <= spr_write_dat[`OR1K_SPR_SR_DSX ]; + + spr_sr[`OR1K_SPR_SR_EPH ] <= spr_write_dat[`OR1K_SPR_SR_EPH ]; + + end // if ((spr_we & (spr_sr[`OR1K_SPR_SR_SM] | du_access)) &&... 
+ + /* Need to check for DSX being set on exception entry on execute_done + as the delay slot information is gone after it goes high */ + if (FEATURE_DSX!="NONE") + if (exception_r || exception_re) + spr_sr[`OR1K_SPR_SR_DSX ] <= execute_delay_slot; + + end // if (execute_done) + + assign carry_o = spr_sr[`OR1K_SPR_SR_CY]; + + // Exception SR + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_esr <= SPR_SR_RESET_VALUE; + else if (exception_re) + begin + spr_esr <= spr_sr; + /* + A bit odd, but if we had a l.sf instruction on an exception rising + edge, EPCR will point to the insn past the l.sf but the flag will + not have been saved to the SR properly. So we must put it in here + so it can be restored correctly. + Ditto for the other flags which may have been changed in a similar + fashion. + */ + if (execute_done) + begin + if (ctrl_flag_set_i) + spr_esr[`OR1K_SPR_SR_F ] <= 1'b1; + else if (ctrl_flag_clear_i) + spr_esr[`OR1K_SPR_SR_F ] <= 1'b0; + if (FEATURE_OVERFLOW!="NONE") + begin + if (overflow_set_i) + spr_esr[`OR1K_SPR_SR_OV ] <= 1'b1; + else if (overflow_clear_i) + spr_esr[`OR1K_SPR_SR_OV ] <= 1'b0; + end + if (carry_set_i) + spr_esr[`OR1K_SPR_SR_CY ] <= 1'b1; + else if (carry_clear_i) + spr_esr[`OR1K_SPR_SR_CY ] <= 1'b0; + end + end + else if (spr_we & spr_addr==`OR1K_SPR_ESR0_ADDR) + spr_esr <= spr_write_dat[SPR_SR_WIDTH-1:0]; + + // Exception PC + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_epcr <= OPTION_RESET_PC; + else if (exception_re & !(rfete & (op_rfe | deassert_doing_rfe))) + begin + if (except_ibus_err_i) + spr_epcr <= spr_ppc-4; + else if (except_syscall_i) + // EPCR after syscall is address of next not executed insn. + spr_epcr <= spr_npc; + else if (except_ticktimer | except_pic) + spr_epcr <= branched_and_waiting_for_fetch ? spr_npc : + execute_delay_slot ? 
spr_ppc-4 : spr_ppc+4; + else if (execute_stage_exceptions | + // Don't update EPCR on software breakpoint + (decode_stage_exceptions & !(stall_on_trap & except_trap_i))) + spr_epcr <= execute_delay_slot ? spr_ppc-4 : spr_ppc; + else if (!(stall_on_trap & except_trap_i)) + spr_epcr <= execute_delay_slot ? spr_ppc-4 : spr_ppc; + end + else if (spr_we && spr_addr==`OR1K_SPR_EPCR0_ADDR) + spr_epcr <= spr_write_dat; + + // Exception Effective Address + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_eear <= {OPTION_OPERAND_WIDTH{1'b0}}; + else if (exception_re) + begin + if (except_ibus_err_i) + spr_eear <= pc_fetch_i; + else + spr_eear <= ctrl_alu_result_i; + end + + // Next PC (NPC) + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_npc <= OPTION_RESET_PC; + else if (deassert_doing_rfe) + spr_npc <= rfete ? exception_pc_addr : spr_epcr; + else if (du_restart_o) + spr_npc <= du_restart_pc_o; + else if (stepping & next_fetch_done_i) + spr_npc <= execute_delay_slot ? last_branch_target_pc : pc_fetch_i; + else if (stepping & exception_r) + spr_npc <= exception_pc_addr; + else if (fetch_advance) + // PC we're now executing + spr_npc <= fetch_take_exception_branch_o ? exception_pc_addr : + ctrl_branch_occur ? ctrl_branch_target_o : pc_fetch_i; + + // Previous PC (PPC) + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_ppc <= OPTION_RESET_PC; + else if (padv_fetch_o | (stepping & next_fetch_done_i)) + spr_ppc <= spr_npc; // PC we've got in execute stage (about to finish) + + assign spr_npc_o = spr_npc; + assign spr_ppc_o = spr_ppc; + + // Remember when we're in a delay slot in execute stage. + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_delay_slot <= 0; + else if (execute_done) + execute_delay_slot <= execute_delay_slot ? 
0 : + ctrl_branch_occur; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + delay_slot_rf_we_done <= 0; + else + delay_slot_rf_we_done <= rf_we_o & execute_delay_slot; + + mor1kx_cfgrs + #(.FEATURE_PIC (FEATURE_PIC), + .FEATURE_TIMER (FEATURE_TIMER), + .OPTION_PIC_TRIGGER (OPTION_PIC_TRIGGER), + .FEATURE_DSX (FEATURE_DSX), + .FEATURE_FASTCONTEXTS (FEATURE_FASTCONTEXTS), + .FEATURE_OVERFLOW (FEATURE_OVERFLOW), + .FEATURE_DATACACHE (FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH (OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH (OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS (OPTION_DCACHE_WAYS), + .FEATURE_DMMU (FEATURE_DMMU), + .FEATURE_INSTRUCTIONCACHE (FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH (OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH (OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS (OPTION_ICACHE_WAYS), + .FEATURE_IMMU (FEATURE_IMMU), + .FEATURE_DEBUGUNIT (FEATURE_DEBUGUNIT), + .FEATURE_PERFCOUNTERS (FEATURE_PERFCOUNTERS), + .FEATURE_MAC (FEATURE_MAC), + .FEATURE_SYSCALL (FEATURE_SYSCALL), + .FEATURE_TRAP (FEATURE_TRAP), + .FEATURE_RANGE (FEATURE_RANGE), + .FEATURE_DELAYSLOT ("ENABLED") + ) + mor1kx_cfgrs + (/*AUTOINST*/ + // Outputs + .spr_vr (spr_vr[31:0]), + .spr_vr2 (spr_vr2[31:0]), + .spr_upr (spr_upr[31:0]), + .spr_cpucfgr (spr_cpucfgr[31:0]), + .spr_dmmucfgr (spr_dmmucfgr[31:0]), + .spr_immucfgr (spr_immucfgr[31:0]), + .spr_dccfgr (spr_dccfgr[31:0]), + .spr_iccfgr (spr_iccfgr[31:0]), + .spr_dcfgr (spr_dcfgr[31:0]), + .spr_pccfgr (spr_pccfgr[31:0]), + .spr_avr (spr_avr[31:0])); + + /* Implementation-specific registers */ + assign spr_isr[0] = 0; + assign spr_isr[1] = 0; + assign spr_isr[2] = 0; + assign spr_isr[3] = 0; + assign spr_isr[4] = 0; + assign spr_isr[5] = 0; + assign spr_isr[6] = 0; + assign spr_isr[7] = 0; + + // System group (0) SPR data out + always @* + case(spr_addr) + `OR1K_SPR_VR_ADDR: + spr_sys_group_read = spr_vr; + `OR1K_SPR_VR2_ADDR: + spr_sys_group_read = {spr_vr2[31:8], `MOR1KX_PIPEID_ESPRESSO}; 
+ `OR1K_SPR_AVR_ADDR: + spr_sys_group_read = spr_avr; + `OR1K_SPR_UPR_ADDR: + spr_sys_group_read = spr_upr; + `OR1K_SPR_CPUCFGR_ADDR: + spr_sys_group_read = spr_cpucfgr; + `OR1K_SPR_DMMUCFGR_ADDR: + spr_sys_group_read = spr_dmmucfgr; + `OR1K_SPR_IMMUCFGR_ADDR: + spr_sys_group_read = spr_immucfgr; + `OR1K_SPR_DCCFGR_ADDR: + spr_sys_group_read = spr_dccfgr; + `OR1K_SPR_ICCFGR_ADDR: + spr_sys_group_read = spr_iccfgr; + `OR1K_SPR_DCFGR_ADDR: + spr_sys_group_read = spr_dcfgr; + `OR1K_SPR_PCCFGR_ADDR: + spr_sys_group_read = spr_pccfgr; + `OR1K_SPR_NPC_ADDR: + spr_sys_group_read = spr_npc; + `OR1K_SPR_SR_ADDR: + spr_sys_group_read = {{(OPTION_OPERAND_WIDTH-SPR_SR_WIDTH){1'b0}}, + spr_sr}; + + `OR1K_SPR_PPC_ADDR: + spr_sys_group_read = spr_ppc; + `OR1K_SPR_FPCSR_ADDR: + spr_sys_group_read = spr_fpcsr; + `OR1K_SPR_EPCR0_ADDR: + spr_sys_group_read = spr_epcr; + `OR1K_SPR_EEAR0_ADDR: + spr_sys_group_read = spr_eear; + `OR1K_SPR_ESR0_ADDR: + spr_sys_group_read = {{(OPTION_OPERAND_WIDTH-SPR_SR_WIDTH){1'b0}}, + spr_esr}; + `OR1K_SPR_ISR0_ADDR: + spr_sys_group_read = spr_isr[0]; + `OR1K_SPR_ISR0_ADDR +1: + spr_sys_group_read = spr_isr[1]; + `OR1K_SPR_ISR0_ADDR +2: + spr_sys_group_read = spr_isr[2]; + `OR1K_SPR_ISR0_ADDR +3: + spr_sys_group_read = spr_isr[3]; + `OR1K_SPR_ISR0_ADDR +4: + spr_sys_group_read = spr_isr[4]; + `OR1K_SPR_ISR0_ADDR +5: + spr_sys_group_read = spr_isr[5]; + `OR1K_SPR_ISR0_ADDR +6: + spr_sys_group_read = spr_isr[6]; + `OR1K_SPR_ISR0_ADDR +7: + spr_sys_group_read = spr_isr[7]; + + `OR1K_SPR_COREID_ADDR: + // If the multicore feature is activated this address returns the + // core identifier, 0 otherwise + spr_sys_group_read = (FEATURE_MULTICORE != "NONE") ? 
+ multicore_coreid_i : 0; + + default: begin + /* GPR read */ + if (spr_addr >= `OR1K_SPR_GPR0_ADDR && + spr_addr <= (`OR1K_SPR_GPR0_ADDR + 32)) + spr_sys_group_read = b; /* Register file */ + else + /* Invalid address - read as zero*/ + spr_sys_group_read = 0; + end + endcase // case (spr_addr) + + /* System group read data MUX in */ + assign spr_internal_read_dat[0] = spr_sys_group_read; + /* System group ack generation */ + /* TODO - might be delay for register file reads! */ + assign spr_access_ack[0] = 1; + + + + /* Generate data to the register file for mfspr operations */ + assign mfspr_dat_o = spr_internal_read_dat[spr_addr[14:11]]; + + // PIC SPR control + generate + if (FEATURE_PIC !="NONE") begin : pic + + /* mor1kx_pic AUTO_TEMPLATE ( + .spr_picsr_o (spr_picsr), + .spr_picmr_o (spr_picmr), + .spr_bus_ack (spr_access_ack[9]), + .spr_dat_o (spr_internal_read_dat[9]), + // Inputs + .spr_we_i (spr_we), + .spr_access_i (1'b1), + .spr_addr_i (spr_addr), + .spr_dat_i (spr_write_dat), + );*/ + mor1kx_pic + #( + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH) + ) + mor1kx_pic + (/*AUTOINST*/ + // Outputs + .spr_picmr_o (spr_picmr), // Templated + .spr_picsr_o (spr_picsr), // Templated + .spr_bus_ack (spr_access_ack[9]), // Templated + .spr_dat_o (spr_internal_read_dat[9]), // Templated + // Inputs + .clk (clk), + .rst (rst), + .irq_i (irq_i[31:0]), + .spr_access_i (1'b1), // Templated + .spr_we_i (spr_we), // Templated + .spr_addr_i (spr_addr), // Templated + .spr_dat_i (spr_write_dat)); // Templated + + assign except_pic_nonsrmasked = (|spr_picsr) & + !op_mtspr & + // Stops back-to-back branch addresses going to + // fetch stage + !ctrl_branch_occur & + // Stops issues with PC when branching + !execute_delay_slot; + + assign except_pic = spr_sr[`OR1K_SPR_SR_IEE] & except_pic_nonsrmasked & + !doing_rfe; + end + else begin + assign except_pic_nonsrmasked = 0; + assign except_pic = 0; + assign spr_picsr = 0; + assign 
spr_picmr = 0; + assign spr_access_ack[9] = 0; + assign spr_internal_read_dat[9] = 0; + end // else: !if(FEATURE_PIC !="NONE") + endgenerate + + + generate + if (FEATURE_TIMER!="NONE") begin : tt + + /* mor1kx_ticktimer AUTO_TEMPLATE ( + .spr_ttmr_o (spr_ttmr), + .spr_ttcr_o (spr_ttcr), + .spr_bus_ack (spr_access_ack[10]), + .spr_dat_o (spr_internal_read_dat[10]), + // Inputs + .spr_we_i (spr_we), + .spr_access_i (1'b1), + .spr_addr_i (spr_addr), + .spr_dat_i (spr_write_dat), + );*/ + mor1kx_ticktimer mor1kx_ticktimer + (/*AUTOINST*/ + // Outputs + .spr_ttmr_o (spr_ttmr), // Templated + .spr_ttcr_o (spr_ttcr), // Templated + .spr_bus_ack (spr_access_ack[10]), // Templated + .spr_dat_o (spr_internal_read_dat[10]), // Templated + // Inputs + .clk (clk), + .rst (rst), + .spr_access_i (1'b1), // Templated + .spr_we_i (spr_we), // Templated + .spr_addr_i (spr_addr), // Templated + .spr_dat_i (spr_write_dat)); // Templated + + assign except_ticktimer_nonsrmasked = spr_ttmr[28] & + (!op_mtspr & !(spr_esr[`OR1K_SPR_SR_TEE] & execute_done)) & + // Stops back-to-back branch addresses to + // fetch stage. + !ctrl_branch_occur & + // Stops issues with PC when branching + !execute_delay_slot; + + assign except_ticktimer = except_ticktimer_nonsrmasked & + spr_sr[`OR1K_SPR_SR_TEE] & !doing_rfe; + end // if (FEATURE_TIMER!="NONE") + else begin + assign except_ticktimer_nonsrmasked = 0; + assign except_ticktimer = 0; + assign spr_ttmr = 0; + assign spr_ttcr = 0; + assign spr_access_ack[10] = 0; + assign spr_internal_read_dat[10] = 0; + end // else: !if(FEATURE_TIMER!="NONE") + endgenerate + + /* SPR access control - allow accesses from either the instructions or from + the debug interface */ + assign spr_read_access = (op_mfspr | (du_access & !du_we_i)); + assign spr_write_access = ((execute_done & op_mtspr) | (du_access & du_we_i)); + + assign spr_write_dat = du_access ? 
du_dat_i : b; + assign spr_we = spr_write_access & spr_group_present; + assign spr_read = spr_read_access & spr_group_present; + + /* A bus out to other units that live outside of the control unit */ + assign spr_bus_addr_o = spr_addr; + assign spr_bus_we_o = spr_write_access & spr_group_present & spr_bus_access; + assign spr_bus_stb_o = (spr_read_access | spr_write_access) & + spr_group_present & spr_bus_access; + assign spr_bus_dat_o = spr_write_dat; + + /* Is the SPR in the design? */ + assign spr_group_present = (// System group + (spr_addr[15:11]==5'h00) || + // DMMU + (spr_addr[15:11]==5'h01 && + FEATURE_DMMU!="NONE") || + // IMMU + (spr_addr[15:11]==5'h02 && + FEATURE_IMMU!="NONE") || + // Data cache + (spr_addr[15:11]==5'h03 && + FEATURE_DATACACHE!="NONE") || + // Instruction cache + (spr_addr[15:11]==5'h04 && + FEATURE_INSTRUCTIONCACHE!= "NONE") || + // MAC unit + (spr_addr[15:11]==5'h05 && + FEATURE_MAC!="NONE") || + // Debug unit + (spr_addr[15:11]==5'h06 && + FEATURE_DEBUGUNIT!="NONE") || + // Performance counters + (spr_addr[15:11]==5'h07 && + FEATURE_PERFCOUNTERS!="NONE") || + // Power Management + (spr_addr[15:11]==5'h08 && + FEATURE_PMU!="NONE") || + // PIC + (spr_addr[15:11]==5'h09 && + FEATURE_PIC!="NONE") || + // Tick timer + (spr_addr[15:11]==5'h0a && + FEATURE_TIMER!="NONE") || + // FPU + (spr_addr[15:11]==5'h0b && + FEATURE_FPU!="NONE") + ); + + /* Generate a SPR group signal - generate invalid if the group is not + present in the design */ + assign spr_group = (spr_group_present) ? spr_addr[14:11] : 4'd12; + + /* Default group when a selected one is not present - it reads as zero */ + assign spr_internal_read_dat[12] = 0; + + /* Is a SPR bus access needed, or is the requested SPR in this file? 
*/ + assign spr_bus_access = /* Any of the units we don't have in this file */ + /* System group */ + !(spr_addr[15:11]==5'h00 || + /* Debug Group */ + spr_addr[15:11]==5'h06 || + /* PIC Group */ + spr_addr[15:11]==5'h09 || + /* Tick Group */ + spr_addr[15:11]==5'h0a); + + assign stepping_o = stepping; + + generate + if (FEATURE_DEBUGUNIT!="NONE") begin : du + + reg [OPTION_OPERAND_WIDTH-1:0] du_read_dat; + + reg du_ack; + reg du_stall_r; + reg [1:0] pstep_r; + reg [1:0] branch_step; + reg stepped_into_exception; + reg stepped_into_rfe; + + assign du_access = du_stb_i; + + // Generate ack back to the debug interface bus + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_ack <= 0; + else if (du_ack) + du_ack <= 0; + else if (du_stb_i) begin + if (!spr_group_present) + /* Unit doesn't exist, ACK to clear the access, nothing done */ + du_ack <= 1; + else if (spr_access_ack[spr_group]) + /* actual access occurred */ + du_ack <= 1; + end + + assign du_ack_o = du_ack; + + /* Data back to the debug bus */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_read_dat <= 0; + else if (spr_access_ack[spr_group]) begin + du_read_dat <= spr_internal_read_dat[spr_group]; + end + + assign du_dat_o = du_read_dat; + /* TODO: check into only letting stall go high when we've gracefully + completed the instruction currently in the ctrl stage. + Why? Potentially an instruction like l.mfspr from an external unit + hasn't completed fully, gets interrupted, and it's assumed it's + completed, but actually hasn't. 
*/ + assign cpu_stall = du_stall_i | du_restart_from_stall; + + /* goes out to the debug interface and comes back 1 cycle later + via du_stall_i */ + assign du_stall_o = (stepping & execute_done)| + (stall_on_trap & execute_done & except_trap_i); + + /* Pulse to indicate we're restarting after a stall */ + assign du_restart_from_stall = du_stall_r & !du_stall_i; + + /* NPC debug control logic */ + assign du_npc_write = (du_we_i && du_addr_i==`OR1K_SPR_NPC_ADDR && + du_ack_o); + + /* Pick the traps-cause-stall bit out of the DSR */ + assign stall_on_trap = spr_dsr[`OR1K_SPR_DSR_TE]; + + /* record if NPC was written while we were stalled. + If so, we will use this value for restarting */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_npc_written <= 0; + else if (du_restart_from_stall) + du_npc_written <= 0; + else if (du_npc_write) + du_npc_written <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_spr_npc <= 0; + else if (du_npc_write) + du_spr_npc <= du_dat_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + stepped_into_exception <= 0; + else if (du_restart_from_stall) + stepped_into_exception <= 0; + else if (stepping & execute_done) + stepped_into_exception <= exception; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + stepped_into_rfe <= 0; + else if (du_restart_from_stall) + stepped_into_rfe <= 0; + else if (stepping & execute_done) + stepped_into_rfe <= op_rfe; + + assign du_restart_pc_o = du_npc_written ? du_spr_npc : + stepped_into_rfe ? spr_epcr : + stepped_into_delay_slot ? 
+ last_branch_target_pc : spr_npc; + + assign du_restart_o = du_restart_from_stall; + + /* Indicate when we're stepping */ + assign stepping = spr_dmr1[`OR1K_SPR_DMR1_ST] & + spr_dsr[`OR1K_SPR_DSR_TE]; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + pstep_r <= 0; + else if (du_restart_from_stall & stepping) + pstep_r <= 2'd1; + else if ((pstep[0] & next_fetch_done_i) | + /* decode is always single cycle */ + (pstep[1] & execute_done)) + pstep_r <= {pstep_r[0],1'b0}; + + assign pstep = pstep_r; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + branch_step <= 0; + else if (stepping & pstep[1]) + branch_step <= {branch_step[0], ctrl_branch_occur}; + else if (!stepping & execute_done) + branch_step <= {branch_step[0], execute_delay_slot}; + + assign stepped_into_delay_slot = branch_step[1]; + + /* Signals for waveform debuging */ + wire [31:0] spr_read_data_group_0; + assign spr_read_data_group_0 = spr_internal_read_dat[0]; + wire [31:0] spr_read_data_group_1; + assign spr_read_data_group_1 = spr_internal_read_dat[1]; + wire [31:0] spr_read_data_group_2; + assign spr_read_data_group_2 = spr_internal_read_dat[2]; + wire [31:0] spr_read_data_group_3; + assign spr_read_data_group_3 = spr_internal_read_dat[3]; + wire [31:0] spr_read_data_group_4; + assign spr_read_data_group_4 = spr_internal_read_dat[4]; + wire [31:0] spr_read_data_group_5; + assign spr_read_data_group_5 = spr_internal_read_dat[5]; + wire [31:0] spr_read_data_group_6; + assign spr_read_data_group_6 = spr_internal_read_dat[6]; + wire [31:0] spr_read_data_group_7; + assign spr_read_data_group_7 = spr_internal_read_dat[7]; + wire [31:0] spr_read_data_group_8; + assign spr_read_data_group_8 = spr_internal_read_dat[8]; + wire [31:0] spr_read_data_group_9; + assign spr_read_data_group_9 = spr_internal_read_dat[9]; + + + /* always single cycle access */ + assign spr_access_ack[6] = 1; + assign spr_internal_read_dat[6] = (spr_addr==`OR1K_SPR_DMR1_ADDR) ? + spr_dmr1 : + (spr_addr==`OR1K_SPR_DMR2_ADDR) ? 
+ spr_dmr2 : + (spr_addr==`OR1K_SPR_DSR_ADDR) ? + spr_dsr : + (spr_addr==`OR1K_SPR_DRR_ADDR) ? + spr_drr : 0; + + /* Put the incoming stall signal through a register to detect FE */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_stall_r <= 0; + else + du_stall_r <= du_stall_i; + + /* DMR1 */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_dmr1 <= 0; + else if (spr_we && spr_addr==`OR1K_SPR_DMR1_ADDR) + spr_dmr1[23:0] <= spr_write_dat[23:0]; + + /* DMR2 */ + always @(posedge clk) + spr_dmr2 <= 0; + + /* DSR */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_dsr <= 0; + else if (spr_we && spr_addr==`OR1K_SPR_DSR_ADDR) + spr_dsr[13:0] <= spr_write_dat[13:0]; + + /* DRR */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_drr <= 0; + else if (spr_we && spr_addr==`OR1K_SPR_DRR_ADDR) + spr_drr[13:0] <= spr_write_dat[13:0]; + else if (stall_on_trap & execute_done & except_trap_i) + spr_drr[`OR1K_SPR_DRR_TE] <= 1; + + end // block: du + else + begin : no_du + assign du_access = 0; + assign cpu_stall = 0; + assign du_stall_o = 0; + assign du_ack_o = 0; + assign du_restart_o = 0; + assign du_restart_pc_o = 0; + assign stepping = 0; + assign du_npc_write = 0; + assign stepped_into_delay_slot = 0; + assign du_dat_o = 0; + assign du_restart_from_stall = 0; + assign spr_access_ack[6] = 0; + + always @(posedge clk) + begin + spr_dmr1 <= 0; + spr_dmr2 <= 0; + spr_dsr <= 0; + spr_drr <= 0; + du_npc_written <= 0; + end + end + endgenerate + + /* Controls to generate ACKs from units that are external to this module */ + generate + if (FEATURE_DMMU!="NONE") begin : dmmu_ctrl + assign spr_access_ack[1] = spr_bus_ack_dmmu_i; + assign spr_internal_read_dat[1] = spr_bus_dat_dmmu_i; + end + else begin + assign spr_access_ack[1] = 0; + assign spr_internal_read_dat[1] = 0; + end + endgenerate + + generate + if (FEATURE_IMMU!="NONE") begin : immu_ctrl + assign spr_access_ack[2] = spr_bus_ack_immu_i; + assign spr_internal_read_dat[2] = spr_bus_dat_immu_i; + end + else 
begin + assign spr_access_ack[2] = 0; + assign spr_internal_read_dat[2] = 0; + end + endgenerate + + generate + if (FEATURE_DATACACHE!="NONE") begin : datacache_ctrl + assign spr_access_ack[3] = spr_bus_ack_dc_i; + assign spr_internal_read_dat[3] = spr_bus_dat_dc_i; + end + else begin + assign spr_access_ack[3] = 0; + assign spr_internal_read_dat[3] = 0; + end + endgenerate + + generate + if (FEATURE_INSTRUCTIONCACHE!="NONE") begin : instructioncache_ctrl + assign spr_access_ack[4] = spr_bus_ack_ic_i; + assign spr_internal_read_dat[4] = spr_bus_dat_ic_i; + end + else begin + assign spr_access_ack[4] = 0; + assign spr_internal_read_dat[4] = 0; + end + endgenerate + + generate + if (FEATURE_MAC!="NONE") begin : mac_ctrl + assign spr_access_ack[5] = spr_bus_ack_mac_i; + assign spr_internal_read_dat[5] = spr_bus_dat_mac_i; + end + else begin + assign spr_access_ack[5] = 0; + assign spr_internal_read_dat[5] = 0; + end + endgenerate + + generate + if (FEATURE_PERFCOUNTERS!="NONE") begin : perfcounters_ctrl + assign spr_access_ack[7] = spr_bus_ack_pcu_i; + assign spr_internal_read_dat[7] = spr_bus_dat_pcu_i; + end + else begin + assign spr_access_ack[7] = 0; + assign spr_internal_read_dat[7] = 0; + end + endgenerate + + generate + if (FEATURE_PMU!="NONE") begin : pmu_ctrl + assign spr_access_ack[8] = spr_bus_ack_pmu_i; + assign spr_internal_read_dat[8] = spr_bus_dat_pmu_i; /* group 8 read data comes from the PMU bus, paired with spr_bus_ack_pmu_i */ + end + else begin + assign spr_access_ack[8] = 0; + assign spr_internal_read_dat[8] = 0; + end + endgenerate + + generate + if (FEATURE_FPU!="NONE") begin : fpu_ctrl + assign spr_access_ack[11] = spr_bus_ack_fpu_i; + assign spr_internal_read_dat[11] = spr_bus_dat_fpu_i; + end + else begin + assign spr_access_ack[11] = 0; + assign spr_internal_read_dat[11] = 0; + end + endgenerate + +endmodule // mor1kx_ctrl_espresso diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ctrl_prontoespresso.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ctrl_prontoespresso.v new file mode 100644 index 
0000000..9dbc46c --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ctrl_prontoespresso.v @@ -0,0 +1,1494 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx pronto espresso pipeline control unit + + inputs from execute stage + + generate pipeline controls + + manage SPRs + + issue addresses for exceptions to fetch stage + control branches going to fetch stage + + contains tick timer + + contains PIC logic + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_ctrl_prontoespresso + (/*AUTOARG*/ + // Outputs + spr_npc_o, spr_ppc_o, link_addr_o, mfspr_dat_o, ctrl_mfspr_we_o, + flag_o, carry_o, pipeline_flush_o, padv_fetch_o, padv_decode_o, + padv_execute_o, fetch_take_exception_branch_o, exception_taken_o, + execute_waiting_o, stepping_o, du_dat_o, du_ack_o, du_stall_o, + du_restart_pc_o, du_restart_o, spr_bus_addr_o, spr_bus_we_o, + spr_bus_stb_o, spr_bus_dat_o, spr_sr_o, ctrl_branch_target_o, + ctrl_insn_done_o, ctrl_branch_occur_o, rf_we_o, + // Inputs + clk, rst, ctrl_alu_result_i, ctrl_rfb_i, ctrl_flag_set_i, + ctrl_flag_clear_i, ctrl_opc_insn_i, fetch_ppc_i, pc_fetch_next_i, + fetch_sleep_i, except_ibus_err_i, except_illegal_i, + except_syscall_i, except_dbus_i, except_trap_i, except_align_i, + fetch_ready_i, fetch_quick_branch_i, alu_valid_i, lsu_valid_i, + op_lsu_load_i, op_lsu_store_i, op_jr_i, op_jbr_i, irq_i, + carry_set_i, carry_clear_i, overflow_set_i, overflow_clear_i, + du_addr_i, du_stb_i, du_dat_i, du_we_i, du_stall_i, + spr_bus_dat_dc_i, spr_bus_ack_dc_i, spr_bus_dat_ic_i, + spr_bus_ack_ic_i, spr_bus_dat_dmmu_i, 
spr_bus_ack_dmmu_i, + spr_bus_dat_immu_i, spr_bus_ack_immu_i, spr_bus_dat_mac_i, + spr_bus_ack_mac_i, spr_bus_dat_pmu_i, spr_bus_ack_pmu_i, + spr_bus_dat_pcu_i, spr_bus_ack_pcu_i, spr_bus_dat_fpu_i, + spr_bus_ack_fpu_i, multicore_coreid_i, rf_wb_i + ); + + parameter OPTION_OPERAND_WIDTH = 32; + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}; + + parameter FEATURE_SYSCALL = "ENABLED"; + parameter FEATURE_TRAP = "ENABLED"; + parameter FEATURE_RANGE = "ENABLED"; + + parameter FEATURE_DATACACHE = "NONE"; + parameter OPTION_DCACHE_BLOCK_WIDTH = 5; + parameter OPTION_DCACHE_SET_WIDTH = 9; + parameter OPTION_DCACHE_WAYS = 2; + parameter FEATURE_DMMU = "NONE"; + parameter FEATURE_INSTRUCTIONCACHE = "NONE"; + parameter OPTION_ICACHE_BLOCK_WIDTH = 5; + parameter OPTION_ICACHE_SET_WIDTH = 9; + parameter OPTION_ICACHE_WAYS = 2; + parameter FEATURE_IMMU = "NONE"; + parameter FEATURE_TIMER = "ENABLED"; + parameter FEATURE_DEBUGUNIT = "NONE"; + parameter FEATURE_PERFCOUNTERS = "NONE"; + parameter FEATURE_PMU = "NONE"; + parameter FEATURE_MAC = "NONE"; + parameter FEATURE_FPU = "NONE"; + + parameter FEATURE_MULTICORE = "NONE"; + + parameter FEATURE_PIC = "ENABLED"; + parameter OPTION_PIC_TRIGGER = "LEVEL"; + parameter OPTION_PIC_NMI_WIDTH = 0; + + parameter FEATURE_DSX = "NONE"; + parameter FEATURE_FASTCONTEXTS = "NONE"; + parameter FEATURE_OVERFLOW = "NONE"; + + parameter SPR_SR_WIDTH = 16; + parameter SPR_SR_RESET_VALUE = 16'h8001; + + parameter FEATURE_INBUILT_CHECKERS = "ENABLED"; + + input clk, rst; + + // ALU result - either jump target, SPR address + input [OPTION_OPERAND_WIDTH-1:0] ctrl_alu_result_i; + + // Operand B from RF might be jump address, might be value for SPR + input [OPTION_OPERAND_WIDTH-1:0] ctrl_rfb_i; + + input ctrl_flag_set_i, ctrl_flag_clear_i; + + output [OPTION_OPERAND_WIDTH-1:0] spr_npc_o; + output [OPTION_OPERAND_WIDTH-1:0] spr_ppc_o; + + // Link address, to writeback stage + output [OPTION_OPERAND_WIDTH-1:0] 
link_addr_o; + + // Opcode of the instruction being handled; decoded further down into + // op_mtspr / op_mfspr / op_rfe. + input [`OR1K_OPCODE_WIDTH-1:0] ctrl_opc_insn_i; + + // PCs from the fetch stage + // PC of the instruction from fetch stage + input [OPTION_OPERAND_WIDTH-1:0] fetch_ppc_i; + // Next PC we're going to deliver + input [OPTION_OPERAND_WIDTH-1:0] pc_fetch_next_i; + + // Input from fetch stage, indicating it's "sleeping", or not fetching + // anymore. + input fetch_sleep_i; + + + // Exception inputs, registered on output of execute stage + input except_ibus_err_i, + except_illegal_i, + except_syscall_i, except_dbus_i, + except_trap_i, except_align_i; + + // Inputs from two units that can stall proceedings + input fetch_ready_i; + input fetch_quick_branch_i; + + // Result-valid indications from the ALU and LSU; execution is held + // while the ALU is not valid, or a load/store is pending without + // lsu_valid_i. + input alu_valid_i, lsu_valid_i; + + input op_lsu_load_i, op_lsu_store_i; + input op_jr_i, op_jbr_i; + + // External IRQ lines in + input [31:0] irq_i; + + // SPR data out + output [OPTION_OPERAND_WIDTH-1:0] mfspr_dat_o; + + // WE to RF for l.mfspr + output ctrl_mfspr_we_o; + + // Flag out to branch control; driven straight from the flag register + // (no extra output stage) + output flag_o; + + // Arithmetic flags to and from ALU + output carry_o; + input carry_set_i; + input carry_clear_i; + input overflow_set_i; + input overflow_clear_i; + + // Branch indicator from control unit (l.rfe/exception) + wire ctrl_branch_exception; + // PC out to fetch stage for l.rfe, exceptions + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_branch_except_pc; + + // Clear instructions from decode and fetch stage + output pipeline_flush_o; + + output padv_fetch_o; + output padv_decode_o; + output padv_execute_o; + + // This indicates to the fetch unit only that it should basically interrupt + // whatever it's doing and start fetching the exception + output fetch_take_exception_branch_o; + // This indicates to other parts of the CPU that we've handled an exception + // so can be used to clear exception indication registers + output exception_taken_o; + + output execute_waiting_o; + output stepping_o; + + // Debug bus + input [15:0] du_addr_i; + input du_stb_i; + input
[OPTION_OPERAND_WIDTH-1:0] du_dat_i; + input du_we_i; + output [OPTION_OPERAND_WIDTH-1:0] du_dat_o; + output du_ack_o; + // Stall control from debug interface + input du_stall_i; + output du_stall_o; + output [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_o; + output du_restart_o; + + // SPR accesses to external units (cache, mmu, etc.) + output [15:0] spr_bus_addr_o; + output spr_bus_we_o; + output spr_bus_stb_o; + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dc_i; + input spr_bus_ack_dc_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_ic_i; + input spr_bus_ack_ic_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dmmu_i; + input spr_bus_ack_dmmu_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_immu_i; + input spr_bus_ack_immu_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_mac_i; + input spr_bus_ack_mac_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pmu_i; + input spr_bus_ack_pmu_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_pcu_i; + input spr_bus_ack_pcu_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_fpu_i; + input spr_bus_ack_fpu_i; + output [15:0] spr_sr_o; + + // The multicore core identifier + input [OPTION_OPERAND_WIDTH-1:0] multicore_coreid_i; + + // Internal signals + reg flag; + reg [SPR_SR_WIDTH-1:0] spr_sr; + reg [SPR_SR_WIDTH-1:0] spr_esr; + reg [OPTION_OPERAND_WIDTH-1:0] spr_epcr; + reg [OPTION_OPERAND_WIDTH-1:0] spr_eear; + + // Programmable Interrupt Control SPRs + wire [31:0] spr_picmr; + wire [31:0] spr_picsr; + + // Tick Timer SPRs + wire [31:0] spr_ttmr; + wire [31:0] spr_ttcr; + + reg [OPTION_OPERAND_WIDTH-1:0] spr_ppc; + reg [OPTION_OPERAND_WIDTH-1:0] spr_npc; + + output [OPTION_OPERAND_WIDTH-1:0] ctrl_branch_target_o; + + reg execute_go; + wire execute_done; + + output ctrl_insn_done_o; + + reg execute_waiting_r; + + reg decode_execute_halt; + + reg exception_taken; + + reg take_exception; + reg exception_r; + + reg [OPTION_OPERAND_WIDTH-1:0] exception_pc_addr; + + reg 
waiting_for_fetch; + + reg doing_rfe_r; + wire doing_rfe; + wire deassert_doing_rfe; + + wire exception, exception_pending; + + wire execute_stage_exceptions; + wire decode_stage_exceptions; + + wire exception_re; + + wire [31:0] irq_unmasked; + + wire except_ticktimer; + wire except_pic; + + wire except_ticktimer_nonsrmasked; + wire except_pic_nonsrmasked; + + wire except_range; + + wire [15:0] spr_addr; + + wire op_mtspr; + wire op_mfspr; + wire op_rfe; + + wire [OPTION_OPERAND_WIDTH-1:0] b; + + wire execute_waiting; + + wire execute_valid; + + wire deassert_decode_execute_halt; + + wire ctrl_branch_occur; + wire new_branch; + output ctrl_branch_occur_o; + output rf_we_o; + input rf_wb_i; + wire except_ibus_align; + wire fetch_advance; + wire rfete; + wire stall_on_trap; + + /* Debug SPRs */ + reg [31:0] spr_dmr1; + reg [31:0] spr_dmr2; + reg [31:0] spr_dsr; + reg [31:0] spr_drr; + + /* DU internal control signals */ + wire du_access; + reg cpu_stall; + wire du_restart_from_stall; + wire [1:0] pstep; + wire stepping; + wire du_npc_write; + + /* Wires for SPR management */ + wire spr_group_present; + wire [3:0] spr_group; + wire spr_we; + wire spr_read; + wire [OPTION_OPERAND_WIDTH-1:0] spr_write_dat; + wire [11:0] spr_access_ack; + wire [31:0] spr_internal_read_dat [0:12]; + wire spr_read_access; + wire spr_write_access; + wire spr_bus_access; + reg [OPTION_OPERAND_WIDTH-1:0] spr_sys_group_read; + + /* Wires from mor1kx_cfgrs module */ + wire [31:0] spr_vr; + wire [31:0] spr_vr2; + wire [31:0] spr_avr; + wire [31:0] spr_upr; + wire [31:0] spr_cpucfgr; + wire [31:0] spr_dmmucfgr; + wire [31:0] spr_immucfgr; + wire [31:0] spr_dccfgr; + wire [31:0] spr_iccfgr; + wire [31:0] spr_dcfgr; + wire [31:0] spr_pccfgr; + wire [31:0] spr_fpcsr = 0; + wire [31:0] spr_isr [0:7]; + + assign b = ctrl_rfb_i; + + assign ctrl_branch_exception = (exception_r | (op_rfe | doing_rfe)) & + !exception_taken; + + assign exception_pending = (except_ibus_err_i | except_ibus_align | + 
except_illegal_i | except_syscall_i | + except_dbus_i | except_align_i | + except_ticktimer | except_range | + except_pic | except_trap_i ); + + assign exception = exception_pending; + + assign fetch_take_exception_branch_o = (take_exception | op_rfe) & + !stepping; + + assign execute_stage_exceptions = except_dbus_i | except_align_i | + except_range; + assign decode_stage_exceptions = except_trap_i | except_illegal_i; + + assign exception_re = exception & !exception_r & !exception_taken; + + assign deassert_decode_execute_halt = ctrl_branch_occur & + decode_execute_halt; + + assign ctrl_branch_except_pc = (op_rfe | doing_rfe) & !rfete ? spr_epcr : + exception_pc_addr; + + // Exceptions take precedence + assign ctrl_branch_occur = // instruction is branch, and flag is right + (op_jbr_i & + // is l.j or l.jal + (!(|ctrl_opc_insn_i[2:1]) | + // is l.bf/bnf and flag is right + (ctrl_opc_insn_i[2]==flag))) | + (op_jr_i & !(except_ibus_align)); + + assign ctrl_branch_occur_o = // Usual branch signaling + ((ctrl_branch_occur/* | ctrl_branch_exception*/) & + fetch_advance); + + assign ctrl_branch_target_o = ctrl_branch_exception ? + ctrl_branch_except_pc : + // jump or branch? + op_jbr_i ? ctrl_alu_result_i : + ctrl_rfb_i; + + // Do writeback when we register our output to the next stage, or if + // we're doing mfspr + assign rf_we_o = (execute_done /*& !delay_slot_rf_we_done*/) & + ((rf_wb_i & !op_mfspr + & !((op_lsu_load_i | op_lsu_store_i) & + except_dbus_i | except_align_i)) | + (op_mfspr)); + + assign except_range = (FEATURE_RANGE!="NONE") ? 
spr_sr[`OR1K_SPR_SR_OVE] && + (spr_sr[`OR1K_SPR_SR_OV] | overflow_set_i & + execute_done) & !doing_rfe: 0; + + // Check for unaligned jump address from register + assign except_ibus_align = op_jr_i & (|ctrl_rfb_i[1:0]); + + // Return from exception to exception (if pending tick or PIC ints) + // Looks at the enable bits saved in ESR rather than the live SR. + assign rfete = (spr_esr[`OR1K_SPR_SR_IEE] & except_pic_nonsrmasked) | + (spr_esr[`OR1K_SPR_SR_TEE] & except_ticktimer_nonsrmasked); + + // Exception vector address select. + // The concatenation below is matched with casez so that the leftmost + // asserted source wins; if none of the first nine sources is set the + // tick timer vector is used. + // NOTE(review): unlike most other registers in this file this block has + // no `OR_ASYNC_RST in its sensitivity list, so its reset is always + // synchronous - confirm that is intended. + always @(posedge clk) + if (rst) + exception_pc_addr <= OPTION_RESET_PC; + else if (exception_re | (rfete & execute_done)) + casez( + {except_ibus_err_i, + except_illegal_i, + except_align_i, + except_ibus_align, + except_syscall_i, + except_trap_i, + except_dbus_i, + except_range, + except_pic_nonsrmasked, + except_ticktimer_nonsrmasked + } + ) + 10'b1?????????: + exception_pc_addr <= {19'd0,`OR1K_BERR_VECTOR,8'd0}; + 10'b01????????: + exception_pc_addr <= {19'd0,`OR1K_ILLEGAL_VECTOR,8'd0}; + // except_align_i and except_ibus_align share one vector + 10'b001???????, + 10'b0001??????: + exception_pc_addr <= {19'd0,`OR1K_ALIGN_VECTOR,8'd0}; + 10'b00001?????: + exception_pc_addr <= {19'd0,`OR1K_SYSCALL_VECTOR,8'd0}; + 10'b000001????: + exception_pc_addr <= {19'd0,`OR1K_TRAP_VECTOR,8'd0}; + // except_dbus_i uses the same bus error vector as except_ibus_err_i + 10'b0000001???: + exception_pc_addr <= {19'd0,`OR1K_BERR_VECTOR,8'd0}; + 10'b00000001??: + exception_pc_addr <= {19'd0,`OR1K_RANGE_VECTOR,8'd0}; + 10'b000000001?: + exception_pc_addr <= {19'd0,`OR1K_INT_VECTOR,8'd0}; + //10'b0000000001: + default: + exception_pc_addr <= {19'd0,`OR1K_TT_VECTOR,8'd0}; + endcase // casez (...
+ + // Decode the opcodes this unit acts on directly + assign op_mtspr = ctrl_opc_insn_i==`OR1K_OPCODE_MTSPR; + assign op_mfspr = ctrl_opc_insn_i==`OR1K_OPCODE_MFSPR; + assign op_rfe = ctrl_opc_insn_i==`OR1K_OPCODE_RFE; + + // Set when an exception/l.rfe branch is issued to the fetch stage and + // cleared again on the next fetch_ready_i. + // NOTE(review): this register is not read anywhere in the part of the + // file visible here - confirm it is still needed. + reg waiting_for_except_fetch; + always @(posedge clk `OR_ASYNC_RST) + if (rst) + waiting_for_except_fetch <= 0; + else if (waiting_for_except_fetch & fetch_ready_i) + waiting_for_except_fetch <= 0; + else if (fetch_take_exception_branch_o) + waiting_for_except_fetch <= 1; + + // Fetch may advance when it is ready (or has reported a bus error), + // execute is not waiting and the CPU is not stalled; while stepping + // this is further limited to pstep[0] with fetch not ready. + assign fetch_advance = (fetch_ready_i | except_ibus_err_i) & + !execute_waiting & !cpu_stall & + (!stepping | + (stepping & pstep[0] & !fetch_ready_i)); + + // Note: !cpu_stall is already a term of fetch_advance; it is repeated here. + assign padv_fetch_o = fetch_advance & !exception_pending & !doing_rfe_r & + !cpu_stall; + + // Single-cycle pulse requesting that a pending exception be taken. + // Note: the (stepping & execute_done) term is already covered by the + // plain execute_done term in the same OR. + always @(posedge clk `OR_ASYNC_RST) + if (rst) + take_exception <= 0; + else + take_exception <= (exception_pending/* | doing_rfe_r*/) & + (((fetch_advance & waiting_for_fetch) | execute_done | + fetch_sleep_i) | + // Cause exception to always be 'taken' if stepping + (stepping & execute_done) + ) & + // Would like this as only a single pulse + !take_exception; + + reg padv_decode_r; + // Some bits of the pipeline (execute_alu for instance) require a falling + // edge of the decode signal to start work on multi-cycle ops. + // Decode advance is simply padv_fetch_o delayed by one clock. + always @(posedge clk `OR_ASYNC_RST) + if (rst) + padv_decode_r <= 0; + else + padv_decode_r <= padv_fetch_o; + + assign padv_decode_o = padv_decode_r; + + // Rising-edge detect on ctrl_branch_occur + reg ctrl_branch_occur_r; + wire ctrl_branch_occur_re; + assign ctrl_branch_occur_re = ctrl_branch_occur & !ctrl_branch_occur_r; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + ctrl_branch_occur_r <= 0; + else + ctrl_branch_occur_r <= ctrl_branch_occur; + + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_go <= 0; + else + // Note: turned padv_fetch_o here into (padv_fetch_o & + // !ctrl_branch_occur) for pronto version. This may have implications + // for exception handling.
+ execute_go <= (padv_fetch_o & !(ctrl_branch_occur_re | op_rfe)) | + execute_waiting | (stepping & fetch_ready_i); + + assign execute_done = (execute_go | fetch_quick_branch_i) & + !execute_waiting & !cpu_stall; + // Note: we gate on cpu_stall here because a case was observed where + // the stall came during a multicycle instruction, and the rest of the + // pipeline had stalled and execute_done strobed, indicating the + // instruction completed but the PCs were not advanced. So it's best to + // just stop this signal asserting, meaning we don't allow the + // instruction to officially complete (result is not written to RF). + + assign ctrl_insn_done_o = execute_done; + + // ALU or LSU stall execution, nothing else can + assign execute_valid = !((op_lsu_load_i | op_lsu_store_i) & !lsu_valid_i | + !alu_valid_i); + + assign execute_waiting = !execute_valid & !waiting_for_fetch; + assign execute_waiting_o = execute_waiting; + + assign padv_execute_o = execute_done; + + assign spr_addr = du_access ? du_addr_i : ctrl_alu_result_i[15:0]; + assign ctrl_mfspr_we_o = op_mfspr & execute_go; + + // Pipeline flush + assign pipeline_flush_o = (execute_done & op_rfe) | + (exception_re) | + cpu_stall; + + // Flag + always @(posedge clk `OR_ASYNC_RST) + if (rst) + flag <= 0; + else if (execute_done) + flag <= ctrl_flag_clear_i ? 0 : + ctrl_flag_set_i ? 
1 : flag; + + assign flag_o = flag; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_waiting_r <= 0; + else if (!execute_waiting) + execute_waiting_r <= 0; + else if (execute_waiting) + execute_waiting_r <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + decode_execute_halt <= 0; + else if (du_restart_from_stall) + decode_execute_halt <= 0; + else if (decode_execute_halt & deassert_decode_execute_halt) + decode_execute_halt <= 0; + else if ((op_rfe | exception) & !decode_execute_halt & !exception_taken) + decode_execute_halt <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + exception_r <= 0; + else if (exception_taken | du_restart_from_stall) + exception_r <= 0; + else if (exception & !exception_r) + exception_r <= 1; + + // Signal to indicate that the incoming exception or l.rfe has been taken + // and we're waiting for it to propagate through the pipeline. + always @(posedge clk `OR_ASYNC_RST) + if (rst) + exception_taken <= 0; + else if (exception_taken) + exception_taken <= 0; + else if (exception_r & take_exception) + exception_taken <= 1; + + assign exception_taken_o = exception_r & take_exception;//exception_taken; + + // Used to gate execute stage's advance signal in the case where a LSU op has + // finished before the next instruction has been fetched. Typically this + // occurs when not using icache and doing lots of memory accesses. 
+ always @(posedge clk `OR_ASYNC_RST) + if (rst) + waiting_for_fetch <= 0; + else if (fetch_ready_i) + waiting_for_fetch <= 0; + // Whenever execute not waiting and fetch not ready + else if (!execute_waiting /*& execute_waiting_r*/ & !fetch_ready_i) + waiting_for_fetch <= 1; + else if (execute_done & !fetch_ready_i) + waiting_for_fetch <= 1; + + assign doing_rfe = ((execute_done & op_rfe) | doing_rfe_r) & + !deassert_doing_rfe; + + // Basically, the fetch stage should always take the rfe immediately + assign deassert_doing_rfe = doing_rfe_r; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + doing_rfe_r <= 0; + else if (deassert_doing_rfe) + doing_rfe_r <= 0; + else if (execute_done) + doing_rfe_r <= op_rfe; + + assign spr_sr_o = spr_sr; + + // Supervision register + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_sr <= SPR_SR_RESET_VALUE; + else if (fetch_take_exception_branch_o) + begin + if (op_rfe & !rfete) + begin + spr_sr <= spr_esr; + end + else + begin + // Go into supervisor mode, disable interrupts, MMUs + spr_sr[`OR1K_SPR_SR_SM ] <= 1'b1; + if (FEATURE_TIMER!="NONE") + spr_sr[`OR1K_SPR_SR_TEE ] <= 1'b0; + if (FEATURE_PIC!="NONE") + spr_sr[`OR1K_SPR_SR_IEE ] <= 1'b0; + if (FEATURE_DMMU!="NONE") + spr_sr[`OR1K_SPR_SR_DME ] <= 1'b0; + if (FEATURE_IMMU!="NONE") + spr_sr[`OR1K_SPR_SR_IME ] <= 1'b0; + if (FEATURE_OVERFLOW!="NONE") + spr_sr[`OR1K_SPR_SR_OVE ] <= 1'b0; + end + end + else if (execute_done) + begin + spr_sr[`OR1K_SPR_SR_F ] <= ctrl_flag_set_i ? 1 : + ctrl_flag_clear_i ? 0 : + spr_sr[`OR1K_SPR_SR_F ]; + spr_sr[`OR1K_SPR_SR_CY ] <= carry_set_i ? 1 : + carry_clear_i ? 0 : + spr_sr[`OR1K_SPR_SR_CY ]; + if (FEATURE_OVERFLOW!="NONE") + spr_sr[`OR1K_SPR_SR_OV ] <= overflow_set_i ? 1 : + overflow_clear_i ? 
0 : + spr_sr[`OR1K_SPR_SR_OV ]; + + if ((spr_we & (spr_sr[`OR1K_SPR_SR_SM] | du_access)) && + spr_addr==`OR1K_SPR_SR_ADDR) + begin + spr_sr[`OR1K_SPR_SR_SM ] <= spr_write_dat[`OR1K_SPR_SR_SM ]; + + spr_sr[`OR1K_SPR_SR_F ] <= spr_write_dat[`OR1K_SPR_SR_F ]; + + if (FEATURE_TIMER!="NONE") + spr_sr[`OR1K_SPR_SR_TEE ] <= spr_write_dat[`OR1K_SPR_SR_TEE ]; + + if (FEATURE_PIC!="NONE") + spr_sr[`OR1K_SPR_SR_IEE ] <= spr_write_dat[`OR1K_SPR_SR_IEE ]; + + if (FEATURE_DATACACHE!="NONE") + spr_sr[`OR1K_SPR_SR_DCE ] <= spr_write_dat[`OR1K_SPR_SR_DCE ]; + + if (FEATURE_INSTRUCTIONCACHE!="NONE") + spr_sr[`OR1K_SPR_SR_ICE ] <= spr_write_dat[`OR1K_SPR_SR_ICE ]; + + if (FEATURE_DMMU!="NONE") + spr_sr[`OR1K_SPR_SR_DME ] <= spr_write_dat[`OR1K_SPR_SR_DME ]; + + if (FEATURE_IMMU!="NONE") + spr_sr[`OR1K_SPR_SR_IME ] <= spr_write_dat[`OR1K_SPR_SR_IME ]; + + if (FEATURE_FASTCONTEXTS!="NONE") + spr_sr[`OR1K_SPR_SR_CE ] <= spr_write_dat[`OR1K_SPR_SR_CE ]; + + spr_sr[`OR1K_SPR_SR_CY ] <= spr_write_dat[`OR1K_SPR_SR_CY ]; + + if (FEATURE_OVERFLOW!="NONE") begin + spr_sr[`OR1K_SPR_SR_OV ] <= spr_write_dat[`OR1K_SPR_SR_OV ]; + spr_sr[`OR1K_SPR_SR_OVE ] <= spr_write_dat[`OR1K_SPR_SR_OVE ]; + end + + if (FEATURE_DSX!="NONE") + spr_sr[`OR1K_SPR_SR_DSX ] <= spr_write_dat[`OR1K_SPR_SR_DSX ]; + + spr_sr[`OR1K_SPR_SR_EPH ] <= spr_write_dat[`OR1K_SPR_SR_EPH ]; + + end // if ((spr_we & (spr_sr[`OR1K_SPR_SR_SM] | du_access)) &&... + + end // if (execute_done) + + assign carry_o = spr_sr[`OR1K_SPR_SR_CY]; + + // Exception SR + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_esr <= SPR_SR_RESET_VALUE; + else if (exception_re) + begin + spr_esr <= spr_sr; + /* + A bit odd, but if we had a l.sf instruction on an exception rising + edge, EPCR will point to the insn past the l.sf but the flag will + not have been saved to the SR properly. So we must put it in here + so it can be restored correctly. + Ditto for the other flags which may have been changed in a similar + fashion. 
+ */ + if (execute_done) + begin + if (ctrl_flag_set_i) + spr_esr[`OR1K_SPR_SR_F ] <= 1'b1; + else if (ctrl_flag_clear_i) + spr_esr[`OR1K_SPR_SR_F ] <= 1'b0; + if (FEATURE_OVERFLOW!="NONE") + begin + if (overflow_set_i) + spr_esr[`OR1K_SPR_SR_OV ] <= 1'b1; + else if (overflow_clear_i) + spr_esr[`OR1K_SPR_SR_OV ] <= 1'b0; + end + if (carry_set_i) + spr_esr[`OR1K_SPR_SR_CY ] <= 1'b1; + else if (carry_clear_i) + spr_esr[`OR1K_SPR_SR_CY ] <= 1'b0; + end + end + else if (spr_we & spr_addr==`OR1K_SPR_ESR0_ADDR) + spr_esr <= spr_write_dat[SPR_SR_WIDTH-1:0]; + + // Exception PC + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_epcr <= OPTION_RESET_PC; + else if (exception_re & !(rfete & (op_rfe | deassert_doing_rfe))) + begin + if (except_ibus_err_i) + spr_epcr <= spr_ppc; + else if (except_syscall_i) + // EPCR after syscall is address of next not executed insn. + spr_epcr <= spr_npc; + else if (except_ticktimer | except_pic) + spr_epcr <= ctrl_branch_occur ? spr_ppc : spr_npc; + else if (execute_stage_exceptions | + // Don't update EPCR on software breakpoint + (decode_stage_exceptions & !(stall_on_trap & except_trap_i))) + spr_epcr <= spr_ppc; + else if (!(stall_on_trap & except_trap_i)) + spr_epcr <= spr_ppc; + end + else if (spr_we && spr_addr==`OR1K_SPR_EPCR0_ADDR) + spr_epcr <= spr_write_dat; + + // Exception Effective Address + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_eear <= {OPTION_OPERAND_WIDTH{1'b0}}; + else if (exception_re) + begin + if (except_ibus_err_i) + spr_eear <= fetch_ppc_i; + else + spr_eear <= ctrl_alu_result_i; + end + + // Next PC (NPC) + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_npc <= OPTION_RESET_PC; + else if (deassert_doing_rfe) + spr_npc <= rfete ? 
exception_pc_addr : spr_epcr; + else if (du_npc_write) + spr_npc <= du_restart_pc_o; + else if (stepping & ctrl_branch_occur) + // Stepping and the current instruction is a taken jump/branch. + // This arm also covers the cycle in which that jump completes + // (execute_done), so no separate arm is needed for that case. + spr_npc <= ctrl_branch_target_o; + else if (stepping & fetch_ready_i) + spr_npc <= pc_fetch_next_i; + else if (stepping & exception_r) + spr_npc <= exception_pc_addr; + else if (((fetch_advance & exception) | fetch_take_exception_branch_o) | + padv_fetch_o) + // PC we're now executing: exception vector first, then a taken + // branch target (unless fetch already handled it as a quick + // branch), otherwise the next sequential fetch PC. + spr_npc <= (fetch_take_exception_branch_o |(fetch_advance & exception)) ? + exception_pc_addr : (ctrl_branch_occur & !fetch_quick_branch_i) ? + ctrl_branch_target_o : pc_fetch_next_i; + + // Previous PC (PPC) + // Not a register of its own: it mirrors the PC reported by fetch. + always @* + spr_ppc = fetch_ppc_i; + + assign spr_npc_o = spr_npc; + assign spr_ppc_o = spr_ppc; + + // This is for the writeback stage, when we have l.jal[r] instructions. + // Annoyingly, we can't rely on the link address being + // available without a dedicated bit of logic to calculate it, + // so do so here.
+ assign link_addr_o = spr_ppc + 4; + + mor1kx_cfgrs + #(.FEATURE_PIC (FEATURE_PIC), + .FEATURE_TIMER (FEATURE_TIMER), + .OPTION_PIC_TRIGGER (OPTION_PIC_TRIGGER), + .FEATURE_DSX (FEATURE_DSX), + .FEATURE_FASTCONTEXTS (FEATURE_FASTCONTEXTS), + .FEATURE_OVERFLOW (FEATURE_OVERFLOW), + .FEATURE_DATACACHE (FEATURE_DATACACHE), + .OPTION_DCACHE_BLOCK_WIDTH (OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH (OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS (OPTION_DCACHE_WAYS), + .FEATURE_DMMU (FEATURE_DMMU), + .FEATURE_INSTRUCTIONCACHE (FEATURE_INSTRUCTIONCACHE), + .OPTION_ICACHE_BLOCK_WIDTH (OPTION_ICACHE_BLOCK_WIDTH), + .OPTION_ICACHE_SET_WIDTH (OPTION_ICACHE_SET_WIDTH), + .OPTION_ICACHE_WAYS (OPTION_ICACHE_WAYS), + .FEATURE_IMMU (FEATURE_IMMU), + .FEATURE_DEBUGUNIT (FEATURE_DEBUGUNIT), + .FEATURE_PERFCOUNTERS (FEATURE_PERFCOUNTERS), + .FEATURE_MAC (FEATURE_MAC), + .FEATURE_SYSCALL (FEATURE_SYSCALL), + .FEATURE_TRAP (FEATURE_TRAP), + .FEATURE_RANGE (FEATURE_RANGE) + ) + mor1kx_cfgrs + (/*AUTOINST*/ + // Outputs + .spr_vr (spr_vr[31:0]), + .spr_vr2 (spr_vr2[31:0]), + .spr_upr (spr_upr[31:0]), + .spr_cpucfgr (spr_cpucfgr[31:0]), + .spr_dmmucfgr (spr_dmmucfgr[31:0]), + .spr_immucfgr (spr_immucfgr[31:0]), + .spr_dccfgr (spr_dccfgr[31:0]), + .spr_iccfgr (spr_iccfgr[31:0]), + .spr_dcfgr (spr_dcfgr[31:0]), + .spr_pccfgr (spr_pccfgr[31:0]), + .spr_avr (spr_avr[31:0])); + + /* Implementation-specific registers */ + assign spr_isr[0] = 0; + assign spr_isr[1] = 0; + assign spr_isr[2] = 0; + assign spr_isr[3] = 0; + assign spr_isr[4] = 0; + assign spr_isr[5] = 0; + assign spr_isr[6] = 0; + assign spr_isr[7] = 0; + + // System group (0) SPR data out + always @* + case(spr_addr) + `OR1K_SPR_VR_ADDR: + spr_sys_group_read = spr_vr; + `OR1K_SPR_VR2_ADDR: + spr_sys_group_read = {spr_vr2[31:8], `MOR1KX_PIPEID_PRONTOESPRESSO}; + `OR1K_SPR_AVR_ADDR: + spr_sys_group_read = spr_avr; + `OR1K_SPR_UPR_ADDR: + spr_sys_group_read = spr_upr; + `OR1K_SPR_CPUCFGR_ADDR: + spr_sys_group_read = 
spr_cpucfgr; + `OR1K_SPR_DMMUCFGR_ADDR: + spr_sys_group_read = spr_dmmucfgr; + `OR1K_SPR_IMMUCFGR_ADDR: + spr_sys_group_read = spr_immucfgr; + `OR1K_SPR_DCCFGR_ADDR: + spr_sys_group_read = spr_dccfgr; + `OR1K_SPR_ICCFGR_ADDR: + spr_sys_group_read = spr_iccfgr; + `OR1K_SPR_DCFGR_ADDR: + spr_sys_group_read = spr_dcfgr; + `OR1K_SPR_PCCFGR_ADDR: + spr_sys_group_read = spr_pccfgr; + `OR1K_SPR_NPC_ADDR: + spr_sys_group_read = spr_npc; + `OR1K_SPR_SR_ADDR: + spr_sys_group_read = {{(OPTION_OPERAND_WIDTH-SPR_SR_WIDTH){1'b0}}, + spr_sr}; + + `OR1K_SPR_PPC_ADDR: + spr_sys_group_read = spr_ppc; + `OR1K_SPR_FPCSR_ADDR: + spr_sys_group_read = spr_fpcsr; + `OR1K_SPR_EPCR0_ADDR: + spr_sys_group_read = spr_epcr; + `OR1K_SPR_EEAR0_ADDR: + spr_sys_group_read = spr_eear; + `OR1K_SPR_ESR0_ADDR: + spr_sys_group_read = {{(OPTION_OPERAND_WIDTH-SPR_SR_WIDTH){1'b0}}, + spr_esr}; + `OR1K_SPR_ISR0_ADDR: + spr_sys_group_read = spr_isr[0]; + `OR1K_SPR_ISR0_ADDR +1: + spr_sys_group_read = spr_isr[1]; + `OR1K_SPR_ISR0_ADDR +2: + spr_sys_group_read = spr_isr[2]; + `OR1K_SPR_ISR0_ADDR +3: + spr_sys_group_read = spr_isr[3]; + `OR1K_SPR_ISR0_ADDR +4: + spr_sys_group_read = spr_isr[4]; + `OR1K_SPR_ISR0_ADDR +5: + spr_sys_group_read = spr_isr[5]; + `OR1K_SPR_ISR0_ADDR +6: + spr_sys_group_read = spr_isr[6]; + `OR1K_SPR_ISR0_ADDR +7: + spr_sys_group_read = spr_isr[7]; + + `OR1K_SPR_COREID_ADDR: + // If the multicore feature is activated this address returns the + // core identifier, 0 otherwise + spr_sys_group_read = (FEATURE_MULTICORE != "NONE") ? 
multicore_coreid_i : 0; + + default: begin + /* GPR read */ + // The 32 GPRs map to GPR0_ADDR .. GPR0_ADDR+31 inclusive; the + // address just past that window must read as zero like any + // other unimplemented SPR. + if (spr_addr >= `OR1K_SPR_GPR0_ADDR && + spr_addr <= (`OR1K_SPR_GPR0_ADDR + 31)) + spr_sys_group_read = b; /* Register file */ + else + /* Invalid address - read as zero*/ + spr_sys_group_read = 0; + end + endcase // case (spr_addr) + + /* System group read data MUX in */ + assign spr_internal_read_dat[0] = spr_sys_group_read; + /* System group ack generation */ + /* TODO - might be delay for register file reads! */ + assign spr_access_ack[0] = 1; + + + + /* Generate data to the register file for mfspr operations */ + // NOTE(review): spr_addr[14:11] can take the values 13-15, which lie + // outside the declared spr_internal_read_dat[0:12] range - confirm such + // addresses can never reach an l.mfspr. + assign mfspr_dat_o = spr_internal_read_dat[spr_addr[14:11]]; + + // PIC SPR control + generate + if (FEATURE_PIC !="NONE") begin : pic + + /* mor1kx_pic AUTO_TEMPLATE ( + .spr_picsr_o (spr_picsr), + .spr_picmr_o (spr_picmr), + .spr_bus_ack (spr_access_ack[9]), + .spr_dat_o (spr_internal_read_dat[9]), + // Inputs + .spr_we_i (spr_we), + .spr_access_i (1'b1), + .spr_addr_i (spr_addr), + .spr_dat_i (spr_write_dat), + );*/ + mor1kx_pic + #( + .OPTION_PIC_TRIGGER(OPTION_PIC_TRIGGER), + .OPTION_PIC_NMI_WIDTH(OPTION_PIC_NMI_WIDTH) + ) + mor1kx_pic + (/*AUTOINST*/ + // Outputs + .spr_picmr_o (spr_picmr), // Templated + .spr_picsr_o (spr_picsr), // Templated + .spr_bus_ack (spr_access_ack[9]), // Templated + .spr_dat_o (spr_internal_read_dat[9]), // Templated + // Inputs + .clk (clk), + .rst (rst), + .irq_i (irq_i[31:0]), + .spr_access_i (1'b1), // Templated + .spr_we_i (spr_we), // Templated + .spr_addr_i (spr_addr), // Templated + .spr_dat_i (spr_write_dat)); // Templated + + // Raw PIC interrupt request: any status bit set, but held off while + // an l.mtspr or a taken branch is in flight. + assign except_pic_nonsrmasked = (|spr_picsr) & + !op_mtspr & + // Stops back-to-back branch addresses going to + // fetch stage + !ctrl_branch_occur; + + // Same request gated by the SR interrupt enable and suppressed + // while an l.rfe is being performed. + assign except_pic = spr_sr[`OR1K_SPR_SR_IEE] & except_pic_nonsrmasked & + !doing_rfe; + + end + else begin + assign except_pic_nonsrmasked = 0; + assign except_pic = 0; + assign spr_picsr = 0; + assign spr_picmr = 0; + assign spr_access_ack[9] = 0; + assign
spr_internal_read_dat[9] = 0; + end // else: !if(FEATURE_PIC !="NONE") + endgenerate + + + generate + if (FEATURE_TIMER!="NONE") begin : tt + + /* mor1kx_ticktimer AUTO_TEMPLATE ( + .spr_ttmr_o (spr_ttmr), + .spr_ttcr_o (spr_ttcr), + .spr_bus_ack (spr_access_ack[10]), + .spr_dat_o (spr_internal_read_dat[10]), + // Inputs + .spr_we_i (spr_we), + .spr_access_i (1'b1), + .spr_addr_i (spr_addr), + .spr_dat_i (spr_write_dat), + );*/ + mor1kx_ticktimer mor1kx_ticktimer + (/*AUTOINST*/ + // Outputs + .spr_ttmr_o (spr_ttmr), // Templated + .spr_ttcr_o (spr_ttcr), // Templated + .spr_bus_ack (spr_access_ack[10]), // Templated + .spr_dat_o (spr_internal_read_dat[10]), // Templated + // Inputs + .clk (clk), + .rst (rst), + .spr_access_i (1'b1), // Templated + .spr_we_i (spr_we), // Templated + .spr_addr_i (spr_addr), // Templated + .spr_dat_i (spr_write_dat)); // Templated + + assign except_ticktimer_nonsrmasked = spr_ttmr[28] & + !(op_mtspr & !(spr_esr[`OR1K_SPR_SR_TEE] & execute_done)) & + // Stops back-to-back branch addresses to + // fetch stage. + !ctrl_branch_occur; + + assign except_ticktimer = except_ticktimer_nonsrmasked & + spr_sr[`OR1K_SPR_SR_TEE] & !doing_rfe; + end // if (FEATURE_TIMER!="NONE") + else begin + assign except_ticktimer_nonsrmasked = 0; + assign except_ticktimer = 0; + assign spr_ttmr = 0; + assign spr_ttcr = 0; + assign spr_access_ack[10] = 0; + assign spr_internal_read_dat[10] = 0; + end // else: !if(FEATURE_TIMER!="NONE") + endgenerate + + /* SPR access control - allow accesses from either the instructions or from + the debug interface */ + assign spr_read_access = (op_mfspr | (du_access & !du_we_i)); + assign spr_write_access = ((execute_done & op_mtspr) | (du_access & du_we_i)); + + assign spr_write_dat = du_access ? 
du_dat_i : b; + assign spr_we = spr_write_access & spr_group_present; + assign spr_read = spr_read_access & spr_group_present; + + /* A bus out to other units that live outside of the control unit */ + assign spr_bus_addr_o = spr_addr; + assign spr_bus_we_o = spr_write_access & spr_group_present & spr_bus_access; + assign spr_bus_stb_o = (spr_read_access | spr_write_access) & + spr_group_present & spr_bus_access; + assign spr_bus_dat_o = spr_write_dat; + + /* Is the SPR in the design? */ + assign spr_group_present = (// System group + (spr_addr[15:11]==5'h00) || + // DMMU + (spr_addr[15:11]==5'h01 && + FEATURE_DMMU!="NONE") || + // IMMU + (spr_addr[15:11]==5'h02 && + FEATURE_IMMU!="NONE") || + // Data cache + (spr_addr[15:11]==5'h03 && + FEATURE_DATACACHE!="NONE") || + // Instruction cache + (spr_addr[15:11]==5'h04 && + FEATURE_INSTRUCTIONCACHE!= "NONE") || + // MAC unit + (spr_addr[15:11]==5'h05 && + FEATURE_MAC!="NONE") || + // Debug unit + (spr_addr[15:11]==5'h06 && + FEATURE_DEBUGUNIT!="NONE") || + // Performance counters + (spr_addr[15:11]==5'h07 && + FEATURE_PERFCOUNTERS!="NONE") || + // Power Management + (spr_addr[15:11]==5'h08 && + FEATURE_PMU!="NONE") || + // PIC + (spr_addr[15:11]==5'h09 && + FEATURE_PIC!="NONE") || + // Tick timer + (spr_addr[15:11]==5'h0a && + FEATURE_TIMER!="NONE") || + // FPU + (spr_addr[15:11]==5'h0b && + FEATURE_FPU!="NONE") + ); + + /* Generate a SPR group signal - generate invalid if the group is not + present in the design */ + assign spr_group = (spr_group_present) ? spr_addr[14:11] : 4'd12; + + /* Default group when a selected one is not present - it reads as zero */ + assign spr_internal_read_dat[12] = 0; + + /* Is a SPR bus access needed, or is the requested SPR in this file? 
*/ + assign spr_bus_access = /* Any of the units we don't have in this file */ + /* System group */ + !(spr_addr[15:11]==5'h00 || + /* Debug Group */ + spr_addr[15:11]==5'h06 || + /* PIC Group */ + spr_addr[15:11]==5'h09 || + /* Tick Group */ + spr_addr[15:11]==5'h0a); + + assign stepping_o = stepping; + + generate + if (FEATURE_DEBUGUNIT!="NONE") begin : du + + reg [OPTION_OPERAND_WIDTH-1:0] du_read_dat; + + reg du_ack; + reg du_stall_r; + reg [1:0] pstep_r; + reg [1:0] branch_step; + reg stepped_into_exception; + reg stepped_into_rfe; + + assign du_access = du_stb_i; + + // Generate ack back to the debug interface bus + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_ack <= 0; + else if (du_ack) + du_ack <= 0; + else if (du_stb_i) begin + if (!spr_group_present) + /* Unit doesn't exist, ACK to clear the access, nothing done */ + du_ack <= 1; + else if (spr_access_ack[spr_group]) + /* actual access occurred */ + du_ack <= 1; + end + + assign du_ack_o = du_ack; + + /* Data back to the debug bus */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_read_dat <= 0; + else if (spr_access_ack[spr_group]) begin + du_read_dat <= spr_internal_read_dat[spr_group]; + end + + assign du_dat_o = du_read_dat; + /* TODO: check into only letting stall go high when we've gracefully + completed the instruction currently in the ctrl stage. + Why? Potentially an instruction like l.mfspr from an external unit + hasn't completed fully, gets interrupted, and it's assumed it's + completed, but actually hasn't. 
*/ + + always @(posedge clk) + cpu_stall <= du_stall_i | du_restart_from_stall; + + /* goes out to the debug interface and comes back 1 cycle later + via du_stall_i */ + assign du_stall_o = (stepping & execute_done) | + (stall_on_trap & execute_done & except_trap_i); + + /* Pulse to indicate we're restarting after a stall */ + assign du_restart_from_stall = du_stall_r & !du_stall_i; + + /* NPC debug control logic */ + assign du_npc_write = (du_we_i && du_addr_i==`OR1K_SPR_NPC_ADDR && + du_ack_o); + + /* Pick the traps-cause-stall bit out of the DSR */ + assign stall_on_trap = spr_dsr[`OR1K_SPR_DSR_TE]; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + stepped_into_exception <= 0; + else if (du_restart_from_stall) + stepped_into_exception <= 0; + else if (stepping & execute_done) + stepped_into_exception <= exception; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + stepped_into_rfe <= 0; + else if (du_restart_from_stall) + stepped_into_rfe <= 0; + else if (stepping & execute_done) + stepped_into_rfe <= op_rfe; + + assign du_restart_pc_o = du_npc_write ? du_dat_i : + stepped_into_rfe ? 
spr_epcr : spr_npc; + + assign du_restart_o = du_restart_from_stall; + + /* Indicate when we're stepping: both the DMR1 ST bit and the DSR TE bit must be set */ + assign stepping = spr_dmr1[`OR1K_SPR_DMR1_ST] & + spr_dsr[`OR1K_SPR_DSR_TE]; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + pstep_r <= 0; + else if (du_restart_from_stall & stepping) + pstep_r <= 2'd1; /* restart while stepping: arm bit 0 */ + else if ((pstep[0] & fetch_ready_i) | + /* decode is always single cycle */ + (pstep[1] & execute_done)) + pstep_r <= {pstep_r[0],1'b0}; /* shift left: bit 0 moves to bit 1, then the register clears */ + + assign pstep = pstep_r; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + branch_step <= 0; + else if (stepping & pstep[1]) + branch_step <= {branch_step[0], ctrl_branch_occur}; + else if (!stepping & execute_done) + branch_step <= {branch_step[0], /*execute_delay_slot*/ 1'b0}; + + /* Signals for waveform debugging: one named alias per SPR group read bus (groups 0-9) */ + wire [31:0] spr_read_data_group_0; + assign spr_read_data_group_0 = spr_internal_read_dat[0]; + wire [31:0] spr_read_data_group_1; + assign spr_read_data_group_1 = spr_internal_read_dat[1]; + wire [31:0] spr_read_data_group_2; + assign spr_read_data_group_2 = spr_internal_read_dat[2]; + wire [31:0] spr_read_data_group_3; + assign spr_read_data_group_3 = spr_internal_read_dat[3]; + wire [31:0] spr_read_data_group_4; + assign spr_read_data_group_4 = spr_internal_read_dat[4]; + wire [31:0] spr_read_data_group_5; + assign spr_read_data_group_5 = spr_internal_read_dat[5]; + wire [31:0] spr_read_data_group_6; + assign spr_read_data_group_6 = spr_internal_read_dat[6]; + wire [31:0] spr_read_data_group_7; + assign spr_read_data_group_7 = spr_internal_read_dat[7]; + wire [31:0] spr_read_data_group_8; + assign spr_read_data_group_8 = spr_internal_read_dat[8]; + wire [31:0] spr_read_data_group_9; + assign spr_read_data_group_9 = spr_internal_read_dat[9]; + + + /* Group 6 accesses are always acknowledged: always single cycle access */ + assign spr_access_ack[6] = 1; + assign spr_internal_read_dat[6] = (spr_addr==`OR1K_SPR_DMR1_ADDR) ? + spr_dmr1 : + (spr_addr==`OR1K_SPR_DMR2_ADDR) ? + spr_dmr2 : + (spr_addr==`OR1K_SPR_DSR_ADDR) ? 
+ spr_dsr : + (spr_addr==`OR1K_SPR_DRR_ADDR) ? + spr_drr : 0; + + /* Put the incoming stall signal through a register to detect FE */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + du_stall_r <= 0; + else + du_stall_r <= du_stall_i; + + /* DMR1 */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_dmr1 <= 0; + else if (spr_we && spr_addr==`OR1K_SPR_DMR1_ADDR) + spr_dmr1[23:0] <= spr_write_dat[23:0]; + + /* DMR2 */ + always @(posedge clk) + spr_dmr2 <= 0; + + /* DSR */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_dsr <= 0; + else if (spr_we && spr_addr==`OR1K_SPR_DSR_ADDR) + spr_dsr[13:0] <= spr_write_dat[13:0]; + + /* DRR */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_drr <= 0; + else if (spr_we && spr_addr==`OR1K_SPR_DRR_ADDR) + spr_drr[13:0] <= spr_write_dat[13:0]; + else if (stall_on_trap & execute_done & except_trap_i) + spr_drr[`OR1K_SPR_DRR_TE] <= 1; + + + end // block: du + else + begin : no_du + assign du_access = 0; + assign du_stall_o = 0; + assign du_ack_o = 0; + assign du_restart_o = 0; + assign du_restart_pc_o = 0; + assign stepping = 0; + assign du_npc_write = 0; + assign du_dat_o = 0; + assign du_restart_from_stall = 0; + assign spr_access_ack[6] = 0; + + always @(posedge clk) + begin + cpu_stall <= 0; + spr_dmr1 <= 0; + spr_dmr2 <= 0; + spr_dsr <= 0; + spr_drr <= 0; + end + end + endgenerate + + /* Controls to generate ACKs from units that are external to this module */ + generate + if (FEATURE_DMMU!="NONE") begin : dmmu_ctrl + assign spr_access_ack[1] = spr_bus_ack_dmmu_i; + assign spr_internal_read_dat[1] = spr_bus_dat_dmmu_i; + end + else begin + assign spr_access_ack[1] = 0; + assign spr_internal_read_dat[1] = 0; + end + endgenerate + + generate + if (FEATURE_IMMU!="NONE") begin : immu_ctrl + assign spr_access_ack[2] = spr_bus_ack_immu_i; + assign spr_internal_read_dat[2] = spr_bus_dat_immu_i; + end + else begin + assign spr_access_ack[2] = 0; + assign spr_internal_read_dat[2] = 0; + end + endgenerate + + generate + 
if (FEATURE_DATACACHE!="NONE") begin : datacache_ctrl + assign spr_access_ack[3] = spr_bus_ack_dc_i; + assign spr_internal_read_dat[3] = spr_bus_dat_dc_i; + end + else begin + assign spr_access_ack[3] = 0; + assign spr_internal_read_dat[3] = 0; + end + endgenerate + + generate + if (FEATURE_INSTRUCTIONCACHE!="NONE") begin : instructioncache_ctrl + assign spr_access_ack[4] = spr_bus_ack_ic_i; + assign spr_internal_read_dat[4] = spr_bus_dat_ic_i; + end + else begin + assign spr_access_ack[4] = 0; + assign spr_internal_read_dat[4] = 0; + end + endgenerate + + generate + if (FEATURE_MAC!="NONE") begin : mac_ctrl + assign spr_access_ack[5] = spr_bus_ack_mac_i; + assign spr_internal_read_dat[5] = spr_bus_dat_mac_i; + end + else begin + assign spr_access_ack[5] = 0; + assign spr_internal_read_dat[5] = 0; + end + endgenerate + + generate + if (FEATURE_PERFCOUNTERS!="NONE") begin : perfcounters_ctrl + assign spr_access_ack[7] = spr_bus_ack_pcu_i; + assign spr_internal_read_dat[7] = spr_bus_dat_pcu_i; + end + else begin + assign spr_access_ack[7] = 0; + assign spr_internal_read_dat[7] = 0; + end + endgenerate + + generate + if (FEATURE_PMU!="NONE") begin : pmu_ctrl /* SPR group 8: ack and read data must both come from the PMU bus, like every sibling block pairs ack_X with dat_X */ + assign spr_access_ack[8] = spr_bus_ack_pmu_i; + assign spr_internal_read_dat[8] = spr_bus_dat_pmu_i; /* NOTE(review): assumes port spr_bus_dat_pmu_i is declared next to spr_bus_ack_pmu_i - confirm in the module port list */ + end + else begin /* group absent: never acknowledge, read data is zero */ + assign spr_access_ack[8] = 0; + assign spr_internal_read_dat[8] = 0; + end + endgenerate + + generate + if (FEATURE_FPU!="NONE") begin : fpu_ctrl + assign spr_access_ack[11] = spr_bus_ack_fpu_i; + assign spr_internal_read_dat[11] = spr_bus_dat_fpu_i; + end + else begin + assign spr_access_ack[11] = 0; + assign spr_internal_read_dat[11] = 0; + end + endgenerate + + // synthesis translate_off + + generate + if (FEATURE_INBUILT_CHECKERS != "NONE") begin : execute_checker + + reg [OPTION_OPERAND_WIDTH-1:0] last_execute_pc; + reg just_branched = 1; + reg had_rfe = 0; + integer insns = 0; + + + // A monitor to do a rudimentary check of the processor's PC + // progression + always @(negedge clk) 
begin + + if (op_rfe) + had_rfe = 1; + + if (execute_done & !stepping) begin + + // First instruction of an exception vector, ie. + // 0x100, 0x200, 0x300 ... 0x2000 + if (~|spr_ppc[31:14] && ~|spr_ppc[7:0]) + just_branched = 1; + + if (!just_branched && spr_ppc != (last_execute_pc+4) && + (insns > 2)) + begin + /* verilator lint_off STMTDLY */ + #5; + /* verilator lint_on STMTDLY */ + $display("CPU didn't execute in correct order"); + $display("went: %08h, %08h",last_execute_pc, spr_ppc); + $finish(); + end + + insns = insns + 1; + last_execute_pc = spr_ppc; + + case (ctrl_opc_insn_i) + `OR1K_OPCODE_J, + `OR1K_OPCODE_JAL, + `OR1K_OPCODE_JALR, + `OR1K_OPCODE_JR, + `OR1K_OPCODE_BNF, + `OR1K_OPCODE_BF, + `OR1K_OPCODE_RFE, + `OR1K_OPCODE_SYSTRAPSYNC: + just_branched = 1; + default: + just_branched = 0; + endcase // case (`EXECUTE_STAGE_INSN[`OR1K_OPCODE_POS]) + + if (had_rfe) + begin + // Sometimes the RFE will pulse high, and the + // branch logic in the fetch stage will acknowledge + // it but the instruction isn't "acked" in the + // control stage. + just_branched = 1; + had_rfe = 0; + end + + end // if (execute_done & !stepping) + else if (du_npc_write) + just_branched = 1; + end // always @ (posedge `CPU_clk) + end + endgenerate + // synthesis translate_on + +endmodule // mor1kx_ctrl diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_dcache.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_dcache.v new file mode 100644 index 0000000..979c9bc --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_dcache.v @@ -0,0 +1,690 @@ +/****************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: Data cache implementation + + Copyright (C) 2012-2013 + Stefan Kristiansson + Stefan Wallentowitz + + ******************************************************************************/ + +`include "mor1kx-defines.v" + +module mor1kx_dcache + #( + parameter OPTION_OPERAND_WIDTH = 32, + parameter OPTION_DCACHE_BLOCK_WIDTH = 5, + parameter OPTION_DCACHE_SET_WIDTH = 9, + parameter OPTION_DCACHE_WAYS = 2, + parameter OPTION_DCACHE_LIMIT_WIDTH = 32, + parameter OPTION_DCACHE_SNOOP = "NONE" + ) + ( + input clk, + input rst, + + input dc_dbus_err_i, + input dc_enable_i, + input dc_access_i, + output refill_o, + output refill_req_o, + output refill_done_o, + output cache_hit_o, + + // CPU Interface + output cpu_err_o, + output cpu_ack_o, + output reg [OPTION_OPERAND_WIDTH-1:0] cpu_dat_o, + input [OPTION_OPERAND_WIDTH-1:0] cpu_dat_i, + input [OPTION_OPERAND_WIDTH-1:0] cpu_adr_i, + input [OPTION_OPERAND_WIDTH-1:0] cpu_adr_match_i, + input cpu_req_i, + input cpu_we_i, + input [3:0] cpu_bsel_i, + + input refill_allowed, + + input [OPTION_OPERAND_WIDTH-1:0] wradr_i, + input [OPTION_OPERAND_WIDTH-1:0] wrdat_i, + input we_i, + + // Snoop address + input [31:0] snoop_adr_i, + // Snoop event in this cycle + input snoop_valid_i, + // Whether the snoop hit. If so, there will be no tag memory write + // this cycle. The LSU may need to stall the pipeline. 
+ output snoop_hit_o, + + + // SPR interface + input [15:0] spr_bus_addr_i, + input spr_bus_we_i, + input spr_bus_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_i, + + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o, + output spr_bus_ack_o + ); + + // States (one-hot encoded) + localparam IDLE = 5'b00001; + localparam READ = 5'b00010; + localparam WRITE = 5'b00100; + localparam REFILL = 5'b01000; + localparam INVALIDATE = 5'b10000; + + // Number of address bits covering one way (block offset bits plus set index bits) + localparam WAY_WIDTH = OPTION_DCACHE_BLOCK_WIDTH + OPTION_DCACHE_SET_WIDTH; + /* + * Tag memory layout + * +---------------------------------------------------------+ + * (index) -> | LRU | wayN valid | wayN tag |...| way0 valid | way0 tag | + * +---------------------------------------------------------+ + */ + + // The tag is the part left of the index + localparam TAG_WIDTH = (OPTION_DCACHE_LIMIT_WIDTH - WAY_WIDTH); + + // The tag memory contains entries with OPTION_DCACHE_WAYS parts of + // each TAGMEM_WAY_WIDTH. Each of those is a tag plus a valid flag. + localparam TAGMEM_WAY_WIDTH = TAG_WIDTH + 1; + localparam TAGMEM_WAY_VALID = TAGMEM_WAY_WIDTH - 1; + + // Additionally, the tag memory entry contains an LRU value. The + // width of this is 0 for OPTION_DCACHE_WAYS==1 + localparam TAG_LRU_WIDTH = OPTION_DCACHE_WAYS*(OPTION_DCACHE_WAYS-1) >> 1; + + // We have signals for the LRU which are not used for one way + // caches. To avoid signal width [-1:0] this generates [0:0] + // vectors for them, which are removed automatically then. + localparam TAG_LRU_WIDTH_BITS = (OPTION_DCACHE_WAYS >= 2) ? 
TAG_LRU_WIDTH : 1; + + // Compute the total sum of the entry elements + localparam TAGMEM_WIDTH = TAGMEM_WAY_WIDTH * OPTION_DCACHE_WAYS + TAG_LRU_WIDTH; + + // For convenience we define the position of the LRU in the tag + // memory entries + localparam TAG_LRU_MSB = TAGMEM_WIDTH - 1; + localparam TAG_LRU_LSB = TAG_LRU_MSB - TAG_LRU_WIDTH + 1; + + // FSM state signals + reg [4:0] state; + wire read; + wire write; + wire refill; + + reg [WAY_WIDTH-1:OPTION_DCACHE_BLOCK_WIDTH] invalidate_adr; + wire [31:0] next_refill_adr; + reg [31:0] way_wr_dat; + wire refill_done; + wire refill_hit; + reg [(1<<(OPTION_DCACHE_BLOCK_WIDTH-2))-1:0] refill_valid; + reg [(1<<(OPTION_DCACHE_BLOCK_WIDTH-2))-1:0] refill_valid_r; + wire invalidate; + + // The index we read and write from tag memory + wire [OPTION_DCACHE_SET_WIDTH-1:0] tag_rindex; + reg [OPTION_DCACHE_SET_WIDTH-1:0] tag_windex; + + // The data from the tag memory + wire [TAGMEM_WIDTH-1:0] tag_dout; + wire [TAG_LRU_WIDTH_BITS-1:0] tag_lru_out; + wire [TAGMEM_WAY_WIDTH-1:0] tag_way_out [OPTION_DCACHE_WAYS-1:0]; + + // The data to the tag memory + wire [TAGMEM_WIDTH-1:0] tag_din; + reg [TAG_LRU_WIDTH_BITS-1:0] tag_lru_in; + reg [TAGMEM_WAY_WIDTH-1:0] tag_way_in [OPTION_DCACHE_WAYS-1:0]; + + reg [TAGMEM_WAY_WIDTH-1:0] tag_way_save[OPTION_DCACHE_WAYS-1:0]; + + // Whether to write to the tag memory in this cycle + reg tag_we; + + // This is the tag we need to write to the tag memory during refill + wire [TAG_WIDTH-1:0] tag_wtag; + + // This is the tag we check against + wire [TAG_WIDTH-1:0] tag_tag; + + // Access to the way memories + wire [WAY_WIDTH-3:0] way_raddr[OPTION_DCACHE_WAYS-1:0]; + wire [WAY_WIDTH-3:0] way_waddr[OPTION_DCACHE_WAYS-1:0]; + wire [OPTION_OPERAND_WIDTH-1:0] way_din[OPTION_DCACHE_WAYS-1:0]; + wire [OPTION_OPERAND_WIDTH-1:0] way_dout[OPTION_DCACHE_WAYS-1:0]; + reg [OPTION_DCACHE_WAYS-1:0] way_we; + + // Does any way hit? 
+ wire hit; + wire [OPTION_DCACHE_WAYS-1:0] way_hit; + + // This is the least recently used value before access the memory. + // Those are one hot encoded. + wire [OPTION_DCACHE_WAYS-1:0] lru; + + // Register that stores the LRU value from lru + reg [OPTION_DCACHE_WAYS-1:0] tag_save_lru; + + // The access vector to update the LRU history is the way that has + // a hit or is refilled. It is also one-hot encoded. + reg [OPTION_DCACHE_WAYS-1:0] access; + + // The current LRU history as read from tag memory and the update + // value after we accessed it to write back to tag memory. + wire [TAG_LRU_WIDTH_BITS-1:0] current_lru_history; + wire [TAG_LRU_WIDTH_BITS-1:0] next_lru_history; + + // Intermediate signals to ease debugging + wire [TAG_WIDTH-1:0] check_way_tag [OPTION_DCACHE_WAYS-1:0]; + wire check_way_match [OPTION_DCACHE_WAYS-1:0]; + wire check_way_valid [OPTION_DCACHE_WAYS-1:0]; + + reg write_pending; + + // Extract index to read from snooped address + wire [OPTION_DCACHE_SET_WIDTH-1:0] snoop_index; + assign snoop_index = snoop_adr_i[WAY_WIDTH-1:OPTION_DCACHE_BLOCK_WIDTH]; + + // Register that is high one cycle after the actual snoop event to + // drive the comparison + reg snoop_check; + // Register that stores the tag for one cycle + reg [TAG_WIDTH-1:0] snoop_tag; + // Also store the index for one cycle, for the succeeding write access + reg [OPTION_DCACHE_SET_WIDTH-1:0] snoop_windex; + + // Snoop tag memory interface + // Data out of tag memory + wire [TAGMEM_WIDTH-1:0] snoop_dout; + // Each ways information in the tag memory + wire [TAGMEM_WAY_WIDTH-1:0] snoop_way_out [OPTION_DCACHE_WAYS-1:0]; + // Each ways tag in the tag memory + wire [TAG_WIDTH-1:0] snoop_check_way_tag [OPTION_DCACHE_WAYS-1:0]; + // Whether the tag matches the snoop tag + wire snoop_check_way_match [OPTION_DCACHE_WAYS-1:0]; + // Whether the tag is valid + wire snoop_check_way_valid [OPTION_DCACHE_WAYS-1:0]; + // Whether the way hits + wire [OPTION_DCACHE_WAYS-1:0] snoop_way_hit; + // 
Whether any way hits + wire snoop_hit; + + assign snoop_hit_o = (OPTION_DCACHE_SNOOP != "NONE") ? snoop_hit : 0; + + genvar i; + + assign cpu_ack_o = ((read | refill) & hit & !write_pending | + refill_hit) & cpu_req_i & !snoop_hit; + + assign tag_rindex = cpu_adr_i[WAY_WIDTH-1:OPTION_DCACHE_BLOCK_WIDTH]; + + assign tag_tag = cpu_adr_match_i[OPTION_DCACHE_LIMIT_WIDTH-1:WAY_WIDTH]; + assign tag_wtag = wradr_i[OPTION_DCACHE_LIMIT_WIDTH-1:WAY_WIDTH]; + + generate + if (OPTION_DCACHE_WAYS >= 2) begin + // Multiplex the LRU history from and to tag memory + assign current_lru_history = tag_dout[TAG_LRU_MSB:TAG_LRU_LSB]; + assign tag_din[TAG_LRU_MSB:TAG_LRU_LSB] = tag_lru_in; + assign tag_lru_out = tag_dout[TAG_LRU_MSB:TAG_LRU_LSB]; + end + + for (i = 0; i < OPTION_DCACHE_WAYS; i=i+1) begin : ways + assign way_raddr[i] = cpu_adr_i[WAY_WIDTH-1:2]; + assign way_waddr[i] = write ? cpu_adr_match_i[WAY_WIDTH-1:2] : + wradr_i[WAY_WIDTH-1:2]; + assign way_din[i] = way_wr_dat; + + // compare stored tag with incoming tag and check valid bit + assign check_way_tag[i] = tag_way_out[i][TAG_WIDTH-1:0]; + assign check_way_match[i] = (check_way_tag[i] == tag_tag); + assign check_way_valid[i] = tag_way_out[i][TAGMEM_WAY_VALID]; + + assign way_hit[i] = check_way_valid[i] & check_way_match[i]; + + // Multiplex the way entries in the tag memory + assign tag_din[(i+1)*TAGMEM_WAY_WIDTH-1:i*TAGMEM_WAY_WIDTH] = tag_way_in[i]; + assign tag_way_out[i] = tag_dout[(i+1)*TAGMEM_WAY_WIDTH-1:i*TAGMEM_WAY_WIDTH]; + + if (OPTION_DCACHE_SNOOP != "NONE") begin + // The same for the snoop tag memory + assign snoop_way_out[i] = snoop_dout[(i+1)*TAGMEM_WAY_WIDTH-1:i*TAGMEM_WAY_WIDTH]; + + assign snoop_check_way_tag[i] = snoop_way_out[i][TAG_WIDTH-1:0]; + assign snoop_check_way_match[i] = (snoop_check_way_tag[i] == snoop_tag); + assign snoop_check_way_valid[i] = snoop_way_out[i][TAGMEM_WAY_VALID]; + + assign snoop_way_hit[i] = snoop_check_way_valid[i] & snoop_check_way_match[i]; + end + end + endgenerate + + 
assign hit = |way_hit; + assign cache_hit_o = hit; + + assign snoop_hit = (OPTION_DCACHE_SNOOP != "NONE") & + |snoop_way_hit & snoop_check; + + integer w0; + always @(*) begin + cpu_dat_o = {OPTION_OPERAND_WIDTH{1'bx}}; + + // Put correct way on the data port + for (w0 = 0; w0 < OPTION_DCACHE_WAYS; w0 = w0 + 1) begin + if (way_hit[w0] | (refill_hit & tag_save_lru[w0])) begin + cpu_dat_o = way_dout[w0]; + end + end + end + + assign next_refill_adr = (OPTION_DCACHE_BLOCK_WIDTH == 5) ? + {wradr_i[31:5], wradr_i[4:0] + 5'd4} : // 32 byte + {wradr_i[31:4], wradr_i[3:0] + 4'd4}; // 16 byte + + assign refill_done_o = refill_done; + assign refill_done = refill_valid[next_refill_adr[OPTION_DCACHE_BLOCK_WIDTH-1:2]]; + assign refill_hit = refill_valid_r[cpu_adr_match_i[OPTION_DCACHE_BLOCK_WIDTH-1:2]] & + cpu_adr_match_i[OPTION_DCACHE_LIMIT_WIDTH-1: + OPTION_DCACHE_BLOCK_WIDTH] == + wradr_i[OPTION_DCACHE_LIMIT_WIDTH-1: + OPTION_DCACHE_BLOCK_WIDTH] & + refill & !write_pending; + + assign refill = (state == REFILL); + assign read = (state == READ); + assign write = (state == WRITE); + + assign refill_o = refill; + + assign refill_req_o = read & cpu_req_i & !hit & !write_pending & refill_allowed | refill; + + /* + * SPR bus interface + */ + + // The SPR interface is used to invalidate the cache blocks. When + // an invalidation is started, the respective entry in the tag + // memory is cleared. When another transfer is in progress, the + // handling is delayed until it is possible to serve it. + // + // The invalidation is acknowledged to the SPR bus, but the cycle + // is terminated by the core. We therefore need to hold the + // invalidate acknowledgement. Meanwhile we continuously write the + // tag memory which is no problem. 
+ + // Net that signals an acknowledgement + reg invalidate_ack; + + // An invalidate request is either a block flush or a block invalidate + assign invalidate = spr_bus_stb_i & spr_bus_we_i & + (spr_bus_addr_i == `OR1K_SPR_DCBFR_ADDR | + spr_bus_addr_i == `OR1K_SPR_DCBIR_ADDR); + + // Acknowledge to the SPR bus. + assign spr_bus_ack_o = invalidate_ack; + + /* + * Cache FSM + * Starts in IDLE. + * The state changes between READ and WRITE depending on whether cpu_we_i is asserted. + * cpu_we_i is in sync with cpu_adr_i, so that means that it's the + * *upcoming* write that it is indicating. It only toggles for one cycle, + * so if we are busy doing something else when this signal comes + * (e.g. refilling) we assert the write_pending signal. + * cpu_req_i is in sync with cpu_adr_match_i, so it can be used to + * determine if a cache miss should cause a refill or if a write should + * really be executed. + */ + integer w1; + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) begin + state <= IDLE; + write_pending <= 0; + end else if(dc_dbus_err_i) begin + state <= IDLE; + write_pending <= 0; + end else begin + if (cpu_we_i) + write_pending <= 1; + else if (!cpu_req_i) + write_pending <= 0; + + refill_valid_r <= refill_valid; + + if (snoop_valid_i) begin + // + // If there is a snoop event, we need to store this + // information. This happens independently of whether we + // have a snoop tag memory or not. 
+ // + snoop_check <= 1; + snoop_windex <= snoop_index; + snoop_tag <= snoop_adr_i[OPTION_DCACHE_LIMIT_WIDTH-1:WAY_WIDTH]; + end else begin + snoop_check <= 0; + end + + case (state) + IDLE: begin + if (invalidate) begin + // If there is an invalidation request + // + // Store address in invalidate_adr that is muxed to the tag + // memory write address + invalidate_adr <= spr_bus_dat_i[WAY_WIDTH-1:OPTION_DCACHE_BLOCK_WIDTH]; + + // Change to invalidate state that actually accesses + // the tag memory + state <= INVALIDATE; + end else if (cpu_we_i | write_pending) + state <= WRITE; + else if (cpu_req_i) + state <= READ; + end + + READ: begin + if (dc_access_i | cpu_we_i & dc_enable_i) begin + if (!hit & cpu_req_i & !write_pending & refill_allowed) begin + refill_valid <= 0; + refill_valid_r <= 0; + + // Store the LRU information for correct replacement + // on refill. Always one when only one way. + tag_save_lru <= (OPTION_DCACHE_WAYS==1) | lru; + + for (w1 = 0; w1 < OPTION_DCACHE_WAYS; w1 = w1 + 1) begin + tag_way_save[w1] <= tag_way_out[w1]; + end + + state <= REFILL; + end else if (cpu_we_i | write_pending) begin + state <= WRITE; + end else if (invalidate) begin + state <= IDLE; + end + end else if (!dc_enable_i | invalidate) begin + state <= IDLE; + end + end + + REFILL: begin + if (we_i) begin + refill_valid[wradr_i[OPTION_DCACHE_BLOCK_WIDTH-1:2]] <= 1; + + if (refill_done) + state <= IDLE; + end + // Abort refill on snoop-hit + // TODO: only abort on snoop-hits to refill address + if (snoop_hit) begin + refill_valid <= 0; + refill_valid_r <= 0; + state <= IDLE; + end + end + + WRITE: begin + if ((!dc_access_i | !cpu_req_i | !cpu_we_i) & !snoop_hit) begin + write_pending <= 0; + state <= READ; + end + end + + INVALIDATE: begin + if (invalidate) begin + // Store address in invalidate_adr that is muxed to the tag + // memory write address + invalidate_adr <= spr_bus_dat_i[WAY_WIDTH-1:OPTION_DCACHE_BLOCK_WIDTH]; + + state <= INVALIDATE; + end else begin + state 
<= IDLE; + end + end + + default: + state <= IDLE; + endcase + end + end + + // + // This is the combinational part of the state machine that + // interfaces the tag and way memories. + // + integer w2; + always @(*) begin + // Default is to keep data, don't write and don't access + tag_lru_in = tag_lru_out; + for (w2 = 0; w2 < OPTION_DCACHE_WAYS; w2 = w2 + 1) begin + tag_way_in[w2] = tag_way_out[w2]; + end + + tag_we = 1'b0; + way_we = {(OPTION_DCACHE_WAYS){1'b0}}; + + access = {(OPTION_DCACHE_WAYS){1'b0}}; + + way_wr_dat = wrdat_i; + + // The default is (of course) not to acknowledge the invalidate + invalidate_ack = 1'b0; + + if (snoop_hit) begin + // This is the write access + tag_we = 1'b1; + tag_windex = snoop_windex; + for (w2 = 0; w2 < OPTION_DCACHE_WAYS; w2 = w2 + 1) begin + if (snoop_way_hit[w2]) begin + tag_way_in[w2] = 0; + end else begin + tag_way_in[w2] = snoop_way_out[w2]; + end + end + end else begin + // + // The tag mem is written during reads and writes to write + // the lru info and during refill and invalidate. + // + tag_windex = read | write ? + cpu_adr_match_i[WAY_WIDTH-1:OPTION_DCACHE_BLOCK_WIDTH] : + (state == INVALIDATE) ? invalidate_adr : + wradr_i[WAY_WIDTH-1:OPTION_DCACHE_BLOCK_WIDTH]; + + case (state) + IDLE: begin + // + // When idle we can always acknowledge the invalidate as it + // has the highest priority in handling. When something is + // changed on the state machine handling above this needs + // to be changed. + // + invalidate_ack = 1'b1; + end + + READ: begin + if (hit) begin + // + // We got a hit. The LRU module gets the access + // information. Depending on this we update the LRU + // history in the tag. 
+ // + access = way_hit; + + // This is the updated LRU history after hit + tag_lru_in = next_lru_history; + + tag_we = 1'b1; + end + end + + WRITE: begin + way_wr_dat = cpu_dat_i; + if (hit & cpu_req_i) begin + /* Mux cache output with write data */ + if (!cpu_bsel_i[3]) + way_wr_dat[31:24] = cpu_dat_o[31:24]; + if (!cpu_bsel_i[2]) + way_wr_dat[23:16] = cpu_dat_o[23:16]; + if (!cpu_bsel_i[1]) + way_wr_dat[15:8] = cpu_dat_o[15:8]; + if (!cpu_bsel_i[0]) + way_wr_dat[7:0] = cpu_dat_o[7:0]; + + way_we = way_hit; + + tag_lru_in = next_lru_history; + + tag_we = 1'b1; + end + end + + REFILL: begin + if (we_i) begin + // + // Write the data to the way that is replaced (which is + // the LRU) + // + way_we = tag_save_lru; + + // Access pattern + access = tag_save_lru; + + /* Invalidate the way on the first write */ + if (refill_valid == 0) begin + for (w2 = 0; w2 < OPTION_DCACHE_WAYS; w2 = w2 + 1) begin + if (tag_save_lru[w2]) begin + tag_way_in[w2][TAGMEM_WAY_VALID] = 1'b0; + end + end + + tag_we = 1'b1; + end + + // + // After refill update the tag memory entry of the + // filled way with the LRU history, the tag and set + // valid to 1. 
+ // + if (refill_done) begin + for (w2 = 0; w2 < OPTION_DCACHE_WAYS; w2 = w2 + 1) begin + tag_way_in[w2] = tag_way_save[w2]; + if (tag_save_lru[w2]) begin + tag_way_in[w2] = { 1'b1, tag_wtag }; + end + end + tag_lru_in = next_lru_history; + + tag_we = 1'b1; + end + end + end + + INVALIDATE: begin + invalidate_ack = 1'b1; + + // Lazy invalidation, invalidate everything that matches tag address + tag_lru_in = 0; + for (w2 = 0; w2 < OPTION_DCACHE_WAYS; w2 = w2 + 1) begin + tag_way_in[w2] = 0; + end + + tag_we = 1'b1; + end + + default: begin + end + endcase + end + end + + generate + for (i = 0; i < OPTION_DCACHE_WAYS; i=i+1) begin : way_memories + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH(WAY_WIDTH-2), + .DATA_WIDTH(OPTION_OPERAND_WIDTH), + .ENABLE_BYPASS(1) + ) + way_data_ram + ( + // Outputs + .dout (way_dout[i]), + // Inputs + .clk (clk), + .raddr (way_raddr[i][WAY_WIDTH-3:0]), + .re (1'b1), + .waddr (way_waddr[i][WAY_WIDTH-3:0]), + .we (way_we[i]), + .din (way_din[i][31:0])); + + end + + if (OPTION_DCACHE_WAYS >= 2) begin : gen_u_lru + /* mor1kx_cache_lru AUTO_TEMPLATE( + .current (current_lru_history), + .update (next_lru_history), + .lru_pre (lru), + .lru_post (), + .access (access), + ); */ + + mor1kx_cache_lru + #(.NUMWAYS(OPTION_DCACHE_WAYS)) + u_lru(/*AUTOINST*/ + // Outputs + .update (next_lru_history), // Templated + .lru_pre (lru), // Templated + .lru_post (), // Templated + // Inputs + .current (current_lru_history), // Templated + .access (access)); // Templated + end // if (OPTION_DCACHE_WAYS >= 2) + endgenerate + + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH(OPTION_DCACHE_SET_WIDTH), + .DATA_WIDTH(TAGMEM_WIDTH), + .ENABLE_BYPASS(OPTION_DCACHE_SNOOP != "NONE") + ) + tag_ram + ( + // Outputs + .dout (tag_dout[TAGMEM_WIDTH-1:0]), + // Inputs + .clk (clk), + .raddr (tag_rindex), + .re (1'b1), + .waddr (tag_windex), + .we (tag_we), + .din (tag_din)); + +generate +if (OPTION_DCACHE_SNOOP != "NONE") begin + mor1kx_simple_dpram_sclk + #( + 
.ADDR_WIDTH(OPTION_DCACHE_SET_WIDTH), + .DATA_WIDTH(TAGMEM_WIDTH), + .ENABLE_BYPASS(1) + ) + snoop_tag_ram + ( + // Outputs + .dout (snoop_dout[TAGMEM_WIDTH-1:0]), + // Inputs + .clk (clk), + .raddr (snoop_index), + .re (1'b1), + .waddr (tag_windex), + .we (tag_we), + .din (tag_din)); +end +endgenerate + +endmodule diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_decode.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_decode.v new file mode 100644 index 0000000..c2f5674 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_decode.v @@ -0,0 +1,540 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx decode unit + + Completely combinatorial. + + Outputs: + - ALU operation + - indication of other type of op - LSU/SPR + - immediates + - register file addresses + - exception decodes: illegal, system call + + Copyright (C) 2012 Julius Baxter + Copyright (C) 2013 Stefan Kristiansson + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_decode + #( + parameter OPTION_OPERAND_WIDTH = 32, + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}, + parameter OPTION_RF_ADDR_WIDTH = 5, + + parameter FEATURE_SYSCALL = "ENABLED", + parameter FEATURE_TRAP = "ENABLED", + parameter FEATURE_RANGE = "ENABLED", + parameter FEATURE_MAC = "NONE", + parameter FEATURE_MULTIPLIER = "PARALLEL", + parameter FEATURE_DIVIDER = "NONE", + + parameter FEATURE_ADDC = "NONE", + parameter FEATURE_SRA = "ENABLED", + parameter FEATURE_ROR = "NONE", + parameter FEATURE_EXT = "NONE", + parameter FEATURE_CMOV = "NONE", + parameter FEATURE_FFL1 = "NONE", + parameter FEATURE_ATOMIC = 
"ENABLED", + parameter FEATURE_MSYNC = "ENABLED", + parameter FEATURE_PSYNC = "NONE", + parameter FEATURE_CSYNC = "NONE", + + parameter FEATURE_FPU = "NONE", // ENABLED|NONE + + parameter FEATURE_CUST1 = "NONE", + parameter FEATURE_CUST2 = "NONE", + parameter FEATURE_CUST3 = "NONE", + parameter FEATURE_CUST4 = "NONE", + parameter FEATURE_CUST5 = "NONE", + parameter FEATURE_CUST6 = "NONE", + parameter FEATURE_CUST7 = "NONE", + parameter FEATURE_CUST8 = "NONE" + ) + ( + input clk, + input rst, + + // input from fetch stage + input [`OR1K_INSN_WIDTH-1:0] decode_insn_i, + + // ALU opcodes + output [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_o, + output [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_secondary_o, + + output [`OR1K_IMM_WIDTH-1:0] decode_imm16_o, + output [OPTION_OPERAND_WIDTH-1:0] decode_immediate_o, + output decode_immediate_sel_o, + + // Upper 10 bits of immediate for jumps and branches + output [9:0] decode_immjbr_upper_o, + + // GPR numbers + output [OPTION_RF_ADDR_WIDTH-1:0] decode_rfd_adr_o, + output [OPTION_RF_ADDR_WIDTH-1:0] decode_rfa_adr_o, + output [OPTION_RF_ADDR_WIDTH-1:0] decode_rfb_adr_o, + + output decode_rf_wb_o, + + output decode_op_jbr_o, + output decode_op_jr_o, + output decode_op_jal_o, + output decode_op_bf_o, + output decode_op_bnf_o, + output decode_op_brcond_o, + output decode_op_branch_o, + + output decode_op_alu_o, + + output decode_op_lsu_load_o, + output decode_op_lsu_store_o, + output decode_op_lsu_atomic_o, + output reg [1:0] decode_lsu_length_o, + output decode_lsu_zext_o, + + output decode_op_mfspr_o, + output decode_op_mtspr_o, + + output decode_op_rfe_o, + output decode_op_setflag_o, + output decode_op_add_o, + output decode_op_mul_o, + output decode_op_mul_signed_o, + output decode_op_mul_unsigned_o, + output decode_op_div_o, + output decode_op_div_signed_o, + output decode_op_div_unsigned_o, + output decode_op_shift_o, + output decode_op_ffl1_o, + output decode_op_movhi_o, + output decode_op_ext_o, + + // Sync operations + 
output decode_op_msync_o, + output [`OR1K_FPUOP_WIDTH-1:0] decode_op_fpu_o, + + + // Adder control logic + output decode_adder_do_sub_o, + output decode_adder_do_carry_o, + + // exception output - + output reg decode_except_illegal_o, + output decode_except_syscall_o, + output decode_except_trap_o, + + output [`OR1K_OPCODE_WIDTH-1:0] decode_opc_insn_o + ); + + wire [`OR1K_OPCODE_WIDTH-1:0] opc_insn; + wire [`OR1K_ALU_OPC_WIDTH-1:0] opc_alu; + + wire [OPTION_OPERAND_WIDTH-1:0] imm_sext; + wire imm_sext_sel; + wire [OPTION_OPERAND_WIDTH-1:0] imm_zext; + wire imm_zext_sel; + wire [OPTION_OPERAND_WIDTH-1:0] imm_high; + wire imm_high_sel; + + wire decode_except_ibus_align; + + // Insn opcode + assign opc_insn = decode_insn_i[`OR1K_OPCODE_SELECT]; + assign decode_opc_insn_o = opc_insn; + + // load opcodes are 6'b10_0000 to 6'b10_0110, 0 to 6, so check for 7 and up + assign decode_op_lsu_load_o = (decode_insn_i[31:30] == 2'b10) & + !(&decode_insn_i[28:26]) & + !decode_insn_i[29] || + ((opc_insn == `OR1K_OPCODE_LWA) & + (FEATURE_ATOMIC!="NONE")); + + // Detect when instruction is store + assign decode_op_lsu_store_o = (opc_insn == `OR1K_OPCODE_SW) || + (opc_insn == `OR1K_OPCODE_SB) || + (opc_insn == `OR1K_OPCODE_SH) || + ((opc_insn == `OR1K_OPCODE_SWA) & + (FEATURE_ATOMIC!="NONE")); + + assign decode_op_lsu_atomic_o = ((opc_insn == `OR1K_OPCODE_LWA) || + (opc_insn == `OR1K_OPCODE_SWA)) & + (FEATURE_ATOMIC!="NONE"); + + // Decode length of load/store operation + always @(*) + case (opc_insn) + `OR1K_OPCODE_SB, + `OR1K_OPCODE_LBZ, + `OR1K_OPCODE_LBS: + decode_lsu_length_o = 2'b00; + + `OR1K_OPCODE_SH, + `OR1K_OPCODE_LHZ, + `OR1K_OPCODE_LHS: + decode_lsu_length_o = 2'b01; + + `OR1K_OPCODE_SW, + `OR1K_OPCODE_SWA, + `OR1K_OPCODE_LWZ, + `OR1K_OPCODE_LWS, + `OR1K_OPCODE_LWA: + decode_lsu_length_o = 2'b10; + + default: + decode_lsu_length_o = 2'b10; + endcase + + assign decode_lsu_zext_o = opc_insn[0]; + + assign decode_op_msync_o = FEATURE_MSYNC!="NONE" && + opc_insn == 
`OR1K_OPCODE_SYSTRAPSYNC && + decode_insn_i[`OR1K_SYSTRAPSYNC_OPC_SELECT] == + `OR1K_SYSTRAPSYNC_OPC_MSYNC; + + assign decode_op_mtspr_o = opc_insn == `OR1K_OPCODE_MTSPR; + + // Detect when setflag instruction + assign decode_op_setflag_o = opc_insn == `OR1K_OPCODE_SF || + opc_insn == `OR1K_OPCODE_SFIMM; + + assign decode_op_alu_o = opc_insn == `OR1K_OPCODE_ALU || + opc_insn == `OR1K_OPCODE_ORI || + opc_insn == `OR1K_OPCODE_ANDI || + opc_insn == `OR1K_OPCODE_XORI; + + // Bottom 4 opcodes branch against an immediate + assign decode_op_jbr_o = opc_insn < `OR1K_OPCODE_NOP; + + assign decode_op_jr_o = opc_insn == `OR1K_OPCODE_JR | + opc_insn == `OR1K_OPCODE_JALR; + + assign decode_op_jal_o = opc_insn == `OR1K_OPCODE_JALR | + opc_insn == `OR1K_OPCODE_JAL; + + assign decode_op_bf_o = opc_insn == `OR1K_OPCODE_BF; + assign decode_op_bnf_o = opc_insn == `OR1K_OPCODE_BNF; + assign decode_op_brcond_o = decode_op_bf_o | decode_op_bnf_o; + + // All branch instructions combined + assign decode_op_branch_o = decode_op_jbr_o | + decode_op_jr_o | + decode_op_jal_o; + + assign decode_op_mfspr_o = opc_insn == `OR1K_OPCODE_MFSPR; + + assign decode_op_rfe_o = opc_insn == `OR1K_OPCODE_RFE; + + assign decode_op_add_o = (opc_insn == `OR1K_OPCODE_ALU && + (opc_alu == `OR1K_ALU_OPC_ADDC || + opc_alu == `OR1K_ALU_OPC_ADD || + opc_alu == `OR1K_ALU_OPC_SUB)) || + opc_insn == `OR1K_OPCODE_ADDIC || + opc_insn == `OR1K_OPCODE_ADDI; + + assign decode_op_mul_signed_o = (opc_insn == `OR1K_OPCODE_ALU && + opc_alu == `OR1K_ALU_OPC_MUL) || + opc_insn == `OR1K_OPCODE_MULI; + + assign decode_op_mul_unsigned_o = opc_insn == `OR1K_OPCODE_ALU && + opc_alu == `OR1K_ALU_OPC_MULU; + + assign decode_op_mul_o = decode_op_mul_signed_o | decode_op_mul_unsigned_o; + + assign decode_op_div_signed_o = opc_insn == `OR1K_OPCODE_ALU && + opc_alu == `OR1K_ALU_OPC_DIV; + + assign decode_op_div_unsigned_o = opc_insn == `OR1K_OPCODE_ALU && + opc_alu == `OR1K_ALU_OPC_DIVU; + + assign decode_op_div_o = decode_op_div_signed_o 
| decode_op_div_unsigned_o; + + assign decode_op_shift_o = opc_insn == `OR1K_OPCODE_ALU && + opc_alu == `OR1K_ALU_OPC_SHRT || + opc_insn == `OR1K_OPCODE_SHRTI; + + /* check bit 9 to verify valid l.fl1/l.ff1 instruction */ + assign decode_op_ffl1_o = opc_insn == `OR1K_OPCODE_ALU && + (decode_insn_i[9:8] == 2'b00 || decode_insn_i[9:8] == 2'b01) && + opc_alu == `OR1K_ALU_OPC_FFL1; + + assign decode_op_movhi_o = opc_insn == `OR1K_OPCODE_MOVHI; + + assign decode_op_ext_o = opc_insn == `OR1K_OPCODE_ALU && + (opc_alu == `OR1K_ALU_OPC_EXTBH || + opc_alu == `OR1K_ALU_OPC_EXTW) && + (FEATURE_EXT!="NONE"); + + // FPU related + generate + /* verilator lint_off WIDTH */ + if (FEATURE_FPU!="NONE") begin : fpu_decode_ena + /* verilator lint_on WIDTH */ + // Only single precision FP-instructions are supported + assign decode_op_fpu_o = { ((opc_insn == `OR1K_OPCODE_FPU) & + ~decode_insn_i[`OR1K_FPUOP_DOUBLE_BIT]), + decode_insn_i[`OR1K_FPUOP_WIDTH-2:0] }; + end + else begin : fpu_decode_none + assign decode_op_fpu_o = {`OR1K_FPUOP_WIDTH{1'b0}}; + end + endgenerate // FPU related + + // Which instructions cause writeback? + assign decode_rf_wb_o = (opc_insn == `OR1K_OPCODE_JAL | + opc_insn == `OR1K_OPCODE_MOVHI | + opc_insn == `OR1K_OPCODE_JALR | + opc_insn == `OR1K_OPCODE_LWA) | + // All '10????' opcodes except l.sfxxi + (decode_insn_i[31:30] == 2'b10 & + !(opc_insn == `OR1K_OPCODE_SFIMM)) | + // All '11????' opcodes except l.sfxx and l.mtspr and lf.sfxx.s + (decode_insn_i[31:30] == 2'b11 & + !(opc_insn == `OR1K_OPCODE_SF | + decode_op_mtspr_o | decode_op_lsu_store_o) & + !(decode_op_fpu_o[`OR1K_FPUOP_WIDTH-1] & decode_insn_i[3])); + + // Register file addresses + assign decode_rfa_adr_o = decode_insn_i[`OR1K_RA_SELECT]; + assign decode_rfb_adr_o = decode_insn_i[`OR1K_RB_SELECT]; + + assign decode_rfd_adr_o = decode_op_jal_o ? 
5'd9 : + decode_insn_i[`OR1K_RD_SELECT]; + + // Immediate in l.mtspr is broken up, reassemble + assign decode_imm16_o = (decode_op_mtspr_o | decode_op_lsu_store_o) ? + {decode_insn_i[25:21],decode_insn_i[10:0]} : + decode_insn_i[`OR1K_IMM_SELECT]; + + + // Upper 10 bits for jump/branch instructions + assign decode_immjbr_upper_o = decode_insn_i[25:16]; + + assign imm_sext = {{16{decode_imm16_o[15]}}, decode_imm16_o[15:0]}; + assign imm_sext_sel = ((opc_insn[5:4] == 2'b10) & + ~(opc_insn == `OR1K_OPCODE_ORI) & + ~(opc_insn == `OR1K_OPCODE_ANDI)) | + (opc_insn == `OR1K_OPCODE_SWA) | + (opc_insn == `OR1K_OPCODE_LWA) | + (opc_insn == `OR1K_OPCODE_SW) | + (opc_insn == `OR1K_OPCODE_SH) | + (opc_insn == `OR1K_OPCODE_SB); + + assign imm_zext = {{16{1'b0}}, decode_imm16_o[15:0]}; + assign imm_zext_sel = ((opc_insn[5:4] == 2'b10) & + ((opc_insn == `OR1K_OPCODE_ORI) | + (opc_insn == `OR1K_OPCODE_ANDI))) | + (opc_insn == `OR1K_OPCODE_MTSPR); + + assign imm_high = {decode_imm16_o, 16'd0}; + assign imm_high_sel = decode_op_movhi_o; + + assign decode_immediate_o = imm_sext_sel ? imm_sext : + imm_zext_sel ? imm_zext : imm_high; + + assign decode_immediate_sel_o = imm_sext_sel | imm_zext_sel | imm_high_sel; + + // ALU opcode + assign opc_alu = decode_insn_i[`OR1K_ALU_OPC_SELECT]; + assign decode_opc_alu_o = opc_insn == `OR1K_OPCODE_ORI ? `OR1K_ALU_OPC_OR : + opc_insn == `OR1K_OPCODE_ANDI ? `OR1K_ALU_OPC_AND : + opc_insn == `OR1K_OPCODE_XORI ? `OR1K_ALU_OPC_XOR : + opc_alu; + + assign decode_opc_alu_secondary_o = decode_op_setflag_o ? 
+ decode_insn_i[`OR1K_COMP_OPC_SELECT]: + {1'b0, + decode_insn_i[`OR1K_ALU_OPC_SECONDARY_SELECT]}; + + assign decode_except_syscall_o = opc_insn == `OR1K_OPCODE_SYSTRAPSYNC && + decode_insn_i[`OR1K_SYSTRAPSYNC_OPC_SELECT] == + `OR1K_SYSTRAPSYNC_OPC_SYSCALL; + + assign decode_except_trap_o = opc_insn == `OR1K_OPCODE_SYSTRAPSYNC && + decode_insn_i[`OR1K_SYSTRAPSYNC_OPC_SELECT] == + `OR1K_SYSTRAPSYNC_OPC_TRAP; + + // Illegal instruction decode + always @* + case (opc_insn) + `OR1K_OPCODE_J, + `OR1K_OPCODE_JAL, + `OR1K_OPCODE_BNF, + `OR1K_OPCODE_BF, + `OR1K_OPCODE_MOVHI, + `OR1K_OPCODE_RFE, + `OR1K_OPCODE_JR, + `OR1K_OPCODE_JALR, + `OR1K_OPCODE_LWZ, + `OR1K_OPCODE_LWS, + `OR1K_OPCODE_LBZ, + `OR1K_OPCODE_LBS, + `OR1K_OPCODE_LHZ, + `OR1K_OPCODE_LHS, + `OR1K_OPCODE_ADDI, + `OR1K_OPCODE_ANDI, + `OR1K_OPCODE_ORI, + `OR1K_OPCODE_XORI, + `OR1K_OPCODE_MFSPR, + /* + `OR1K_OPCODE_SLLI, + `OR1K_OPCODE_SRLI, + `OR1K_OPCODE_SRAI, + `OR1K_OPCODE_RORI, + */ + `OR1K_OPCODE_SFIMM, + `OR1K_OPCODE_MTSPR, + `OR1K_OPCODE_SW, + `OR1K_OPCODE_SB, + `OR1K_OPCODE_SH, + /* + `OR1K_OPCODE_SFEQ, + `OR1K_OPCODE_SFNE, + `OR1K_OPCODE_SFGTU, + `OR1K_OPCODE_SFGEU, + `OR1K_OPCODE_SFLTU, + `OR1K_OPCODE_SFLEU, + `OR1K_OPCODE_SFGTS, + `OR1K_OPCODE_SFGES, + `OR1K_OPCODE_SFLTS, + `OR1K_OPCODE_SFLES, + */ + `OR1K_OPCODE_SF, + `OR1K_OPCODE_NOP: + decode_except_illegal_o = 1'b0; + + `OR1K_OPCODE_SWA, + `OR1K_OPCODE_LWA: + decode_except_illegal_o = (FEATURE_ATOMIC=="NONE"); + + `OR1K_OPCODE_CUST1: + decode_except_illegal_o = (FEATURE_CUST1=="NONE"); + `OR1K_OPCODE_CUST2: + decode_except_illegal_o = (FEATURE_CUST2=="NONE"); + `OR1K_OPCODE_CUST3: + decode_except_illegal_o = (FEATURE_CUST3=="NONE"); + `OR1K_OPCODE_CUST4: + decode_except_illegal_o = (FEATURE_CUST4=="NONE"); + `OR1K_OPCODE_CUST5: + decode_except_illegal_o = (FEATURE_CUST5=="NONE"); + `OR1K_OPCODE_CUST6: + decode_except_illegal_o = (FEATURE_CUST6=="NONE"); + `OR1K_OPCODE_CUST7: + decode_except_illegal_o = (FEATURE_CUST7=="NONE"); + `OR1K_OPCODE_CUST8: 
+ decode_except_illegal_o = (FEATURE_CUST8=="NONE"); + `OR1K_OPCODE_FPU: + decode_except_illegal_o = (FEATURE_FPU=="NONE") | + decode_insn_i[`OR1K_FPUOP_DOUBLE_BIT]; + + `OR1K_OPCODE_LD, + `OR1K_OPCODE_SD: + decode_except_illegal_o = !(OPTION_OPERAND_WIDTH==64); + + `OR1K_OPCODE_ADDIC: + decode_except_illegal_o = (FEATURE_ADDC=="NONE"); + + //`OR1K_OPCODE_MACRC, // Same as movhi - check! + `OR1K_OPCODE_MACI, + `OR1K_OPCODE_MAC: + decode_except_illegal_o = (FEATURE_MAC=="NONE"); + + `OR1K_OPCODE_MULI: + decode_except_illegal_o = (FEATURE_MULTIPLIER=="NONE"); + + `OR1K_OPCODE_SHRTI: + case(decode_insn_i[`OR1K_ALU_OPC_SECONDARY_SELECT]) + `OR1K_ALU_OPC_SECONDARY_SHRT_SLL, + `OR1K_ALU_OPC_SECONDARY_SHRT_SRL: + decode_except_illegal_o = 1'b0; + `OR1K_ALU_OPC_SECONDARY_SHRT_SRA: + decode_except_illegal_o = (FEATURE_SRA=="NONE"); + + `OR1K_ALU_OPC_SECONDARY_SHRT_ROR: + decode_except_illegal_o = (FEATURE_ROR=="NONE"); + default: + decode_except_illegal_o = 1'b1; + endcase // case (decode_insn_i[`OR1K_ALU_OPC_SECONDARY_SELECT]) + + `OR1K_OPCODE_ALU: + case(decode_insn_i[`OR1K_ALU_OPC_SELECT]) + `OR1K_ALU_OPC_ADD, + `OR1K_ALU_OPC_SUB, + `OR1K_ALU_OPC_OR, + `OR1K_ALU_OPC_XOR, + `OR1K_ALU_OPC_AND: + decode_except_illegal_o = 1'b0; + `OR1K_ALU_OPC_CMOV: + decode_except_illegal_o = (FEATURE_CMOV=="NONE"); + `OR1K_ALU_OPC_FFL1: + decode_except_illegal_o = (FEATURE_FFL1=="NONE"); + `OR1K_ALU_OPC_DIV, + `OR1K_ALU_OPC_DIVU: + decode_except_illegal_o = (FEATURE_DIVIDER=="NONE"); + `OR1K_ALU_OPC_ADDC: + decode_except_illegal_o = (FEATURE_ADDC=="NONE"); + `OR1K_ALU_OPC_MUL, + `OR1K_ALU_OPC_MULU: + decode_except_illegal_o = (FEATURE_MULTIPLIER=="NONE"); + `OR1K_ALU_OPC_EXTBH, + `OR1K_ALU_OPC_EXTW: + decode_except_illegal_o = (FEATURE_EXT=="NONE"); + `OR1K_ALU_OPC_SHRT: + case(decode_insn_i[`OR1K_ALU_OPC_SECONDARY_SELECT]) + `OR1K_ALU_OPC_SECONDARY_SHRT_SLL, + `OR1K_ALU_OPC_SECONDARY_SHRT_SRL: + decode_except_illegal_o = 1'b0; + `OR1K_ALU_OPC_SECONDARY_SHRT_SRA: + decode_except_illegal_o 
= (FEATURE_SRA=="NONE"); + `OR1K_ALU_OPC_SECONDARY_SHRT_ROR: + decode_except_illegal_o = (FEATURE_ROR=="NONE"); + default: + decode_except_illegal_o = 1'b1; + endcase // case (decode_insn_i[`OR1K_ALU_OPC_SECONDARY_SELECT]) + default: + decode_except_illegal_o = 1'b1; + endcase // case (decode_insn_i[`OR1K_ALU_OPC_SELECT]) + + `OR1K_OPCODE_SYSTRAPSYNC: begin + if ((decode_insn_i[`OR1K_SYSTRAPSYNC_OPC_SELECT] == + `OR1K_SYSTRAPSYNC_OPC_SYSCALL && + FEATURE_SYSCALL=="ENABLED") || + (decode_insn_i[`OR1K_SYSTRAPSYNC_OPC_SELECT] == + `OR1K_SYSTRAPSYNC_OPC_TRAP && + FEATURE_TRAP=="ENABLED") || + (decode_insn_i[`OR1K_SYSTRAPSYNC_OPC_SELECT] == + `OR1K_SYSTRAPSYNC_OPC_MSYNC) || + (decode_insn_i[`OR1K_SYSTRAPSYNC_OPC_SELECT] == + `OR1K_SYSTRAPSYNC_OPC_PSYNC && + FEATURE_PSYNC!="NONE") || + (decode_insn_i[`OR1K_SYSTRAPSYNC_OPC_SELECT] == + `OR1K_SYSTRAPSYNC_OPC_CSYNC && + FEATURE_CSYNC!="NONE")) + decode_except_illegal_o = 1'b0; + else + decode_except_illegal_o = 1'b1; + end // case: endcase... + default: + decode_except_illegal_o = 1'b1; + + endcase // case (decode_insn_i[`OR1K_OPCODE_SELECT]) + + // Adder control logic + // Subtract when comparing to check if equal + assign decode_adder_do_sub_o = (opc_insn == `OR1K_OPCODE_ALU & + opc_alu == `OR1K_ALU_OPC_SUB) | + decode_op_setflag_o; + + // Generate carry-in select + assign decode_adder_do_carry_o = (FEATURE_ADDC!="NONE") && + ((opc_insn == `OR1K_OPCODE_ALU & + opc_alu == `OR1K_ALU_OPC_ADDC) || + (opc_insn == `OR1K_OPCODE_ADDIC)); + +endmodule // mor1kx_decode diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_decode_execute_cappuccino.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_decode_execute_cappuccino.v new file mode 100644 index 0000000..4ccb600 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_decode_execute_cappuccino.v @@ -0,0 +1,595 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open 
Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: Cappuccino decode to execute module. + - Decode to execute stage signal passing. + - Branches are resolved (in decode stage). + - Hazards that can not be resolved by bypassing are detected and + bubbles are inserted on such conditions. + + Generate valid signal when stage is done. + + Copyright (C) 2012 Julius Baxter + Copyright (C) 2013 Stefan Kristiansson + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_decode_execute_cappuccino + #( + parameter OPTION_OPERAND_WIDTH = 32, + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}, + + parameter OPTION_RF_ADDR_WIDTH = 5, + + parameter FEATURE_SYSCALL = "ENABLED", + parameter FEATURE_TRAP = "ENABLED", + parameter FEATURE_DELAY_SLOT = "ENABLED", + + parameter FEATURE_MULTIPLIER = "THREESTAGE", + + parameter FEATURE_FPU = "NONE", // ENABLED|NONE + + parameter FEATURE_INBUILT_CHECKERS = "ENABLED" + ) + ( + input clk, + input rst, + + // pipeline control signal in + input padv_i, + input [OPTION_OPERAND_WIDTH-1:0] pc_decode_i, + + // input from register file + input [OPTION_OPERAND_WIDTH-1:0] decode_rfb_i, + input [OPTION_OPERAND_WIDTH-1:0] execute_rfb_i, + + // Branch prediction signals + input predicted_flag_i, + output reg execute_predicted_flag_o, + // The target pc that should be used in case of branch misprediction + output reg [OPTION_OPERAND_WIDTH-1:0] execute_mispredict_target_o, + + input pipeline_flush_i, + + // ALU related inputs from decode + input [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_i, + input [`OR1K_ALU_OPC_WIDTH-1:0] decode_opc_alu_secondary_i, + + input [`OR1K_IMM_WIDTH-1:0] decode_imm16_i, + input [OPTION_OPERAND_WIDTH-1:0] decode_immediate_i, + input decode_immediate_sel_i, + + // ALU related outputs to 
execute + output reg [`OR1K_ALU_OPC_WIDTH-1:0] execute_opc_alu_o, + output reg [`OR1K_ALU_OPC_WIDTH-1:0] execute_opc_alu_secondary_o, + + output reg [`OR1K_IMM_WIDTH-1:0] execute_imm16_o, + output reg [OPTION_OPERAND_WIDTH-1:0] execute_immediate_o, + output reg execute_immediate_sel_o, + + // Adder control logic from decode + input decode_adder_do_sub_i, + input decode_adder_do_carry_i, + + // Adder control logic to execute + output reg execute_adder_do_sub_o, + output reg execute_adder_do_carry_o, + + // Upper 10 bits of immediate for jumps and branches + input [9:0] decode_immjbr_upper_i, + output reg [9:0] execute_immjbr_upper_o, + + // GPR numbers + output reg [OPTION_RF_ADDR_WIDTH-1:0] execute_rfd_adr_o, + input [OPTION_RF_ADDR_WIDTH-1:0] decode_rfd_adr_i, + input [OPTION_RF_ADDR_WIDTH-1:0] decode_rfa_adr_i, + input [OPTION_RF_ADDR_WIDTH-1:0] decode_rfb_adr_i, + input [OPTION_RF_ADDR_WIDTH-1:0] ctrl_rfd_adr_i, + input ctrl_op_lsu_load_i, + input ctrl_op_mfspr_i, + input ctrl_op_mul_i, + + // Control signal inputs from decode stage + input decode_rf_wb_i, + + input decode_op_alu_i, + + input decode_op_setflag_i, + + input decode_op_jbr_i, + input decode_op_jr_i, + input decode_op_jal_i, + input decode_op_bf_i, + input decode_op_bnf_i, + input decode_op_brcond_i, + input decode_op_branch_i, + + input decode_op_lsu_load_i, + input decode_op_lsu_store_i, + input decode_op_lsu_atomic_i, + input [1:0] decode_lsu_length_i, + input decode_lsu_zext_i, + + input decode_op_mfspr_i, + input decode_op_mtspr_i, + + input decode_op_rfe_i, + input decode_op_add_i, + input decode_op_mul_i, + input decode_op_mul_signed_i, + input decode_op_mul_unsigned_i, + input decode_op_div_i, + input decode_op_div_signed_i, + input decode_op_div_unsigned_i, + input decode_op_shift_i, + input decode_op_ffl1_i, + input decode_op_movhi_i, + input decode_op_ext_i, + input decode_op_msync_i, + input [`OR1K_FPUOP_WIDTH-1:0] decode_op_fpu_i, + + input [`OR1K_OPCODE_WIDTH-1:0] decode_opc_insn_i, + 
+ // Control signal outputs to execute stage + output reg execute_rf_wb_o, + + output reg execute_op_alu_o, + + output reg execute_op_setflag_o, + + output reg execute_op_jbr_o, + output reg execute_op_jr_o, + output reg execute_op_jal_o, + output reg execute_op_brcond_o, + output reg execute_op_branch_o, + + output reg execute_op_lsu_load_o, + output reg execute_op_lsu_store_o, + output reg execute_op_lsu_atomic_o, + output reg [1:0] execute_lsu_length_o, + output reg execute_lsu_zext_o, + + output reg execute_op_mfspr_o, + output reg execute_op_mtspr_o, + + output reg execute_op_rfe_o, + output reg execute_op_add_o, + output reg execute_op_mul_o, + output reg execute_op_mul_signed_o, + output reg execute_op_mul_unsigned_o, + output reg execute_op_div_o, + output reg execute_op_div_signed_o, + output reg execute_op_div_unsigned_o, + output reg execute_op_shift_o, + output reg execute_op_ffl1_o, + output reg execute_op_movhi_o, + output reg execute_op_ext_o, + output reg execute_op_bf_o, + output reg execute_op_bnf_o, + output reg execute_op_msync_o, + output [`OR1K_FPUOP_WIDTH-1:0] execute_op_fpu_o, + + output reg [OPTION_OPERAND_WIDTH-1:0] execute_jal_result_o, + + output reg [`OR1K_OPCODE_WIDTH-1:0] execute_opc_insn_o, + + // branch detection + output decode_branch_o, + output [OPTION_OPERAND_WIDTH-1:0] decode_branch_target_o, + + // exceptions in + input decode_except_ibus_err_i, + input decode_except_itlb_miss_i, + input decode_except_ipagefault_i, + input decode_except_illegal_i, + input decode_except_syscall_i, + input decode_except_trap_i, + + // exception output - + output reg execute_except_ibus_err_o, + output reg execute_except_itlb_miss_o, + output reg execute_except_ipagefault_o, + output reg execute_except_illegal_o, + output reg execute_except_ibus_align_o, + output reg execute_except_syscall_o, + output reg execute_except_trap_o, + + output reg [OPTION_OPERAND_WIDTH-1:0] pc_execute_o, + + // output is valid, signal + output reg decode_valid_o, + + 
output decode_bubble_o, + output reg execute_bubble_o + ); + + wire ctrl_to_decode_interlock; + wire branch_to_imm; + wire [OPTION_OPERAND_WIDTH-1:0] branch_to_imm_target; + wire branch_to_reg; + + wire decode_except_ibus_align; + + wire [OPTION_OPERAND_WIDTH-1:0] next_pc_after_branch_insn; + wire [OPTION_OPERAND_WIDTH-1:0] decode_mispredict_target; + + // Op control signals to execute stage + always @(posedge clk `OR_ASYNC_RST) + if (rst) begin + execute_op_bf_o <= 1'b0; + execute_op_bnf_o <= 1'b0; + execute_op_alu_o <= 1'b0; + execute_op_add_o <= 1'b0; + execute_op_mul_o <= 1'b0; + execute_op_mul_signed_o <= 1'b0; + execute_op_mul_unsigned_o <= 1'b0; + execute_op_div_o <= 1'b0; + execute_op_div_signed_o <= 1'b0; + execute_op_div_unsigned_o <= 1'b0; + execute_op_shift_o <= 1'b0; + execute_op_ffl1_o <= 1'b0; + execute_op_movhi_o <= 1'b0; + execute_op_ext_o <= 1'b0; + execute_op_msync_o <= 1'b0; + execute_op_mfspr_o <= 1'b0; + execute_op_mtspr_o <= 1'b0; + execute_op_lsu_load_o <= 1'b0; + execute_op_lsu_store_o <= 1'b0; + execute_op_lsu_atomic_o <= 1'b0; + execute_op_setflag_o <= 1'b0; + execute_op_jbr_o <= 1'b0; + execute_op_jr_o <= 1'b0; + execute_op_jal_o <= 1'b0; + execute_op_brcond_o <= 1'b0; + execute_op_branch_o <= 0; + end else if (pipeline_flush_i) begin + execute_op_bf_o <= 1'b0; + execute_op_bnf_o <= 1'b0; + execute_op_alu_o <= 1'b0; + execute_op_add_o <= 1'b0; + execute_op_mul_o <= 1'b0; + execute_op_mul_signed_o <= 1'b0; + execute_op_mul_unsigned_o <= 1'b0; + execute_op_div_o <= 1'b0; + execute_op_div_signed_o <= 1'b0; + execute_op_div_unsigned_o <= 1'b0; + execute_op_shift_o <= 1'b0; + execute_op_ffl1_o <= 1'b0; + execute_op_movhi_o <= 1'b0; + execute_op_ext_o <= 1'b0; + execute_op_msync_o <= 1'b0; + execute_op_lsu_load_o <= 1'b0; + execute_op_lsu_store_o <= 1'b0; + execute_op_lsu_atomic_o <= 1'b0; + execute_op_setflag_o <= 1'b0; + execute_op_jbr_o <= 1'b0; + execute_op_jr_o <= 1'b0; + execute_op_jal_o <= 1'b0; + execute_op_brcond_o <= 1'b0; + 
execute_op_branch_o <= 1'b0; + end else if (padv_i) begin + execute_op_bf_o <= decode_op_bf_i; + execute_op_bnf_o <= decode_op_bnf_i; + execute_op_alu_o <= decode_op_alu_i; + execute_op_add_o <= decode_op_add_i; + execute_op_mul_o <= decode_op_mul_i; + execute_op_mul_signed_o <= decode_op_mul_signed_i; + execute_op_mul_unsigned_o <= decode_op_mul_unsigned_i; + execute_op_div_o <= decode_op_div_i; + execute_op_div_signed_o <= decode_op_div_signed_i; + execute_op_div_unsigned_o <= decode_op_div_unsigned_i; + execute_op_shift_o <= decode_op_shift_i; + execute_op_ffl1_o <= decode_op_ffl1_i; + execute_op_movhi_o <= decode_op_movhi_i; + execute_op_ext_o <= decode_op_ext_i; + execute_op_msync_o <= decode_op_msync_i; + execute_op_mfspr_o <= decode_op_mfspr_i; + execute_op_mtspr_o <= decode_op_mtspr_i; + execute_op_lsu_load_o <= decode_op_lsu_load_i; + execute_op_lsu_store_o <= decode_op_lsu_store_i; + execute_op_lsu_atomic_o <= decode_op_lsu_atomic_i; + execute_op_setflag_o <= decode_op_setflag_i; + execute_op_jbr_o <= decode_op_jbr_i; + execute_op_jr_o <= decode_op_jr_i; + execute_op_jal_o <= decode_op_jal_i; + execute_op_brcond_o <= decode_op_brcond_i; + execute_op_branch_o <= decode_op_branch_i; + if (decode_bubble_o) begin + execute_op_bf_o <= 1'b0; + execute_op_bnf_o <= 1'b0; + execute_op_alu_o <= 1'b0; + execute_op_add_o <= 1'b0; + execute_op_mul_o <= 1'b0; + execute_op_mul_signed_o <= 1'b0; + execute_op_mul_unsigned_o <= 1'b0; + execute_op_div_o <= 1'b0; + execute_op_div_signed_o <= 1'b0; + execute_op_div_unsigned_o <= 1'b0; + execute_op_shift_o <= 1'b0; + execute_op_ffl1_o <= 1'b0; + execute_op_movhi_o <= 1'b0; + execute_op_ext_o <= 1'b0; + execute_op_msync_o <= 1'b0; + execute_op_mtspr_o <= 1'b0; + execute_op_mfspr_o <= 1'b0; + execute_op_lsu_load_o <= 1'b0; + execute_op_lsu_store_o <= 1'b0; + execute_op_lsu_atomic_o <= 1'b0; + execute_op_setflag_o <= 1'b0; + execute_op_jbr_o <= 1'b0; + execute_op_jr_o <= 1'b0; + execute_op_jal_o <= 1'b0; + execute_op_brcond_o <= 
1'b0; + execute_op_branch_o <= 1'b0; + end + end + + // FPU related + generate + /* verilator lint_off WIDTH */ + if (FEATURE_FPU!="NONE") begin : fpu_decode_execute_ena + /* verilator lint_on WIDTH */ + reg [`OR1K_FPUOP_WIDTH-1:0] execute_op_fpu_r; + assign execute_op_fpu_o = execute_op_fpu_r; + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + execute_op_fpu_r <= {`OR1K_FPUOP_WIDTH{1'b0}}; + else if (pipeline_flush_i) + execute_op_fpu_r <= {`OR1K_FPUOP_WIDTH{1'b0}}; + else if (padv_i) + execute_op_fpu_r <= (decode_bubble_o ? + {`OR1K_FPUOP_WIDTH{1'b0}} : decode_op_fpu_i); + end // @clk + end + else begin : fpu_decode_execute_none + assign execute_op_fpu_o = {`OR1K_FPUOP_WIDTH{1'b0}}; + end + endgenerate // FPU related + + // rfe is a special case, instead of pushing the pipeline full + // of nops on a decode_bubble_o, we push it full of rfes. + // The reason for this is that we need the rfe to reach control + // stage so it will cause the branch. + // It will clear itself by the pipeline_flush_i that the rfe + // will generate. 
+ always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_op_rfe_o <= 0; + else if (pipeline_flush_i) + execute_op_rfe_o <= 0; + else if (padv_i) + execute_op_rfe_o <= decode_op_rfe_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) begin + execute_rf_wb_o <= 0; + end else if (pipeline_flush_i) begin + execute_rf_wb_o <= 0; + end else if (padv_i) begin + execute_rf_wb_o <= decode_rf_wb_i; + if (decode_bubble_o) + execute_rf_wb_o <= 0; + end + + always @(posedge clk) + if (padv_i) + execute_rfd_adr_o <= decode_rfd_adr_i; + + always @(posedge clk) + if (padv_i) begin + execute_lsu_length_o <= decode_lsu_length_i; + execute_lsu_zext_o <= decode_lsu_zext_i; + end + + always @(posedge clk) + if (padv_i) begin + execute_imm16_o <= decode_imm16_i; + execute_immediate_o <= decode_immediate_i; + execute_immediate_sel_o <= decode_immediate_sel_i; + end + + always @(posedge clk) + if (padv_i ) + execute_immjbr_upper_o <= decode_immjbr_upper_i; + + always @(posedge clk) + if (padv_i) begin + execute_opc_alu_o <= decode_opc_alu_i; + execute_opc_alu_secondary_o <= decode_opc_alu_secondary_i; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) begin + execute_opc_insn_o <= `OR1K_OPCODE_NOP; + end else if (pipeline_flush_i) begin + execute_opc_insn_o <= `OR1K_OPCODE_NOP; + end else if (padv_i) begin + execute_opc_insn_o <= decode_opc_insn_i; + if (decode_bubble_o) + execute_opc_insn_o <= `OR1K_OPCODE_NOP; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) begin + execute_adder_do_sub_o <= 1'b0; + execute_adder_do_carry_o <= 1'b0; + end else if (pipeline_flush_i) begin + execute_adder_do_sub_o <= 1'b0; + execute_adder_do_carry_o <= 1'b0; + end else if (padv_i) begin + execute_adder_do_sub_o <= decode_adder_do_sub_i; + execute_adder_do_carry_o <= decode_adder_do_carry_i; + if (decode_bubble_o) begin + execute_adder_do_sub_o <= 1'b0; + execute_adder_do_carry_o <= 1'b0; + end + end + + // Decode for system call exception + always @(posedge clk `OR_ASYNC_RST) + if (rst) 
+ execute_except_syscall_o <= 0; + else if (padv_i && FEATURE_SYSCALL=="ENABLED") + execute_except_syscall_o <= decode_except_syscall_i; + + // Decode for trap exception + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_except_trap_o <= 0; + else if (padv_i && FEATURE_TRAP=="ENABLED") + execute_except_trap_o <= decode_except_trap_i; + + // Decode Illegal instruction + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_except_illegal_o <= 0; + else if (padv_i) + execute_except_illegal_o <= decode_except_illegal_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_except_ibus_err_o <= 1'b0; + else if (padv_i) + execute_except_ibus_err_o <= decode_except_ibus_err_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_except_itlb_miss_o <= 1'b0; + else if (padv_i) + execute_except_itlb_miss_o <= decode_except_itlb_miss_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_except_ipagefault_o <= 1'b0; + else if (padv_i) + execute_except_ipagefault_o <= decode_except_ipagefault_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_except_ibus_align_o <= 1'b0; + else if (padv_i) + execute_except_ibus_align_o <= decode_except_ibus_align; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + decode_valid_o <= 0; + else + decode_valid_o <= padv_i; + + always @(posedge clk) // pc_execute_o has no reset branch, so the async reset term is kept out of the event list + if (padv_i) + pc_execute_o <= pc_decode_i; + + // Branch detection + assign ctrl_to_decode_interlock = (ctrl_op_lsu_load_i | ctrl_op_mfspr_i | + ctrl_op_mul_i & + FEATURE_MULTIPLIER=="PIPELINED") & + ((decode_rfa_adr_i == ctrl_rfd_adr_i) || + (decode_rfb_adr_i == ctrl_rfd_adr_i)); + + assign branch_to_imm = (decode_op_jbr_i & + // l.j/l.jal + (!(|decode_opc_insn_i[2:1]) | + // l.bf/bnf and flag is right + (decode_opc_insn_i[2] == predicted_flag_i))); + + assign branch_to_imm_target = pc_decode_i + {{4{decode_immjbr_upper_i[9]}}, + decode_immjbr_upper_i, + decode_imm16_i,2'b00}; + assign branch_to_reg = decode_op_jr_i & + 
!(ctrl_to_decode_interlock | + execute_rf_wb_o & + (decode_rfb_adr_i == execute_rfd_adr_o)); + + assign decode_branch_o = (branch_to_imm | branch_to_reg) & + !pipeline_flush_i; + + assign decode_branch_target_o = branch_to_imm ? + branch_to_imm_target : + // If a bubble has been pushed out to get + // the instruction that will write the + // branch target to control stage, then we + // need to use the register result from + // execute stage instead of decode stage. + execute_bubble_o | execute_op_jr_o ? + execute_rfb_i : decode_rfb_i; + + assign decode_except_ibus_align = decode_branch_o & + (|decode_branch_target_o[1:0]); + + assign next_pc_after_branch_insn = FEATURE_DELAY_SLOT == "ENABLED" ? + pc_decode_i + 8 : pc_decode_i + 4; + + assign decode_mispredict_target = decode_op_bf_i & !predicted_flag_i | + decode_op_bnf_i & predicted_flag_i ? + branch_to_imm_target : + next_pc_after_branch_insn; + + // Forward branch prediction signals to execute stage + always @(posedge clk) + if (padv_i & decode_op_brcond_i) + execute_mispredict_target_o <= decode_mispredict_target; + + always @(posedge clk) + if (padv_i & decode_op_brcond_i) + execute_predicted_flag_o <= predicted_flag_i; + + // Calculate the link register result + // TODO: investigate if the ALU adder can be used for this without + // introducing critical paths + always @(posedge clk) + if (padv_i) + execute_jal_result_o <= next_pc_after_branch_insn; + + // Detect the situation where there is an instruction in execute stage + // that will produce its result in control stage (i.e. load and mfspr), + // and an instruction currently in decode stage needing its result as + // input in execute stage. + // Also detect the situation where there is a jump to register in decode + // stage and an instruction in execute stage that will write to that + // register. 
+ // + // A bubble is also inserted when an rfe instruction is in decode stage, + // the main purpose of this is to stall fetch while the rfe is propagating + // up to ctrl stage. + + assign decode_bubble_o = ( + // load/mfspr/mul + (execute_op_lsu_load_o | execute_op_mfspr_o | + execute_op_mul_o & + FEATURE_MULTIPLIER=="PIPELINED") & + (decode_rfa_adr_i == execute_rfd_adr_o || + decode_rfb_adr_i == execute_rfd_adr_o) | + // mul + FEATURE_MULTIPLIER=="PIPELINED" & + (decode_op_mul_i & + (ctrl_to_decode_interlock | + execute_rf_wb_o & + (decode_rfa_adr_i == execute_rfd_adr_o || + decode_rfb_adr_i == execute_rfd_adr_o))) | + // jr + decode_op_jr_i & + (ctrl_to_decode_interlock | + execute_rf_wb_o & + (decode_rfb_adr_i == execute_rfd_adr_o)) | + // atomic store + execute_op_lsu_store_o & execute_op_lsu_atomic_o | + // rfe + decode_op_rfe_i + ) & padv_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_bubble_o <= 0; + else if (pipeline_flush_i) + execute_bubble_o <= 0; + else if (padv_i) + execute_bubble_o <= decode_bubble_o; + +endmodule // mor1kx_decode_execute_cappuccino diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_dmmu.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_dmmu.v new file mode 100644 index 0000000..76de38c --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_dmmu.v @@ -0,0 +1,457 @@ +/****************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy
+  of the OHDL was not distributed with this file, You
+  can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+  Description: Data MMU implementation - per-way match/translate TLB RAMs (normal and huge entries), permission check, SPR access and optional hardware TLB reload
+
+  Copyright (C) 2013 Stefan Kristiansson
+
+ ******************************************************************************/
+
+`include "mor1kx-defines.v"
+
+module mor1kx_dmmu
+  #(
+    parameter FEATURE_DMMU_HW_TLB_RELOAD = "NONE", // "ENABLED" generates DMMUCR and the reload FSM; anything else ties them off
+    parameter OPTION_OPERAND_WIDTH = 32,           // width of addresses, SPR data and TLB RAM words
+    parameter OPTION_DMMU_SET_WIDTH = 6,           // ADDR_WIDTH of every TLB RAM
+    parameter OPTION_DMMU_WAYS = 1                 // number of match/translate RAM pairs generated
+    )
+   (
+    input                                 clk,
+    input                                 rst,
+
+    input                                 enable_i,          // gates do_reload/busy; a reload in flight is aborted when low
+    input [OPTION_OPERAND_WIDTH-1:0]      virt_addr_i,       // supplies the TLB RAM read index
+    input [OPTION_OPERAND_WIDTH-1:0]      virt_addr_match_i, // compared with the match tags; supplies the untranslated offset bits
+    output reg [OPTION_OPERAND_WIDTH-1:0] phys_addr_o,       // combinational translation result
+    output reg                            cache_inhibit_o,   // bit 1 of the hitting translate entry
+
+    input                                 op_store_i,
+    input                                 op_load_i,
+    input                                 supervisor_mode_i, // selects swe/sre instead of uwe/ure in the permission check
+
+    output reg                            tlb_miss_o,        // no way hits and no reload page fault is pending
+    output                                pagefault_o,       // permission violation, masked while tlb_reload_busy_o
+
+    output reg                            tlb_reload_req_o,  // reload memory read request
+    output                                tlb_reload_busy_o,
+    input                                 tlb_reload_ack_i,  // qualifies tlb_reload_data_i
+    output reg [OPTION_OPERAND_WIDTH-1:0] tlb_reload_addr_o, // address of the word the reload FSM wants to read
+    input [OPTION_OPERAND_WIDTH-1:0]      tlb_reload_data_i,
+    output                                tlb_reload_pagefault_o,
+    input                                 tlb_reload_pagefault_clear_i,
+
+    // SPR interface
+    input [15:0]                          spr_bus_addr_i,
+    input                                 spr_bus_we_i,
+    input                                 spr_bus_stb_i,
+    input [OPTION_OPERAND_WIDTH-1:0]      spr_bus_dat_i,
+
+    output [OPTION_OPERAND_WIDTH-1:0]     spr_bus_dat_o,
+    output                                spr_bus_ack_o
+    );
+
+   localparam WAYS_WIDTH = (OPTION_DMMU_WAYS < 2) ? 1 : 2; // bits of the SPR way index that are actually compared
+
+   wire [OPTION_OPERAND_WIDTH-1:0]        dtlb_match_dout[OPTION_DMMU_WAYS-1:0]; // port A read data, match RAM
+   wire [OPTION_DMMU_SET_WIDTH-1:0]       dtlb_match_addr;
+   reg [OPTION_DMMU_WAYS-1:0]             dtlb_match_we;
+   wire [OPTION_OPERAND_WIDTH-1:0]        dtlb_match_din;
+
+   wire [OPTION_OPERAND_WIDTH-1:0]        dtlb_match_huge_dout[OPTION_DMMU_WAYS-1:0]; // port B read data, match RAM
+   wire [OPTION_DMMU_SET_WIDTH-1:0]       dtlb_match_huge_addr;
+   wire                                   dtlb_match_huge_we;
+
+   wire [OPTION_OPERAND_WIDTH-1:0]        dtlb_trans_dout[OPTION_DMMU_WAYS-1:0]; // port A read data, translate RAM
+   wire [OPTION_DMMU_SET_WIDTH-1:0]       dtlb_trans_addr;
+   reg [OPTION_DMMU_WAYS-1:0]             dtlb_trans_we;
+   wire [OPTION_OPERAND_WIDTH-1:0]        dtlb_trans_din;
+
+   wire [OPTION_OPERAND_WIDTH-1:0]        dtlb_trans_huge_dout[OPTION_DMMU_WAYS-1:0]; // port B read data, translate RAM
+   wire [OPTION_DMMU_SET_WIDTH-1:0]       dtlb_trans_huge_addr;
+   wire                                   dtlb_trans_huge_we;
+
+   reg                                    dtlb_match_reload_we;   // one-cycle write strobe from the reload FSM
+   reg [OPTION_OPERAND_WIDTH-1:0]         dtlb_match_reload_din;
+
+   reg                                    dtlb_trans_reload_we;   // one-cycle write strobe from the reload FSM
+   reg [OPTION_OPERAND_WIDTH-1:0]         dtlb_trans_reload_din;
+
+   wire                                   dtlb_match_spr_cs;
+   reg                                    dtlb_match_spr_cs_r;    // registered copy, selects spr_bus_dat_o
+   wire                                   dtlb_trans_spr_cs;
+   reg                                    dtlb_trans_spr_cs_r;    // registered copy, selects spr_bus_dat_o
+
+   wire                                   dmmucr_spr_cs;
+   reg                                    dmmucr_spr_cs_r;
+   reg [OPTION_OPERAND_WIDTH-1:0]         dmmucr;                 // bits [31:10] form the upper bits of the first reload address
+
+   wire [1:0]                             spr_way_idx_full;
+   wire [WAYS_WIDTH-1:0]                  spr_way_idx;
+   reg [WAYS_WIDTH-1:0]                   spr_way_idx_r;
+
+   wire [OPTION_DMMU_WAYS-1:0]            way_huge;
+
+   wire [OPTION_DMMU_WAYS-1:0]            way_hit;
+   wire [OPTION_DMMU_WAYS-1:0]            way_huge_hit;
+
+   reg                                    tlb_reload_pagefault;
+   reg                                    tlb_reload_huge;        // only assigned inside the "ENABLED" reload FSM
+
+   // ure: user read enable
+   // uwe: user write enable
+   // sre: supervisor read enable
+   // swe: supervisor write enable
+   reg                                    ure;
+   reg                                    uwe;
+   reg                                    sre;
+   reg                                    swe;
+
+   reg                                    spr_bus_ack;
+
+   genvar                                 i;
+
+   always @(posedge clk `OR_ASYNC_RST) // registers "strobe to SPR group 1 seen" so the ack lags the strobe by one clock
+     if (rst)
+       spr_bus_ack <= 0;
+     else if (spr_bus_stb_i & spr_bus_addr_i[15:11] == 5'd1)
+       spr_bus_ack <= 1;
+     else
+       spr_bus_ack <= 0;
+
+   assign spr_bus_ack_o = spr_bus_ack & spr_bus_stb_i & // ack only while the same group-1 strobe is still asserted
+                          spr_bus_addr_i[15:11] == 5'd1;
+
+generate
+for (i = 0; i < OPTION_DMMU_WAYS; i=i+1) begin : ways
+   assign way_huge[i] = &dtlb_match_huge_dout[i][1:0]; // huge & valid
+
+   assign way_hit[i] = (dtlb_match_dout[i][31:13] == virt_addr_match_i[31:13]) & // tag compare on bits [31:13]
+                       dtlb_match_dout[i][0];  // valid bit
+
+   assign way_huge_hit[i] = (dtlb_match_huge_dout[i][31:24] == // tag compare on bits [31:24]
+                             virt_addr_match_i[31:24]) &
+                            dtlb_match_huge_dout[i][0];
+end
+endgenerate
+
+   integer j;
+   always @(*) begin // hit detection, translation mux and per-way RAM write enables
+      tlb_miss_o = !tlb_reload_pagefault; // default: miss, unless a reload page fault is pending
+      phys_addr_o = {OPTION_OPERAND_WIDTH{1'b0}};
+      phys_addr_o[23:0] = virt_addr_match_i[23:0]; // default keeps the low 24 address bits
+      ure = 0;
+      uwe = 0;
+      sre = 0;
+      swe = 0;
+      cache_inhibit_o = 0;
+
+      for (j = 0; j < OPTION_DMMU_WAYS; j=j+1) begin // later ways overwrite earlier ones when several hit
+         if (way_huge[j] & way_huge_hit[j] | !way_huge[j] & way_hit[j])
+            tlb_miss_o = 0;
+
+         if (way_huge[j] & way_huge_hit[j]) begin // huge entry: translate bits [31:24] only
+            phys_addr_o = {dtlb_trans_huge_dout[j][31:24], virt_addr_match_i[23:0]};
+            ure = dtlb_trans_huge_dout[j][6];
+            uwe = dtlb_trans_huge_dout[j][7];
+            sre = dtlb_trans_huge_dout[j][8];
+            swe = dtlb_trans_huge_dout[j][9];
+            cache_inhibit_o = dtlb_trans_huge_dout[j][1];
+         end else if (!way_huge[j] & way_hit[j])begin // normal entry: translate bits [31:13]
+            phys_addr_o = {dtlb_trans_dout[j][31:13], virt_addr_match_i[12:0]};
+            ure = dtlb_trans_dout[j][6];
+            uwe = dtlb_trans_dout[j][7];
+            sre = dtlb_trans_dout[j][8];
+            swe = dtlb_trans_dout[j][9];
+            cache_inhibit_o = dtlb_trans_dout[j][1];
+         end
+
+         dtlb_match_we[j] = 0;
+         if (dtlb_match_reload_we) // a hardware reload writes every way
+            dtlb_match_we[j] = 1;
+         if (j[WAYS_WIDTH-1:0] == spr_way_idx) // NOTE(review): replaces (does not OR with) the reload enable for this way, even with no SPR access - TODO confirm intended
+            dtlb_match_we[j] = dtlb_match_spr_cs & spr_bus_we_i;
+
+         dtlb_trans_we[j] = 0;
+         if (dtlb_trans_reload_we) // a hardware reload writes every way
+            dtlb_trans_we[j] = 1;
+         if (j[WAYS_WIDTH-1:0] == spr_way_idx) // NOTE(review): same override as for dtlb_match_we above
+            dtlb_trans_we[j] = dtlb_trans_spr_cs & spr_bus_we_i;
+      end
+   end
+
+   assign pagefault_o = (supervisor_mode_i ? // store needs a write enable, load needs a read enable
+                         !swe & op_store_i || !sre & op_load_i :
+                         !uwe & op_store_i || !ure & op_load_i) &
+                        !tlb_reload_busy_o;
+
+   assign spr_way_idx_full = {spr_bus_addr_i[10], spr_bus_addr_i[8]}; // way index comes from SPR address bits 10 and 8
+   assign spr_way_idx = spr_way_idx_full[WAYS_WIDTH-1:0];
+
+   always @(posedge clk `OR_ASYNC_RST) // one-cycle delayed chip selects / way index used by the spr_bus_dat_o mux
+     if (rst) begin
+        dtlb_match_spr_cs_r <= 0;
+        dtlb_trans_spr_cs_r <= 0;
+        dmmucr_spr_cs_r <= 0;
+        spr_way_idx_r <= 0;
+     end else begin
+        dtlb_match_spr_cs_r <= dtlb_match_spr_cs;
+        dtlb_trans_spr_cs_r <= dtlb_trans_spr_cs;
+        dmmucr_spr_cs_r <= dmmucr_spr_cs;
+        spr_way_idx_r <= spr_way_idx;
+     end
+
+generate /* verilator lint_off WIDTH */
+if (FEATURE_DMMU_HW_TLB_RELOAD == "ENABLED") begin
+/* verilator lint_on WIDTH */
+   assign dmmucr_spr_cs = spr_bus_stb_i & // DMMUCR is only writable when hardware reload is built in
+                          spr_bus_addr_i == `OR1K_SPR_DMMUCR_ADDR;
+
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       dmmucr <= 0;
+     else if (dmmucr_spr_cs & spr_bus_we_i)
+       dmmucr <= spr_bus_dat_i;
+
+end else begin
+   assign dmmucr_spr_cs = 0;
+   always @(posedge clk) // without hardware reload DMMUCR is held at zero
+     dmmucr <= 0;
+end
+endgenerate
+
+   assign dtlb_match_spr_cs = spr_bus_stb_i & (spr_bus_addr_i[15:11] == 5'd1) & // group 1, addr[10:9] != 0, addr[7] == 0
+                              |spr_bus_addr_i[10:9] & !spr_bus_addr_i[7];
+   assign dtlb_trans_spr_cs = spr_bus_stb_i & (spr_bus_addr_i[15:11] == 5'd1) & // group 1, addr[10:9] != 0, addr[7] == 1
+                              |spr_bus_addr_i[10:9] & spr_bus_addr_i[7];
+
+   assign dtlb_match_addr = dtlb_match_spr_cs ? // an SPR access takes over port A of the match RAM
+                            spr_bus_addr_i[OPTION_DMMU_SET_WIDTH-1:0] :
+                            virt_addr_i[13+(OPTION_DMMU_SET_WIDTH-1):13];
+   assign dtlb_trans_addr = dtlb_trans_spr_cs ? // an SPR access takes over port A of the translate RAM
+                            spr_bus_addr_i[OPTION_DMMU_SET_WIDTH-1:0] :
+                            virt_addr_i[13+(OPTION_DMMU_SET_WIDTH-1):13];
+
+   assign dtlb_match_din = dtlb_match_reload_we ? dtlb_match_reload_din : // reload data wins over SPR write data
+                           spr_bus_dat_i;
+   assign dtlb_trans_din = dtlb_trans_reload_we ? dtlb_trans_reload_din : // reload data wins over SPR write data
+                           spr_bus_dat_i;
+
+   assign dtlb_match_huge_addr = virt_addr_i[24+(OPTION_DMMU_SET_WIDTH-1):24]; // huge entries are indexed from bit 24 upwards
+   assign dtlb_trans_huge_addr = virt_addr_i[24+(OPTION_DMMU_SET_WIDTH-1):24];
+
+   assign dtlb_match_huge_we = dtlb_match_reload_we & tlb_reload_huge; // port B is written only by a huge-page reload
+   assign dtlb_trans_huge_we = dtlb_trans_reload_we & tlb_reload_huge;
+
+   assign spr_bus_dat_o = dtlb_match_spr_cs_r ? dtlb_match_dout[spr_way_idx_r] : // read mux uses the registered selects
+                          dtlb_trans_spr_cs_r ? dtlb_trans_dout[spr_way_idx_r] :
+                          dmmucr_spr_cs_r ? dmmucr : 0;
+
+   localparam TLB_IDLE            = 2'd0; // waiting for do_reload
+   localparam TLB_GET_PTE_POINTER = 2'd1; // first-level read outstanding
+   localparam TLB_GET_PTE         = 2'd2; // PTE read outstanding
+   localparam TLB_READ            = 2'd3; // one cycle for the written entry to reach the read ports
+
+generate /* verilator lint_off WIDTH */
+if (FEATURE_DMMU_HW_TLB_RELOAD == "ENABLED") begin
+   /* verilator lint_on WIDTH */
+
+   // Hardware TLB reload
+   // Compliant with the suggestion outlined in this thread:
+   // http://lists.openrisc.net/pipermail/openrisc/2013-July/001806.html
+   //
+   // PTE layout:
+   // | 31 ... 13 | 12 |  11  |  10   | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 |
+   // |    PPN    | Reserved  |PRESENT| L | X | W | U | D | A |WOM|WBC|CI |CC |
+   //
+   // Where X/W/U maps into SWE/SRE/UWE/URE like this:
+   // X | W | U      SWE | SRE | UWE | URE
+   // ----------     ---------------------
+   // x | 0 | 0  =    0  |  1  |  0  |  0
+   // x | 0 | 1  =    0  |  1  |  0  |  1
+   // x | 1 | 0  =    1  |  1  |  0  |  0
+   // x | 1 | 1  =    1  |  1  |  1  |  1
+
+
+
+   reg [1:0] tlb_reload_state = TLB_IDLE; // initialised by declaration; the FSM below has no reset branch
+   wire      do_reload;
+
+   assign do_reload = enable_i & tlb_miss_o & (dmmucr[31:10] != 0) & // start only for a load/store miss with DMMUCR[31:10] non-zero
+                      (op_load_i | op_store_i);
+
+   assign tlb_reload_busy_o = enable_i & (tlb_reload_state != TLB_IDLE) | do_reload;
+
+   assign tlb_reload_pagefault_o = tlb_reload_pagefault & // drops in the same cycle the clear input is asserted
+                                   !tlb_reload_pagefault_clear_i;
+
+   always @(posedge clk) begin
+      if (tlb_reload_pagefault_clear_i)
+        tlb_reload_pagefault <= 0;
+      dtlb_trans_reload_we <= 0;  // defaults; overridden further down when an entry is written
+      dtlb_trans_reload_din <= 0;
+      dtlb_match_reload_we <= 0;
+      dtlb_match_reload_din <= 0;
+
+      case (tlb_reload_state)
+        TLB_IDLE: begin
+           tlb_reload_huge <= 0;
+           tlb_reload_req_o <= 0;
+           if (do_reload) begin
+              tlb_reload_req_o <= 1;
+              tlb_reload_addr_o <= {dmmucr[31:10], // first-level entry selected by VA[31:24], word aligned
+                                    virt_addr_match_i[31:24], 2'b00};
+              tlb_reload_state <= TLB_GET_PTE_POINTER;
+           end
+        end
+
+        //
+        // Here we get the pointer to the PTE table, next is to fetch
+        // the actual pte from the offset in the table.
+        // The offset is calculated by:
+        // ((virt_addr_match >> PAGE_BITS) & (PTE_CNT-1)) << 2
+        // Where PAGE_BITS is 13 (8 kb page) and PTE_CNT is 2048
+        // (number of PTEs in the PTE table)
+        //
+        TLB_GET_PTE_POINTER: begin
+           tlb_reload_huge <= 0;
+           if (tlb_reload_ack_i) begin
+              if (tlb_reload_data_i[31:13] == 0) begin // zero pointer: raise a reload page fault
+                 tlb_reload_pagefault <= 1;
+                 tlb_reload_req_o <= 0;
+                 tlb_reload_state <= TLB_IDLE;
+              end else if (tlb_reload_data_i[9]) begin // L bit set: this word is treated as a huge-page PTE
+                 tlb_reload_huge <= 1;
+                 tlb_reload_req_o <= 0;
+                 tlb_reload_state <= TLB_GET_PTE;
+              end else begin // request stays asserted for the second read
+                 tlb_reload_addr_o <= {tlb_reload_data_i[31:13],
+                                       virt_addr_match_i[23:13], 2'b00};
+                 tlb_reload_state <= TLB_GET_PTE;
+              end
+           end
+        end
+
+        //
+        // Here we get the actual PTE, left to do is to translate the
+        // PTE data into our translate and match registers.
+        //
+        TLB_GET_PTE: begin
+           if (tlb_reload_ack_i) begin
+              tlb_reload_req_o <= 0;
+              // Check PRESENT bit
+              if (!tlb_reload_data_i[10]) begin
+                 tlb_reload_pagefault <= 1;
+                 tlb_reload_state <= TLB_IDLE;
+              end else begin
+                 // Translate register generation.
+                 // PPN
+                 dtlb_trans_reload_din[31:13] <= tlb_reload_data_i[31:13];
+                 // SWE = W
+                 dtlb_trans_reload_din[9] <= tlb_reload_data_i[7];
+                 // SRE = 1
+                 dtlb_trans_reload_din[8] <= 1'b1;
+                 // UWE = W & U
+                 dtlb_trans_reload_din[7] <= tlb_reload_data_i[7] &
+                                             tlb_reload_data_i[6];
+                 // URE = U
+                 dtlb_trans_reload_din[6] <= tlb_reload_data_i[6];
+                 // Dirty, Accessed, Weakly-Ordered-Memory, Writeback cache,
+                 // Cache inhibit, Cache coherent
+                 dtlb_trans_reload_din[5:0] <= tlb_reload_data_i[5:0];
+                 dtlb_trans_reload_we <= 1;
+
+                 // Match register generation.
+                 // VPN
+                 dtlb_match_reload_din[31:13] <= virt_addr_match_i[31:13];
+                 // Valid - NOTE(review): bit 1 stays 0 here although way_huge requires bits [1:0] set; TODO confirm for huge reloads
+                 dtlb_match_reload_din[0] <= 1;
+                 dtlb_match_reload_we <= 1;
+
+                 tlb_reload_state <= TLB_READ;
+              end
+           end
+        end
+
+        // Let the just written values propagate out on the read ports
+        TLB_READ: begin
+           tlb_reload_state <= TLB_IDLE;
+        end
+
+        default:
+          tlb_reload_state <= TLB_IDLE;
+      endcase
+
+      // Abort if enable deasserts or DMMUCR[31:10] is zero in the middle of a reload (last assignment wins)
+      if (!enable_i | (dmmucr[31:10] == 0))
+        tlb_reload_state <= TLB_IDLE;
+
+   end
+end else begin // if (FEATURE_DMMU_HW_TLB_RELOAD == "ENABLED")
+   assign tlb_reload_pagefault_o = 0;
+   assign tlb_reload_busy_o = 0;
+   always @(posedge clk) begin // tie off every reload register; tlb_reload_huge is not assigned in this branch
+      tlb_reload_req_o <= 0;
+      tlb_reload_addr_o <= 0;
+      tlb_reload_pagefault <= 0;
+      dtlb_trans_reload_we <= 0;
+      dtlb_trans_reload_din <= 0;
+      dtlb_match_reload_we <= 0;
+      dtlb_match_reload_din <= 0;
+   end
+end
+endgenerate
+
+generate
+for (i = 0; i < OPTION_DMMU_WAYS; i=i+1) begin : dtlb
+   // DTLB match registers (port A: normal entries and SPR access, port B: huge entries)
+   mor1kx_true_dpram_sclk
+     #(
+       .ADDR_WIDTH(OPTION_DMMU_SET_WIDTH),
+       .DATA_WIDTH(OPTION_OPERAND_WIDTH)
+       )
+   dtlb_match_regs
+     (
+      // Outputs
+      .dout_a (dtlb_match_dout[i]),
+      .dout_b (dtlb_match_huge_dout[i]),
+      // Inputs
+      .clk_a  (clk),
+      .addr_a (dtlb_match_addr),
+      .we_a   (dtlb_match_we[i]),
+      .din_a  (dtlb_match_din),
+      .clk_b  (clk),
+      .addr_b (dtlb_match_huge_addr),
+      .we_b   (dtlb_match_huge_we),
+      .din_b  (dtlb_match_reload_din)
+      );
+
+
+   // DTLB translate registers (port A: normal entries and SPR access, port B: huge entries)
+   mor1kx_true_dpram_sclk
+     #(
+       .ADDR_WIDTH(OPTION_DMMU_SET_WIDTH),
+       .DATA_WIDTH(OPTION_OPERAND_WIDTH)
+       )
+   dtlb_translate_regs
+     (
+      // Outputs
+      .dout_a (dtlb_trans_dout[i]),
+      .dout_b (dtlb_trans_huge_dout[i]),
+      // Inputs
+      .clk_a  (clk),
+      .addr_a (dtlb_trans_addr),
+      .we_a   (dtlb_trans_we[i]),
+      .din_a  (dtlb_trans_din),
+      .clk_b  (clk),
+      .addr_b (dtlb_trans_huge_addr),
+      .we_b   (dtlb_trans_huge_we),
+      .din_b  (dtlb_trans_reload_din)
+      );
+end
+endgenerate
+
+endmodule // mor1kx_dmmu
diff --git 
a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_execute_alu.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_execute_alu.v
new file mode 100644
index 0000000..8bd771f
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_execute_alu.v
@@ -0,0 +1,860 @@
+/* ****************************************************************************
+  This Source Code Form is subject to the terms of the
+  Open Hardware Description License, v. 1.0. If a copy
+  of the OHDL was not distributed with this file, You
+  can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+  Description: mor1kx execute stage ALU (adder/comparator, logic, shifter, multiplier, divider, ffl1, cmov, sign extension, FPU glue, result mux, stall)
+
+  Inputs are opcodes, the immediate field, operands from RF, instruction
+  opcode
+
+  Copyright (C) 2012 Julius Baxter
+  Copyright (C) 2012-2014 Stefan Kristiansson
+
+***************************************************************************** */
+
+`include "mor1kx-defines.v"
+
+module mor1kx_execute_alu
+  #(
+    parameter OPTION_OPERAND_WIDTH = 32,
+
+    parameter FEATURE_OVERFLOW = "NONE",       // != "NONE" enables overflow_set_o / overflow_clear_o
+    parameter FEATURE_CARRY_FLAG = "ENABLED",  // != "NONE" enables carry_set_o / carry_clear_o
+
+    parameter FEATURE_MULTIPLIER = "THREESTAGE", // THREESTAGE | PIPELINED | SERIAL | SIMULATION | NONE
+    parameter FEATURE_DIVIDER = "NONE",          // SERIAL | SIMULATION | NONE
+
+    parameter FEATURE_ADDC = "NONE",     // not referenced inside this module
+    parameter FEATURE_SRA = "ENABLED",   // checked by the barrel shifter only
+    parameter FEATURE_ROR = "NONE",      // checked by the barrel shifter only
+    parameter FEATURE_EXT = "NONE",      // "ENABLED" drives ext_result
+    parameter FEATURE_CMOV = "NONE",     // "ENABLED" drives cmov_result
+    parameter FEATURE_FFL1 = "NONE",     // "NONE" | "REGISTERED" | any other value = combinational
+
+    parameter FEATURE_CUST1 = "NONE",    // FEATURE_CUST1..8 are not referenced inside this module
+    parameter FEATURE_CUST2 = "NONE",
+    parameter FEATURE_CUST3 = "NONE",
+    parameter FEATURE_CUST4 = "NONE",
+    parameter FEATURE_CUST5 = "NONE",
+    parameter FEATURE_CUST6 = "NONE",
+    parameter FEATURE_CUST7 = "NONE",
+    parameter FEATURE_CUST8 = "NONE",
+
+    parameter FEATURE_FPU    = "NONE", // ENABLED|NONE
+    parameter OPTION_FTOI_ROUNDING = "CPP", // "CPP" / "IEEE"
+    parameter OPTION_SHIFTER = "BARREL",    // BARREL | SERIAL
+
+    // Pipeline specific internal parameters
+    parameter CALCULATE_BRANCH_DEST = "TRUE" // "TRUE": the adder also computes pc + branch offset
+    )
+   (
+    input                             clk,
+    input                             rst,
+
+    // pipeline control signal in
+    input                             padv_decode_i,
+    input                             padv_execute_i,
+    input                             padv_ctrl_i,       // not referenced inside this module
+
+    input                             pipeline_flush_i ,// flush pipelined fpu
+
+    // inputs to ALU
+    input [`OR1K_ALU_OPC_WIDTH-1:0]   opc_alu_i,
+    input [`OR1K_ALU_OPC_WIDTH-1:0]   opc_alu_secondary_i,
+
+    input [`OR1K_IMM_WIDTH-1:0]       imm16_i,
+    input [OPTION_OPERAND_WIDTH-1:0]  immediate_i,
+    input                             immediate_sel_i,   // operand b = immediate_i instead of rfb_i
+
+    input [OPTION_OPERAND_WIDTH-1:0]  decode_immediate_i,
+    input                             decode_immediate_sel_i,
+
+    input                             decode_valid_i,    // (re)starts the multi-cycle units and masks their valid outputs
+
+    input                             decode_op_mul_i,
+
+    input                             op_alu_i,
+    input                             op_add_i,
+    input                             op_mul_i,
+    input                             op_mul_signed_i,
+    input                             op_mul_unsigned_i,
+    input                             op_div_i,
+    input                             op_div_signed_i,
+    input                             op_div_unsigned_i,
+    input                             op_shift_i,
+    input                             op_ffl1_i,
+    input                             op_setflag_i,
+    input                             op_mtspr_i,
+    input                             op_mfspr_i,
+    input                             op_movhi_i,
+    input                             op_ext_i,
+    input [`OR1K_FPUOP_WIDTH-1:0]     op_fpu_i,
+    input [`OR1K_FPCSR_RM_SIZE-1:0]   fpu_round_mode_i,
+    input                             op_jbr_i,
+    input                             op_jr_i,
+    input [9:0]                       immjbr_upper_i,
+    input [OPTION_OPERAND_WIDTH-1:0]  pc_execute_i,
+
+    // Adder control logic
+    input                             adder_do_sub_i,    // invert b and force carry-in: a - b
+    input                             adder_do_carry_i,  // add carry_i as carry-in
+
+    input [OPTION_OPERAND_WIDTH-1:0]  decode_rfa_i,
+    input [OPTION_OPERAND_WIDTH-1:0]  decode_rfb_i,
+
+    input [OPTION_OPERAND_WIDTH-1:0]  rfa_i,
+    input [OPTION_OPERAND_WIDTH-1:0]  rfb_i,
+
+    // flag fed back from ctrl
+    input                             flag_i,
+
+    output                            flag_set_o,
+    output                            flag_clear_o,
+
+    input                             carry_i,
+    output                            carry_set_o,
+    output                            carry_clear_o,
+
+    output                            overflow_set_o,
+    output                            overflow_clear_o,
+
+    output [`OR1K_FPCSR_WIDTH-1:0]    fpcsr_o,
+    output                            fpcsr_set_o,
+
+    output [OPTION_OPERAND_WIDTH-1:0] alu_result_o,
+    output                            alu_valid_o,       // low while a selected multi-cycle unit is not finished
+    output [OPTION_OPERAND_WIDTH-1:0] mul_result_o,
+    output [OPTION_OPERAND_WIDTH-1:0] adder_result_o
+    );
+
+   wire                                   alu_stall;
+
+   wire [OPTION_OPERAND_WIDTH-1:0]        a;
+   wire [OPTION_OPERAND_WIDTH-1:0]        b;
+
+   // Adder & comparator wires
+   wire [OPTION_OPERAND_WIDTH-1:0]        adder_result;
+   wire                                   adder_carryout;
+   wire                                   adder_signed_overflow;
+   wire                                   adder_unsigned_overflow;
+   wire                                   adder_result_sign;
+
+   wire [OPTION_OPERAND_WIDTH-1:0]        b_neg;
+   wire [OPTION_OPERAND_WIDTH-1:0]        b_mux;
+   wire                                   carry_in;
+
+   wire                                   a_eq_b;
+   wire                                   a_lts_b;
+   wire                                   a_ltu_b;
+
+   // Shifter wires
+   wire [`OR1K_ALU_OPC_SECONDARY_WIDTH-1:0] opc_alu_shr;
+   wire [OPTION_OPERAND_WIDTH-1:0]        shift_result;
+   wire                                   shift_valid;
+
+   // Comparison wires
+   reg                                    flag_set; // comb.
+
+   // Logic wires
+   wire                                   op_logic;
+   reg [OPTION_OPERAND_WIDTH-1:0]         logic_result;
+
+   // Multiplier wires
+   wire [OPTION_OPERAND_WIDTH-1:0]        mul_result;
+   wire                                   mul_valid;
+   wire                                   mul_signed_overflow;
+   wire                                   mul_unsigned_overflow;
+
+   wire [OPTION_OPERAND_WIDTH-1:0]        div_result;
+   wire                                   div_valid;
+   wire                                   div_by_zero;
+
+
+   wire [OPTION_OPERAND_WIDTH-1:0]        ffl1_result;
+
+   wire                                   op_cmov;
+   wire [OPTION_OPERAND_WIDTH-1:0]        cmov_result;
+
+   wire [OPTION_OPERAND_WIDTH-1:0]        decode_a; // NOTE(review): only driven in the CALCULATE_BRANCH_DEST != "TRUE" branch below
+   wire [OPTION_OPERAND_WIDTH-1:0]        decode_b; // NOTE(review): read by the PIPELINED multiplier - TODO confirm that pairing
+
+   // Sign extension wires
+   reg [OPTION_OPERAND_WIDTH-1:0]         ext_result; // comb
+   wire [`OR1K_ALU_OPC_SECONDARY_WIDTH-1:0] opc_alu_ext;
+generate
+if (CALCULATE_BRANCH_DEST=="TRUE") begin : calculate_branch_dest
+   assign a = (op_jbr_i | op_jr_i) ? pc_execute_i : rfa_i; // branches/jumps add to the PC
+   assign b = immediate_sel_i ? immediate_i :
+              op_jbr_i ? {{4{immjbr_upper_i[9]}},immjbr_upper_i,imm16_i,2'b00} : // sign-extended 26-bit offset, word aligned
+              rfb_i;
+end else begin
+   assign a = rfa_i;
+   assign b = immediate_sel_i ? immediate_i : rfb_i;
+
+   assign decode_a = decode_rfa_i;
+   assign decode_b = decode_immediate_sel_i ? decode_immediate_i : decode_rfb_i;
+
+end
+endgenerate
+
+   assign opc_alu_shr = opc_alu_secondary_i[`OR1K_ALU_OPC_SECONDARY_WIDTH-1:0];
+   assign opc_alu_ext = opc_alu_secondary_i[`OR1K_ALU_OPC_SECONDARY_WIDTH-1:0];
+
+   // Adder/subtractor inputs
+   assign b_neg = ~b;
+   assign carry_in = adder_do_sub_i | adder_do_carry_i & carry_i; // subtract = a + ~b + 1
+   assign b_mux = adder_do_sub_i ? b_neg : b;
+   // Adder
+   assign {adder_carryout, adder_result} = a + b_mux +
+                                           {{OPTION_OPERAND_WIDTH-1{1'b0}},
+                                            carry_in};
+
+   assign adder_result_sign = adder_result[OPTION_OPERAND_WIDTH-1];
+
+   assign adder_signed_overflow = // Input signs are same and ...
+                                  (a[OPTION_OPERAND_WIDTH-1] ==
+                                   b_mux[OPTION_OPERAND_WIDTH-1]) &
+                                  // result sign is different to input signs
+                                  (a[OPTION_OPERAND_WIDTH-1] ^
+                                   adder_result[OPTION_OPERAND_WIDTH-1]);
+
+   assign adder_unsigned_overflow = adder_carryout;
+
+   assign adder_result_o = adder_result;
+
+   generate
+      /* verilator lint_off WIDTH */
+      if (FEATURE_MULTIPLIER=="THREESTAGE") begin : threestagemultiply
+         /* verilator lint_on WIDTH */
+         // 32-bit multiplier with three registering stages to help with timing
+         reg [OPTION_OPERAND_WIDTH-1:0]  mul_opa;
+         reg [OPTION_OPERAND_WIDTH-1:0]  mul_opb;
+         reg [OPTION_OPERAND_WIDTH-1:0]  mul_result1;
+         reg [OPTION_OPERAND_WIDTH-1:0]  mul_result2;
+         reg [2:0]                       mul_valid_shr;
+
+         always @(posedge clk) begin
+            if (op_mul_i) begin
+               mul_opa <= a;
+               mul_opb <= b;
+            end
+            mul_result1 <= mul_opa * mul_opb;
+            mul_result2 <= mul_result1;
+         end
+
+         assign mul_result = mul_result2;
+
+         always @(posedge clk) // shift a 1 up from bit 0; bit 2 is held until the next decode_valid_i
+           if (decode_valid_i)
+             mul_valid_shr <= {2'b00, op_mul_i};
+           else
+             mul_valid_shr <= mul_valid_shr[2] ? mul_valid_shr:
+                              {mul_valid_shr[1:0], 1'b0};
+
+         assign mul_valid = mul_valid_shr[2] & !decode_valid_i;
+
+         // Can't detect unsigned overflow in this implementation
+         assign mul_unsigned_overflow = 0;
+
+      end // if (FEATURE_MULTIPLIER=="THREESTAGE")
+      /* verilator lint_off WIDTH */
+      else if (FEATURE_MULTIPLIER=="PIPELINED") begin : pipelinedmultiply
+         /* verilator lint_on WIDTH */
+         // 32-bit multiplier in sync with cpu pipeline
+         reg [OPTION_OPERAND_WIDTH-1:0]  mul_opa;
+         reg [OPTION_OPERAND_WIDTH-1:0]  mul_opb;
+         reg [OPTION_OPERAND_WIDTH-1:0]  mul_result1;
+         reg [OPTION_OPERAND_WIDTH-1:0]  mul_result2;
+
+         always @(posedge clk) begin
+            if (decode_op_mul_i & padv_decode_i) begin // operands are captured from the decode stage
+               mul_opa <= decode_a;
+               mul_opb <= decode_b;
+            end
+            if (padv_execute_i)
+              mul_result1 <= mul_opa * mul_opb;
+
+            mul_result2 <= mul_result1;
+         end
+
+         assign mul_result = mul_result2;
+
+         assign mul_valid = 1; // never stalls the ALU
+
+         // Can't detect unsigned overflow in this implementation
+         assign mul_unsigned_overflow = 0;
+
+      end // if (FEATURE_MULTIPLIER=="PIPELINED")
+      else if (FEATURE_MULTIPLIER=="SERIAL") begin : serialmultiply
+         reg [(OPTION_OPERAND_WIDTH*2)-1:0] mul_prod_r;
+         reg [5:0]                          serial_mul_cnt;
+         reg                                mul_done;
+         wire [OPTION_OPERAND_WIDTH-1:0]    mul_a, mul_b;
+
+         // For a signed multiply, convert any negative operand (a or b)
+         // to its positive magnitude
+         assign mul_a = op_mul_signed_i & a[OPTION_OPERAND_WIDTH-1] ?
+                        ~a + 1 : a;
+         assign mul_b = op_mul_signed_i & b[OPTION_OPERAND_WIDTH-1] ?
+                        ~b + 1 : b;
+
+         always @(posedge clk) // shift-and-add; multiplier bits are consumed LSB first from the low half
+           if (rst) begin
+              mul_prod_r <=  64'h0000_0000_0000_0000; // NOTE(review): literal widths here assume OPTION_OPERAND_WIDTH == 32
+              serial_mul_cnt <= 6'd0;
+              mul_done <= 1'b0;
+           end
+           else if (|serial_mul_cnt) begin
+              serial_mul_cnt <= serial_mul_cnt - 6'd1;
+
+              if (mul_prod_r[0])
+                mul_prod_r[(OPTION_OPERAND_WIDTH*2)-1:OPTION_OPERAND_WIDTH-1]
+                  <= mul_prod_r[(OPTION_OPERAND_WIDTH*2)-1:OPTION_OPERAND_WIDTH] + mul_a;
+              else
+                mul_prod_r[(OPTION_OPERAND_WIDTH*2)-1:OPTION_OPERAND_WIDTH-1]
+                  <= {1'b0,mul_prod_r[(OPTION_OPERAND_WIDTH*2)-1:OPTION_OPERAND_WIDTH]};
+
+              mul_prod_r[OPTION_OPERAND_WIDTH-2:0] <= mul_prod_r[OPTION_OPERAND_WIDTH-1:1];
+
+              if (serial_mul_cnt==6'd1)
+                mul_done <= 1'b1;
+
+           end
+           else if (decode_valid_i && op_mul_i) begin
+              mul_prod_r[(OPTION_OPERAND_WIDTH*2)-1:OPTION_OPERAND_WIDTH] <= 32'd0;
+              mul_prod_r[OPTION_OPERAND_WIDTH-1:0] <= mul_b;
+              mul_done <= 0;
+              serial_mul_cnt <= 6'b10_0000; // 32 iterations
+           end
+           else if (decode_valid_i) begin
+              mul_done <= 1'b0;
+           end
+
+         assign mul_valid  = mul_done & !decode_valid_i;
+
+         assign mul_result = op_mul_signed_i ? // negate the magnitude product when operand signs differ
+                             ((a[OPTION_OPERAND_WIDTH-1] ^
+                               b[OPTION_OPERAND_WIDTH-1]) ?
+                              ~mul_prod_r[OPTION_OPERAND_WIDTH-1:0] + 1 :
+                              mul_prod_r[OPTION_OPERAND_WIDTH-1:0]) :
+                             mul_prod_r[OPTION_OPERAND_WIDTH-1:0];
+
+         assign mul_unsigned_overflow = OPTION_OPERAND_WIDTH==64 ? 0 :
+                                        |mul_prod_r[(OPTION_OPERAND_WIDTH*2)-1:
+                                                    OPTION_OPERAND_WIDTH];
+
+         // synthesis translate_off
+         `ifndef verilator
+         always @(posedge mul_valid) // simulation-only self check against the behavioural product
+           begin
+              @(posedge clk);
+
+              if (((a*b) & {OPTION_OPERAND_WIDTH{1'b1}}) != mul_result)
+                begin
+                   $display("%t incorrect serial multiply result at pc %08h",
+                            $time, pc_execute_i);
+                   $display("a=%08h b=%08h, mul_result=%08h, expected %08h",
+                            a, b, mul_result, ((a*b) & {OPTION_OPERAND_WIDTH{1'b1}}));
+                end
+           end
+         `endif
+         // synthesis translate_on
+
+      end // if (FEATURE_MULTIPLIER=="SERIAL")
+      else if (FEATURE_MULTIPLIER=="SIMULATION") begin
+         // Simple multiplier result
+         wire [(OPTION_OPERAND_WIDTH*2)-1:0] mul_full_result;
+         assign mul_full_result = a * b;
+         assign mul_result = mul_full_result[OPTION_OPERAND_WIDTH-1:0];
+
+         assign mul_unsigned_overflow =  OPTION_OPERAND_WIDTH==64 ? 0 :
+                 |mul_full_result[(OPTION_OPERAND_WIDTH*2)-1:OPTION_OPERAND_WIDTH];
+
+         assign mul_valid = 1;
+      end
+      else if (FEATURE_MULTIPLIER=="NONE") begin
+         // No multiplier
+         assign mul_result = 0;
+         assign mul_valid = 1'b1;
+         assign mul_unsigned_overflow = 0;
+      end
+      else begin
+         // Incorrect configuration option
+         initial begin
+            $display("%m: Error - chosen multiplier implementation (%s) not available",
+                     FEATURE_MULTIPLIER);
+            $finish;
+         end
+      end
+   endgenerate
+
+   // One signed overflow detection for all multiplication implementations
+   assign mul_signed_overflow = (FEATURE_MULTIPLIER=="NONE") ||
+                                (FEATURE_MULTIPLIER=="PIPELINED") ? 1'b0 :
+                                // Same signs, check for negative result
+                                // (should be positive)
+                                ((a[OPTION_OPERAND_WIDTH-1] ==
+                                  b[OPTION_OPERAND_WIDTH-1]) &&
+                                 mul_result[OPTION_OPERAND_WIDTH-1]) ||
+                                // Differing signs, check for positive result
+                                // (should be negative)
+                                ((a[OPTION_OPERAND_WIDTH-1] ^
+                                  b[OPTION_OPERAND_WIDTH-1]) &&
+                                 !mul_result[OPTION_OPERAND_WIDTH-1]);
+
+   assign mul_result_o = mul_result;
+
+   generate
+      /* verilator lint_off WIDTH */
+      if (FEATURE_DIVIDER=="SERIAL") begin
+         /* verilator lint_on WIDTH */
+         reg [5:0]                       div_count;
+         reg [OPTION_OPERAND_WIDTH-1:0]  div_n; // dividend, shifted out MSB first; ends up holding the quotient
+         reg [OPTION_OPERAND_WIDTH-1:0]  div_d; // divisor magnitude
+         reg [OPTION_OPERAND_WIDTH-1:0]  div_r; // running remainder
+         wire [OPTION_OPERAND_WIDTH:0]   div_sub;
+         reg                             div_neg;
+         reg                             div_done;
+         reg                             div_by_zero_r;
+
+
+         assign div_sub = {div_r[OPTION_OPERAND_WIDTH-2:0], // trial subtraction; the top bit is the borrow
+                           div_n[OPTION_OPERAND_WIDTH-1]} - div_d;
+
+         /* Cycle counter */
+         always @(posedge clk `OR_ASYNC_RST)
+           if (rst) begin
+              div_done <= 0;
+              div_count <= 0;
+           end else if (decode_valid_i & op_div_i) begin
+              div_done <= 0;
+              div_count <= OPTION_OPERAND_WIDTH[5:0];
+           end else if (div_count == 1)
+             div_done <= 1;
+           else if (!div_done)
+             div_count <= div_count - 1'd1;
+
+         always @(posedge clk) begin
+            if (decode_valid_i & op_div_i) begin
+               div_n <= rfa_i;
+               div_d <= rfb_i;
+               div_r <= 0;
+               div_neg <= 1'b0;
+               div_by_zero_r <= !(|rfb_i);
+
+               /*
+                * Convert negative operands in the case of signed division.
+                * If only one of the operands is negative, the result is
+                * converted back to negative later on
+                */
+               if (op_div_signed_i) begin
+                  if (rfa_i[OPTION_OPERAND_WIDTH-1] ^
+                      rfb_i[OPTION_OPERAND_WIDTH-1])
+                    div_neg <= 1'b1;
+
+                  if (rfa_i[OPTION_OPERAND_WIDTH-1])
+                    div_n <= ~rfa_i + 1;
+
+                  if (rfb_i[OPTION_OPERAND_WIDTH-1])
+                    div_d <= ~rfb_i + 1;
+               end
+            end else if (!div_done) begin
+               if (!div_sub[OPTION_OPERAND_WIDTH]) begin // div_sub >= 0
+                  div_r <= div_sub[OPTION_OPERAND_WIDTH-1:0];
+                  div_n <= {div_n[OPTION_OPERAND_WIDTH-2:0], 1'b1};
+               end else begin // div_sub < 0
+                  div_r <= {div_r[OPTION_OPERAND_WIDTH-2:0],
+                            div_n[OPTION_OPERAND_WIDTH-1]};
+                  div_n <= {div_n[OPTION_OPERAND_WIDTH-2:0], 1'b0};
+               end
+            end
+         end
+
+         assign div_valid = div_done & !decode_valid_i;
+         assign div_result = div_neg ? ~div_n + 1 : div_n;
+         assign div_by_zero = div_by_zero_r;
+      end
+      /* verilator lint_off WIDTH */
+      else if (FEATURE_DIVIDER=="SIMULATION") begin
+         /* verilator lint_on WIDTH */
+         assign div_result = a / b;
+         assign div_valid = 1;
+         assign div_by_zero = (opc_alu_i == `OR1K_ALU_OPC_DIV ||
+                               opc_alu_i == `OR1K_ALU_OPC_DIVU) && !(|b);
+
+      end
+      else if (FEATURE_DIVIDER=="NONE") begin
+         assign div_result = 0;
+         assign div_valid = 1'b1;
+         assign div_by_zero = 0;
+      end
+      else begin
+         // Incorrect configuration option
+         initial begin
+            $display("%m: Error - chosen divider implementation (%s) not available",
+                     FEATURE_DIVIDER);
+            $finish;
+         end
+      end
+   endgenerate
+
+
+   // FPU related
+   //  arithmetic part interface
+   wire fpu_op_is_arith;
+   wire fpu_arith_valid;
+   wire [OPTION_OPERAND_WIDTH-1:0] fpu_result;
+   //  comparator part interface
+   wire fpu_op_is_cmp;
+   wire fpu_cmp_valid;
+   wire fpu_cmp_flag;
+   //  instance
+   generate
+      /* verilator lint_off WIDTH */
+      if (FEATURE_FPU!="NONE") begin :  fpu_alu_ena
+         /* verilator lint_on WIDTH */
+         // fpu32 instance
+         pfpu32_top
+           #(
+             .OPTION_FTOI_ROUNDING(OPTION_FTOI_ROUNDING) // fpu32 instance
+             )
+         u_pfpu32
+           (
+            .clk(clk),
+            .rst(rst), .flush_i(pipeline_flush_i),
+            .padv_decode_i(padv_decode_i),
+            .padv_execute_i(padv_execute_i),
+            .op_fpu_i(op_fpu_i),
+            .round_mode_i(fpu_round_mode_i),
+            .rfa_i(rfa_i),
+            .rfb_i(rfb_i),
+            .fpu_result_o(fpu_result),
+            .fpu_arith_valid_o(fpu_arith_valid),
+            .fpu_cmp_flag_o(fpu_cmp_flag),
+            .fpu_cmp_valid_o(fpu_cmp_valid),
+            .fpcsr_o(fpcsr_o)
+            );
+         // flag to update FPCSR
+         assign fpcsr_set_o = fpu_arith_valid | fpu_cmp_valid;
+         // some glue logic: top op_fpu_i bit qualifies the op, bit 3 selects compare vs arithmetic
+         assign fpu_op_is_arith = op_fpu_i[`OR1K_FPUOP_WIDTH-1] & (~op_fpu_i[3]);
+         assign fpu_op_is_cmp   = op_fpu_i[`OR1K_FPUOP_WIDTH-1] &   op_fpu_i[3];
+      end
+      else begin :  fpu_alu_none
+         // arithmetic part
+         assign fpu_op_is_arith = 0;
+         assign fpu_arith_valid = 0;
+         assign fpu_result      = {OPTION_OPERAND_WIDTH{1'b0}};
+         // comparator part
+         assign fpu_op_is_cmp = 0;
+         assign fpu_cmp_valid = 0;
+         assign fpu_cmp_flag  = 0;
+         // fpu's common
+         assign fpcsr_o = {`OR1K_FPCSR_WIDTH{1'b0}};
+         assign fpcsr_set_o = 0;
+      end
+   endgenerate // FPU related
+
+
+   wire ffl1_valid;
+   generate
+      if (FEATURE_FFL1!="NONE") begin
+         wire [OPTION_OPERAND_WIDTH-1:0] ffl1_result_wire; // 1-based bit position, 0 when a == 0; bit indices below are fixed to 32 bits
+         assign ffl1_result_wire = (opc_alu_secondary_i[2]) ? // secondary opcode bit 2 set: scan from bit 31 down, else from bit 0 up
+                                   (a[31] ? 32 : a[30] ? 31 : a[29] ? 30 :
+                                    a[28] ? 29 : a[27] ? 28 : a[26] ? 27 :
+                                    a[25] ? 26 : a[24] ? 25 : a[23] ? 24 :
+                                    a[22] ? 23 : a[21] ? 22 : a[20] ? 21 :
+                                    a[19] ? 20 : a[18] ? 19 : a[17] ? 18 :
+                                    a[16] ? 17 : a[15] ? 16 : a[14] ? 15 :
+                                    a[13] ? 14 : a[12] ? 13 : a[11] ? 12 :
+                                    a[10] ? 11 : a[9] ? 10 : a[8] ? 9 :
+                                    a[7] ? 8 : a[6] ? 7 : a[5] ? 6 : a[4] ? 5 :
+                                    a[3] ? 4 : a[2] ? 3 : a[1] ? 2 : a[0] ? 1 : 0 ) :
+                                   (a[0] ? 1 : a[1] ? 2 : a[2] ? 3 : a[3] ? 4 :
+                                    a[4] ? 5 : a[5] ? 6 : a[6] ? 7 : a[7] ? 8 :
+                                    a[8] ? 9 : a[9] ? 10 : a[10] ? 11 : a[11] ? 12 :
+                                    a[12] ? 13 : a[13] ? 14 : a[14] ? 15 :
+                                    a[15] ? 16 : a[16] ? 17 : a[17] ? 18 :
+                                    a[18] ? 19 : a[19] ? 20 : a[20] ? 21 :
+                                    a[21] ? 22 : a[22] ? 23 : a[23] ? 24 :
+                                    a[24] ? 25 : a[25] ? 26 : a[26] ? 27 :
+                                    a[27] ? 28 : a[28] ? 29 : a[29] ? 30 :
+                                    a[30] ? 31 : a[31] ? 32 : 0);
+         /* verilator lint_off WIDTH */
+         if (FEATURE_FFL1=="REGISTERED") begin
+            /* verilator lint_on WIDTH */
+            reg [OPTION_OPERAND_WIDTH-1:0] ffl1_result_r;
+
+            assign ffl1_valid = !decode_valid_i;
+            assign ffl1_result = ffl1_result_r;
+
+            always @(posedge clk)
+              if (decode_valid_i)
+                ffl1_result_r <= ffl1_result_wire; // non-blocking: clocked register, avoids a simulation race with readers on the same edge
+         end else begin
+            assign ffl1_result = ffl1_result_wire;
+            assign ffl1_valid = 1'b1;
+         end
+      end
+      else begin
+         assign ffl1_result = 0;
+         assign ffl1_valid = 1'b1;
+      end
+   endgenerate
+
+   // Equal compare
+   assign a_eq_b = (a == b);
+   // Signed compare (uses the adder result, i.e. a - b when adder_do_sub_i is set)
+   assign a_lts_b = !(adder_result_sign == adder_signed_overflow);
+   // Unsigned compare (no carry out of a + ~b + 1 means a < b)
+   assign a_ltu_b = !adder_carryout;
+
+   generate
+      /* verilator lint_off WIDTH */
+      if (OPTION_SHIFTER=="BARREL") begin : barrel_shifter
+         /* verilator lint_on WIDTH */
+
+         function [OPTION_OPERAND_WIDTH-1:0] reverse; // bit-reverses its argument
+            input [OPTION_OPERAND_WIDTH-1:0] in;
+            integer                          i;
+            begin
+               for (i = 0; i < OPTION_OPERAND_WIDTH; i=i+1) begin
+                  reverse[(OPTION_OPERAND_WIDTH-1)-i] = in[i];
+               end
+            end
+         endfunction
+
+         wire op_sll = (opc_alu_shr==`OR1K_ALU_OPC_SECONDARY_SHRT_SLL);
+         wire op_srl = (opc_alu_shr==`OR1K_ALU_OPC_SECONDARY_SHRT_SRL);
+         wire op_sra = (opc_alu_shr==`OR1K_ALU_OPC_SECONDARY_SHRT_SRA) &&
+                       (FEATURE_SRA!="NONE");
+         wire op_ror = (opc_alu_shr==`OR1K_ALU_OPC_SECONDARY_SHRT_ROR) &&
+                       (FEATURE_ROR!="NONE");
+
+         wire [OPTION_OPERAND_WIDTH-1:0]   shift_right;
+         wire [OPTION_OPERAND_WIDTH-1:0]   shift_lsw;
+         wire [OPTION_OPERAND_WIDTH-1:0]   shift_msw;
+         wire [OPTION_OPERAND_WIDTH*2-1:0] shift_wide;
+
+         //
+         // Bit-reverse on left shift, perform right shift,
+         // bit-reverse result on left shift.
+         //
+         assign shift_lsw = op_sll ? reverse(a) : a;
+         assign shift_msw = op_sra ? // upper word shifted in from the left: sign bits, a copy of a (rotate) or zeros
+                            {OPTION_OPERAND_WIDTH{a[OPTION_OPERAND_WIDTH-1]}} :
+                            op_ror ? a : {OPTION_OPERAND_WIDTH{1'b0}};
+
+         assign shift_wide = {shift_msw, shift_lsw} >> b[4:0];
+         assign shift_right = shift_wide[OPTION_OPERAND_WIDTH-1:0];
+         assign shift_result = op_sll ? reverse(shift_right) : shift_right;
+
+         assign shift_valid = 1;
+
+      end else if (OPTION_SHIFTER=="SERIAL") begin : serial_shifter
+         // Serial shifter: one bit position per clock until shift_cnt == b[4:0]
+         reg [4:0] shift_cnt;
+         reg       shift_go;
+         reg [OPTION_OPERAND_WIDTH-1:0] shift_result_r;
+         always @(posedge clk `OR_ASYNC_RST)
+           if (rst)
+             shift_go <= 0;
+           else if (decode_valid_i)
+             shift_go <= op_shift_i;
+
+         always @(posedge clk `OR_ASYNC_RST)
+           if (rst) begin
+              shift_cnt <= 0;
+              shift_result_r <= 0;
+           end
+           else if (decode_valid_i & op_shift_i) begin
+              shift_cnt <= 0;
+              shift_result_r <= a;
+           end
+           else if (shift_go && !(shift_cnt==b[4:0])) begin
+              shift_cnt <= shift_cnt + 1;
+              if (opc_alu_shr==`OR1K_ALU_OPC_SECONDARY_SHRT_SRL)
+                shift_result_r <= {1'b0,shift_result_r[OPTION_OPERAND_WIDTH-1:1]};
+              else if (opc_alu_shr==`OR1K_ALU_OPC_SECONDARY_SHRT_SLL)
+                shift_result_r <= {shift_result_r[OPTION_OPERAND_WIDTH-2:0],1'b0};
+              else if (opc_alu_shr==`OR1K_ALU_OPC_SECONDARY_SHRT_ROR)
+                shift_result_r <= {shift_result_r[0]
+                                   ,shift_result_r[OPTION_OPERAND_WIDTH-1:1]};
+
+              else if (opc_alu_shr==`OR1K_ALU_OPC_SECONDARY_SHRT_SRA)
+                shift_result_r <= {a[OPTION_OPERAND_WIDTH-1],
+                                   shift_result_r[OPTION_OPERAND_WIDTH-1:1]};
+           end // if (shift_go && !(shift_cnt==b[4:0]))
+
+         assign shift_valid = (shift_cnt==b[4:0]) & shift_go & !decode_valid_i;
+
+         assign shift_result = shift_result_r;
+
+      end // if (OPTION_SHIFTER=="SERIAL")
+      else
+        initial begin
+           $display("%m: Error - chosen shifter implementation (%s) not available",
+                    OPTION_SHIFTER);
+           $finish;
+
+        end
+   endgenerate
+
+   // Conditional move (cmov_result is left undriven unless FEATURE_CMOV is "ENABLED")
+   generate
+      /* verilator lint_off WIDTH */
+      if (FEATURE_CMOV=="ENABLED") begin
+         /* verilator lint_on WIDTH */
+         assign cmov_result = flag_i ? a : b;
+      end
+   endgenerate
+
+   // Sign Extension (ext_result is left unassigned unless FEATURE_EXT is "ENABLED")
+   generate
+      /* verilator lint_off WIDTH */
+      if (FEATURE_EXT=="ENABLED") begin // NOTE(review): no matching "lint_on WIDTH" follows, so WIDTH linting stays off below - TODO confirm intended
+         always @*
+           case(opc_alu_i)
+             `OR1K_ALU_OPC_EXTBH:
+               case(opc_alu_ext)
+                 `OR1K_ALU_OPC_SECONDARY_EXTBH_EXTBS,
+                 `OR1K_ALU_OPC_SECONDARY_EXTBH_EXTBZ:
+                   ext_result = a[7] && (opc_alu_ext == `OR1K_ALU_OPC_SECONDARY_EXTBH_EXTBS) ?
+                                {{(OPTION_OPERAND_WIDTH-8){1'b1}}, a[7:0]} :
+                                {{(OPTION_OPERAND_WIDTH-8){1'b0}}, a[7:0]};
+                 `OR1K_ALU_OPC_SECONDARY_EXTBH_EXTHS,
+                 `OR1K_ALU_OPC_SECONDARY_EXTBH_EXTHZ:
+                   ext_result = a[15] && (opc_alu_ext == `OR1K_ALU_OPC_SECONDARY_EXTBH_EXTHS) ?
+                                {{(OPTION_OPERAND_WIDTH-16){1'b1}}, a[15:0]} :
+                                {{(OPTION_OPERAND_WIDTH-16){1'b0}}, a[15:0]};
+                 default:
+                   ext_result = a;
+               endcase // case(opc_alu_ext)
+             `OR1K_ALU_OPC_EXTW:
+               //`OR1K_ALU_OPC_SECONDARY_EXTW_EXTWS,
+               //`OR1K_ALU_OPC_SECONDARY_EXTW_EXTWZ:
+               ext_result = a;
+             default:
+               ext_result = a;
+           endcase // case(opc_alu_i)
+      end
+   endgenerate
+
+   // Comparison logic
+   // To update SR[F] either from integer or floating point comparison
+   assign flag_set_o = fpu_op_is_cmp ?
+                       (fpu_cmp_flag & fpu_cmp_valid) :
+                       (flag_set & op_setflag_i);
+   assign flag_clear_o = fpu_op_is_cmp ?
+                         ((~fpu_cmp_flag) & fpu_cmp_valid) :
+                         ((~flag_set) & op_setflag_i);
+
+   // Combinatorial block: decode the integer compare condition
+   always @*
+     case(opc_alu_secondary_i)
+       `OR1K_COMP_OPC_EQ:
+         flag_set = a_eq_b;
+       `OR1K_COMP_OPC_NE:
+         flag_set = !a_eq_b;
+       `OR1K_COMP_OPC_GTU:
+         flag_set = !(a_eq_b | a_ltu_b);
+       `OR1K_COMP_OPC_GTS:
+         flag_set = !(a_eq_b | a_lts_b);
+       `OR1K_COMP_OPC_GEU:
+         flag_set = !a_ltu_b;
+       `OR1K_COMP_OPC_GES:
+         flag_set = !a_lts_b;
+       `OR1K_COMP_OPC_LTU:
+         flag_set = a_ltu_b;
+       `OR1K_COMP_OPC_LTS:
+         flag_set = a_lts_b;
+       `OR1K_COMP_OPC_LEU:
+         flag_set = a_eq_b | a_ltu_b;
+       `OR1K_COMP_OPC_LES:
+         flag_set = a_eq_b | a_lts_b;
+       default:
+         flag_set = 0;
+     endcase // case (opc_alu_secondary_i)
+
+   //
+   // Logic operations
+   //
+   // Create a look-up-table for AND/OR/XOR
+   reg [3:0] logic_lut; // truth table indexed by {a[i], b[i]}
+   always @(*) begin
+      case(opc_alu_i)
+        `OR1K_ALU_OPC_AND:
+          logic_lut = 4'b1000;
+        `OR1K_ALU_OPC_OR:
+          logic_lut = 4'b1110;
+        `OR1K_ALU_OPC_XOR:
+          logic_lut = 4'b0110;
+        default:
+          logic_lut = 0;
+      endcase
+      if (!op_alu_i)
+        logic_lut = 0;
+      // Treat mfspr/mtspr as 'OR'
+      if (op_mfspr_i | op_mtspr_i)
+        logic_lut = 4'b1110;
+   end
+
+   // Extract the result, bit-for-bit, from the look-up-table
+   integer i;
+   always @(*)
+     for (i = 0; i < OPTION_OPERAND_WIDTH; i=i+1) begin
+        logic_result[i] = logic_lut[{a[i], b[i]}];
+     end
+
+   assign op_logic = |logic_lut; // any non-zero table means a logic op (or mfspr/mtspr) is selected
+
+   assign op_cmov = op_alu_i & opc_alu_i == `OR1K_ALU_OPC_CMOV;
+
+   // Result muxing - result is registered in RF (priority order, adder result is the fallback)
+   assign alu_result_o = op_logic ? logic_result :
+                         op_cmov ? cmov_result :
+                         op_movhi_i ? immediate_i :
+                         op_ext_i ? ext_result :
+                         op_mul_i ? mul_result[OPTION_OPERAND_WIDTH-1:0] :
+                         fpu_arith_valid ? fpu_result :
+                         op_shift_i ? shift_result :
+                         op_div_i ? div_result :
+                         op_ffl1_i ? ffl1_result :
+                         adder_result;
+
+   // Carry and overflow flag generation
+   assign overflow_set_o = FEATURE_OVERFLOW!="NONE" &
+                           (op_add_i & adder_signed_overflow |
+                            op_mul_signed_i & mul_signed_overflow |
+                            op_div_signed_i & div_by_zero);
+
+   assign overflow_clear_o = FEATURE_OVERFLOW!="NONE" &
+                             (op_add_i & !adder_signed_overflow |
+                              op_mul_signed_i & !mul_signed_overflow |
+                              op_div_signed_i & !div_by_zero);
+
+   assign carry_set_o = FEATURE_CARRY_FLAG!="NONE" &
+                        (op_add_i & adder_unsigned_overflow |
+                         op_mul_unsigned_i & mul_unsigned_overflow |
+                         op_div_unsigned_i & div_by_zero);
+
+   assign carry_clear_o = FEATURE_CARRY_FLAG!="NONE" &
+                          (op_add_i & !adder_unsigned_overflow |
+                           op_mul_unsigned_i & !mul_unsigned_overflow |
+                           op_div_unsigned_i & !div_by_zero);
+
+   // Stall logic for multicycle ALU operations
+   assign alu_stall = op_div_i & !div_valid |
+                      op_mul_i & !mul_valid |
+                      fpu_op_is_arith & !fpu_arith_valid |
+                      fpu_op_is_cmp & !fpu_cmp_valid |
+                      op_shift_i & !shift_valid |
+                      op_ffl1_i & !ffl1_valid;
+
+   assign alu_valid_o = !alu_stall;
+
+endmodule // mor1kx_execute_alu
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_execute_ctrl_cappuccino.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_execute_ctrl_cappuccino.v
new file mode 100644
index 0000000..6e91801
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_execute_ctrl_cappuccino.v
@@ -0,0 +1,387 @@
+/* ****************************************************************************
+  This Source Code Form is subject to the terms of the
+  Open Hardware Description License, v. 1.0. 
If a copy
+  of the OHDL was not distributed with this file, You
+  can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+  Description: execute to control stage signal passing
+
+  Generate valid signal when stage is done
+
+  Copyright (C) 2012 Authors
+
+  Author(s): Julius Baxter
+             Stefan Kristiansson
+
+***************************************************************************** */
+
+`include "mor1kx-defines.v"
+
+//
+// Pipeline register stage between the execute stage and the ctrl stage,
+// plus the register-file write-back strobes for the following (wb) stage.
+//
+// - Execute-stage results, flags, operation strobes and exceptions are
+//   captured into the ctrl_* outputs when padv_i is asserted.
+// - ctrl_valid_o is deasserted while a ctrl-stage load/store, msync,
+//   mfspr or mtspr is still waiting for its acknowledge.
+// - execute_valid_o is additionally deasserted while alu_valid_i is low.
+// - wb_rf_wb_o / wb_rfd_adr_o are the ctrl-stage write-back enable and
+//   destination address delayed by one more register.
+//
+// op_jr_i is part of the port list but is not referenced inside this module.
+//
+module mor1kx_execute_ctrl_cappuccino
+  #(
+    parameter OPTION_OPERAND_WIDTH = 32,
+    parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}},
+                                 `OR1K_RESET_VECTOR,8'd0},
+    parameter OPTION_RF_ADDR_WIDTH = 5,
+    parameter FEATURE_FPU = "NONE", // ENABLED|NONE
+    parameter FEATURE_MULTIPLIER = "THREESTAGE"
+    )
+   (
+    input clk,
+    input rst,
+
+    // pipeline advance strobes: padv_i latches execute -> ctrl,
+    // padv_ctrl_i qualifies the ctrl -> wb write enable
+    input padv_i,
+    input padv_ctrl_i,
+
+    input execute_except_ibus_err_i,
+    input execute_except_itlb_miss_i,
+    input execute_except_ipagefault_i,
+    input execute_except_illegal_i,
+    input execute_except_ibus_align_i,
+    input execute_except_syscall_i,
+    input lsu_except_dbus_i,
+    input lsu_except_align_i,
+    input lsu_except_dtlb_miss_i,
+    input lsu_except_dpagefault_i,
+    input execute_except_trap_i,
+
+    input pipeline_flush_i,
+
+    input op_mul_i,
+
+    input op_lsu_load_i,
+    input op_lsu_store_i,
+    input op_lsu_atomic_i,
+    input [1:0] lsu_length_i,
+    input lsu_zext_i,
+
+    input op_msync_i,
+
+    input op_mfspr_i,
+    input op_mtspr_i,
+    input alu_valid_i,
+    input lsu_valid_i,
+
+    input msync_stall_i,
+
+    input op_jr_i,
+    input op_jal_i,
+    input op_rfe_i,
+
+    input [OPTION_OPERAND_WIDTH-1:0] alu_result_i,
+    input [OPTION_OPERAND_WIDTH-1:0] adder_result_i,
+    input [OPTION_OPERAND_WIDTH-1:0] rfb_i,
+    input [OPTION_OPERAND_WIDTH-1:0] execute_jal_result_i,
+    input flag_set_i,
+    input flag_clear_i,
+    input carry_set_i,
+    input carry_clear_i,
+    input overflow_set_i,
+    input overflow_clear_i,
+
+    input [`OR1K_FPCSR_WIDTH-1:0] fpcsr_i,
+    input fpcsr_set_i,
+
+
+    input [OPTION_OPERAND_WIDTH-1:0] pc_execute_i,
+
+    input execute_rf_wb_i,
+    output reg ctrl_rf_wb_o,
+    output reg wb_rf_wb_o,
+
+
+    // address of destination register from execute stage
+    input [OPTION_RF_ADDR_WIDTH-1:0] execute_rfd_adr_i,
+    output reg [OPTION_RF_ADDR_WIDTH-1:0] ctrl_rfd_adr_o,
+    output reg [OPTION_RF_ADDR_WIDTH-1:0] wb_rfd_adr_o,
+
+    input execute_bubble_i,
+
+    // Input from control stage for mfspr/mtspr ack
+    input ctrl_mfspr_ack_i,
+    input ctrl_mtspr_ack_i,
+
+    output reg [OPTION_OPERAND_WIDTH-1:0] ctrl_alu_result_o,
+    output reg [OPTION_OPERAND_WIDTH-1:0] ctrl_lsu_adr_o,
+    output reg [OPTION_OPERAND_WIDTH-1:0] ctrl_rfb_o,
+    output reg ctrl_flag_set_o,
+    output reg ctrl_flag_clear_o,
+    output reg ctrl_carry_set_o,
+    output reg ctrl_carry_clear_o,
+    output reg ctrl_overflow_set_o,
+    output reg ctrl_overflow_clear_o,
+
+    output reg [`OR1K_FPCSR_WIDTH-1:0] ctrl_fpcsr_o,
+    output reg ctrl_fpcsr_set_o,
+
+
+    output reg [OPTION_OPERAND_WIDTH-1:0] pc_ctrl_o,
+
+    output reg ctrl_op_mul_o,
+
+    output reg ctrl_op_lsu_load_o,
+    output reg ctrl_op_lsu_store_o,
+    output reg ctrl_op_lsu_atomic_o,
+    output reg [1:0] ctrl_lsu_length_o,
+    output reg ctrl_lsu_zext_o,
+
+    output reg ctrl_op_msync_o,
+
+    output reg ctrl_op_mfspr_o,
+    output reg ctrl_op_mtspr_o,
+
+    output reg ctrl_op_rfe_o,
+
+    output reg ctrl_except_ibus_err_o,
+    output reg ctrl_except_itlb_miss_o,
+    output reg ctrl_except_ipagefault_o,
+    output reg ctrl_except_ibus_align_o,
+    output reg ctrl_except_illegal_o,
+    output reg ctrl_except_syscall_o,
+    output reg ctrl_except_dbus_o,
+    output reg ctrl_except_dtlb_miss_o,
+    output reg ctrl_except_dpagefault_o,
+    output reg ctrl_except_align_o,
+    output reg ctrl_except_trap_o,
+
+    output execute_valid_o,
+    output ctrl_valid_o
+    );
+
+   wire ctrl_stall;
+   wire execute_stall;
+
+   // LSU or MTSPR/MFSPR can stall from ctrl stage
+   assign ctrl_stall = (ctrl_op_lsu_load_o | ctrl_op_lsu_store_o) &
+                       !lsu_valid_i |
+                       ctrl_op_msync_o & msync_stall_i |
+                       ctrl_op_mfspr_o & !ctrl_mfspr_ack_i |
+                       ctrl_op_mtspr_o & !ctrl_mtspr_ack_i;
+   assign ctrl_valid_o = !ctrl_stall;
+
+   // Execute stage can be stalled from ctrl stage and by ALU
+   assign execute_stall = ctrl_stall | !alu_valid_i;
+   assign execute_valid_o = !execute_stall;
+
+   // Exception flags.
+   // The execute_except_* flags are captured on padv_i; the lsu_except_*
+   // flags are sampled on every clock that is neither reset nor flush.
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst) begin
+        ctrl_except_ibus_err_o <= 0;
+        ctrl_except_itlb_miss_o <= 0;
+        ctrl_except_ipagefault_o <= 0;
+        ctrl_except_ibus_align_o <= 0;
+        ctrl_except_illegal_o <= 0;
+        ctrl_except_syscall_o <= 0;
+        ctrl_except_trap_o <= 0;
+        ctrl_except_dbus_o <= 0;
+        ctrl_except_align_o <= 0;
+        // Every register assigned in this block gets a reset value, so
+        // that none of the LSU exception flags (which gate the clearing of
+        // ctrl_op_lsu_* below) is undefined while reset is asserted.
+        ctrl_except_dtlb_miss_o <= 0;
+        ctrl_except_dpagefault_o <= 0;
+     end
+     else if (pipeline_flush_i) begin
+        ctrl_except_ibus_err_o <= 0;
+        ctrl_except_itlb_miss_o <= 0;
+        ctrl_except_ipagefault_o <= 0;
+        ctrl_except_ibus_align_o <= 0;
+        ctrl_except_illegal_o <= 0;
+        ctrl_except_syscall_o <= 0;
+        ctrl_except_trap_o <= 0;
+        ctrl_except_dbus_o <= 0;
+        ctrl_except_align_o <= 0;
+        // NOTE(review): ctrl_except_dtlb_miss_o and
+        // ctrl_except_dpagefault_o are not cleared on flush and hold their
+        // value for this cycle - confirm whether that is intentional.
+     end
+     else begin
+        if (padv_i) begin
+           ctrl_except_ibus_err_o <= execute_except_ibus_err_i;
+           ctrl_except_itlb_miss_o <= execute_except_itlb_miss_i;
+           ctrl_except_ipagefault_o <= execute_except_ipagefault_i;
+           ctrl_except_ibus_align_o <= execute_except_ibus_align_i;
+           ctrl_except_illegal_o <= execute_except_illegal_i;
+           ctrl_except_syscall_o <= execute_except_syscall_i;
+           ctrl_except_trap_o <= execute_except_trap_i;
+        end
+        ctrl_except_dbus_o <= lsu_except_dbus_i;
+        ctrl_except_align_o <= lsu_except_align_i;
+        ctrl_except_dtlb_miss_o <= lsu_except_dtlb_miss_i;
+        ctrl_except_dpagefault_o <= lsu_except_dpagefault_i;
+     end
+
+   // Result register: the jal result replaces the ALU result when op_jal_i
+   // is set (the 'else' below pairs with the inner 'if').
+   always @(posedge clk)
+     if (padv_i)
+       if (op_jal_i)
+         ctrl_alu_result_o <= execute_jal_result_i;
+       else
+         ctrl_alu_result_o <= alu_result_i;
+
+   // The LSU address is only updated for loads and stores
+   always @(posedge clk)
+     if (padv_i & (op_lsu_store_i | op_lsu_load_i))
+       ctrl_lsu_adr_o <= adder_result_i;
+
+   always @(posedge clk)
+     if (padv_i)
+       ctrl_rfb_o <= rfb_i;
+
+   // Flag, carry and overflow set/clear strobes
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst) begin
+        ctrl_flag_set_o <= 0;
+        ctrl_flag_clear_o <= 0;
+        ctrl_carry_set_o <= 0;
+        ctrl_carry_clear_o <= 0;
+        ctrl_overflow_set_o <= 0;
+        ctrl_overflow_clear_o <= 0;
+     end
+     else if (padv_i) begin
+        ctrl_flag_set_o <= flag_set_i;
+        ctrl_flag_clear_o <= flag_clear_i;
+        ctrl_carry_set_o <= carry_set_i;
+        ctrl_carry_clear_o <= carry_clear_i;
+        ctrl_overflow_set_o <= overflow_set_i;
+        ctrl_overflow_clear_o <= overflow_clear_i;
+     end
+
+   // pc_ctrl should not advance when a nop bubble moves from execute to
+   // ctrl/mem stage
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       pc_ctrl_o <= OPTION_RESET_PC;
+     else if (padv_i & !execute_bubble_i)
+       pc_ctrl_o <= pc_execute_i;
+
+   //
+   // The pipeline flush comes when the instruction that has caused
+   // an exception or the instruction that has been interrupted is in
+   // ctrl stage, so the padv_execute signal has to have higher priority
+   // than the pipeline flush in order to not accidentally kill a valid
+   // instruction coming in from execute stage.
+   //
+
+generate
+if (FEATURE_MULTIPLIER=="PIPELINED") begin
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       ctrl_op_mul_o <= 0;
+     else if (padv_i)
+       ctrl_op_mul_o <= op_mul_i;
+     else if (pipeline_flush_i)
+       ctrl_op_mul_o <= 0;
+end else begin
+   // For every other multiplier setting the strobe is held at zero
+   always @(posedge clk)
+     ctrl_op_mul_o <= 0;
+end
+endgenerate
+
+   // FPU related
+   generate
+      /* verilator lint_off WIDTH */
+      if (FEATURE_FPU!="NONE") begin : fpu_execute_ctrl_ena
+      /* verilator lint_on WIDTH */
+         always @(posedge clk `OR_ASYNC_RST) begin
+            if (rst) begin
+               ctrl_fpcsr_o <= {`OR1K_FPCSR_WIDTH{1'b0}};
+               ctrl_fpcsr_set_o <= 0;
+            end else if (pipeline_flush_i) begin
+               ctrl_fpcsr_o <= {`OR1K_FPCSR_WIDTH{1'b0}};
+               ctrl_fpcsr_set_o <= 0;
+            end else if (padv_i) begin
+               ctrl_fpcsr_o <= fpcsr_i;
+               ctrl_fpcsr_set_o <= fpcsr_set_i;
+            end
+         end // @clk
+      end
+      else begin : fpu_execute_ctrl_none
+         // Without an FPU the outputs only ever receive their reset value
+         always @(posedge clk `OR_ASYNC_RST) begin
+            if (rst) begin
+               ctrl_fpcsr_o <= {`OR1K_FPCSR_WIDTH{1'b0}};
+               ctrl_fpcsr_set_o <= 0;
+            end
+         end // @clk
+      end
+   endgenerate // FPU related
+
+   // Operation strobes: padv_i takes priority over pipeline_flush_i
+   // (see the comment on flush priority above).
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst) begin
+        ctrl_op_mfspr_o <= 0;
+        ctrl_op_mtspr_o <= 0;
+     end else if (padv_i) begin
+        ctrl_op_mfspr_o <= op_mfspr_i;
+        ctrl_op_mtspr_o <= op_mtspr_i;
+     end else if (pipeline_flush_i) begin
+        ctrl_op_mfspr_o <= 0;
+        ctrl_op_mtspr_o <= 0;
+     end
+
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       ctrl_op_rfe_o <= 0;
+     else if (padv_i)
+       ctrl_op_rfe_o <= op_rfe_i;
+     else if (pipeline_flush_i)
+       ctrl_op_rfe_o <= 0;
+
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       ctrl_op_msync_o <= 0;
+     else if (padv_i)
+       ctrl_op_msync_o <= op_msync_i;
+     else if (pipeline_flush_i)
+       ctrl_op_msync_o <= 0;
+
+   // LSU operation strobes: a registered LSU exception clears them with
+   // higher priority than padv_i.
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst) begin
+        ctrl_op_lsu_load_o <= 0;
+        ctrl_op_lsu_store_o <= 0;
+        ctrl_op_lsu_atomic_o <= 0;
+     end else if (ctrl_except_align_o | ctrl_except_dbus_o |
+                  ctrl_except_dtlb_miss_o | ctrl_except_dpagefault_o) begin
+        ctrl_op_lsu_load_o <= 0;
+        ctrl_op_lsu_store_o <= 0;
+        ctrl_op_lsu_atomic_o <= 0;
+     end else if (padv_i) begin
+        ctrl_op_lsu_load_o <= op_lsu_load_i;
+        ctrl_op_lsu_store_o <= op_lsu_store_i;
+        ctrl_op_lsu_atomic_o <= op_lsu_atomic_i;
+     end else if (pipeline_flush_i) begin
+        ctrl_op_lsu_load_o <= 0;
+        ctrl_op_lsu_store_o <= 0;
+        ctrl_op_lsu_atomic_o <= 0;
+     end
+
+   always @(posedge clk)
+     if (padv_i) begin
+        ctrl_lsu_length_o <= lsu_length_i;
+        ctrl_lsu_zext_o <= lsu_zext_i;
+     end
+
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       ctrl_rf_wb_o <= 0;
+     else if (padv_i)
+       ctrl_rf_wb_o <= execute_rf_wb_i;
+     else if (ctrl_op_mfspr_o & ctrl_mfspr_ack_i |
+              ctrl_op_lsu_load_o & lsu_valid_i)
+       // Deassert the write enable when the "bus" access is done, to avoid:
+       // 1) Writing multiple times to RF
+       // 2) Signaling a need to bypass from control stage, when it really
+       //    should be a bypass from wb stage.
+       ctrl_rf_wb_o <= 0;
+     else if (pipeline_flush_i)
+       ctrl_rf_wb_o <= 0;
+
+   always @(posedge clk)
+     if (padv_i)
+       ctrl_rfd_adr_o <= execute_rfd_adr_i;
+
+   // load and mfspr can stall from ctrl stage, so we have to hold off the
+   // write back on them
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       wb_rf_wb_o <= 0;
+     else if (pipeline_flush_i)
+       wb_rf_wb_o <= 0;
+     else if (ctrl_op_mfspr_o)
+       wb_rf_wb_o <= ctrl_rf_wb_o & ctrl_mfspr_ack_i;
+     else if (ctrl_op_lsu_load_o)
+       wb_rf_wb_o <= ctrl_rf_wb_o & lsu_valid_i;
+     else
+       wb_rf_wb_o <= ctrl_rf_wb_o & padv_ctrl_i;
+
+   // The write-back destination address simply follows the ctrl stage
+   always @(posedge clk)
+     wb_rfd_adr_o <= ctrl_rfd_adr_o;
+
+endmodule // mor1kx_execute_ctrl_cappuccino
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_fetch_cappuccino.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_fetch_cappuccino.v
new file mode 100644
index 0000000..2f987eb
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_fetch_cappuccino.v
@@ -0,0 +1,645 @@
+/* ****************************************************************************
+  This Source Code Form is subject to the terms of the
+  Open Hardware Description License, v. 1.0. If a copy
+  of the OHDL was not distributed with this file, You
+  can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+  Description: mor1kx fetch/address stage unit
+
+  basically an interface to the ibus/icache subsystem that can react to
+  exception and branch signals.
+
+  Copyright (C) 2012 Authors
+
+  Author(s): Julius Baxter
+             Stefan Kristiansson
+
+***************************************************************************** */
+
+`include "mor1kx-defines.v"
+
+//
+// Fetch/address stage.
+//
+// - pc_addr is the combinatorial next fetch address, pc_fetch its
+//   registered copy (updated on addr_valid or du_restart_i).
+// - Instructions come either from the instruction cache (ic_ack/ic_dat) or
+//   from the ibus state machine in this module (ibus_ack/ibus_dat);
+//   ibus_access selects between the two.
+// - When FEATURE_IMMU is enabled, addresses are translated by mor1kx_immu
+//   and TLB reloads are performed over the ibus (TLB_RELOAD state).
+// - The fetched instruction, its PC and the ibus/IMMU exception flags are
+//   registered towards the decode stage.
+//
+module mor1kx_fetch_cappuccino
+  #(
+    parameter OPTION_OPERAND_WIDTH = 32,
+    parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}},
+                                 `OR1K_RESET_VECTOR,8'd0},
+    parameter OPTION_RF_ADDR_WIDTH = 5,
+    parameter FEATURE_INSTRUCTIONCACHE = "NONE",
+    parameter OPTION_ICACHE_BLOCK_WIDTH = 5,
+    parameter OPTION_ICACHE_SET_WIDTH = 9,
+    parameter OPTION_ICACHE_WAYS = 2,
+    parameter OPTION_ICACHE_LIMIT_WIDTH = 32,
+    parameter FEATURE_IMMU = "NONE",
+    parameter FEATURE_IMMU_HW_TLB_RELOAD = "NONE",
+    parameter OPTION_IMMU_SET_WIDTH = 6,
+    parameter OPTION_IMMU_WAYS = 1
+    )
+   (
+    input clk,
+    input rst,
+
+    // SPR interface
+    input [15:0] spr_bus_addr_i,
+    input spr_bus_we_i,
+    input spr_bus_stb_i,
+    input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_i,
+    output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_ic_o,
+    output spr_bus_ack_ic_o,
+    output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_immu_o,
+    output spr_bus_ack_immu_o,
+
+    input ic_enable,
+    input immu_enable_i,
+    input supervisor_mode_i,
+    output ic_hit_o,
+
+    // interface to ibus
+    input ibus_err_i,
+    input ibus_ack_i,
+    input [`OR1K_INSN_WIDTH-1:0] ibus_dat_i,
+    output ibus_req_o,
+    output [OPTION_OPERAND_WIDTH-1:0] ibus_adr_o,
+    output ibus_burst_o,
+
+    // pipeline control input
+    input padv_i,
+    input padv_ctrl_i, // needed for immu spr
+
+    // interface to decode unit
+    output reg [OPTION_OPERAND_WIDTH-1:0] pc_decode_o,
+    output reg [`OR1K_INSN_WIDTH-1:0] decode_insn_o,
+    output reg fetch_valid_o,
+    output [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfa_adr_o,
+    output [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfb_adr_o,
+    output fetch_rf_adr_valid_o,
+
+    // branch/jump indication
+    input decode_branch_i,
+    input [OPTION_OPERAND_WIDTH-1:0] decode_branch_target_i,
+    input ctrl_branch_exception_i,
+    input [OPTION_OPERAND_WIDTH-1:0] ctrl_branch_except_pc_i,
+    input du_restart_i,
+    input [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_i,
+    input decode_op_brcond_i,
+    input branch_mispredict_i,
+    input [OPTION_OPERAND_WIDTH-1:0] execute_mispredict_target_i,
+
+    // pipeline flush input from control unit
+    input pipeline_flush_i,
+
+    // rfe instruction is being performed
+    input doing_rfe_i,
+
+    // instruction ibus error indication out
+    output reg decode_except_ibus_err_o,
+
+    // IMMU exceptions
+    output reg decode_except_itlb_miss_o,
+    output reg decode_except_ipagefault_o,
+
+    output reg fetch_exception_taken_o
+    );
+
+   // registers
+   reg [OPTION_OPERAND_WIDTH-1:0] pc_fetch;
+   reg [OPTION_OPERAND_WIDTH-1:0] pc_addr;
+   reg ctrl_branch_exception_r;
+
+   wire bus_access_done;
+   wire ctrl_branch_exception_edge;
+   wire stall_fetch_valid;
+   wire addr_valid;
+   reg flush;
+   wire flushing;
+
+   reg nop_ack;
+
+   reg imem_err;
+   wire imem_ack;
+   wire [`OR1K_INSN_WIDTH-1:0] imem_dat;
+
+   wire ic_ack;
+   wire [`OR1K_INSN_WIDTH-1:0] ic_dat;
+
+   wire ic_refill;
+   wire ic_refill_req;
+   wire ic_refill_done;
+   wire ic_invalidate;
+   wire [OPTION_OPERAND_WIDTH-1:0] ic_addr;
+   wire [OPTION_OPERAND_WIDTH-1:0] ic_addr_match;
+
+   wire ic_access;
+
+
+   wire [OPTION_OPERAND_WIDTH-1:0] immu_phys_addr;
+   wire immu_cache_inhibit;
+   wire pagefault;
+   wire tlb_miss;
+   wire except_itlb_miss;
+   wire except_ipagefault;
+
+   wire immu_busy;
+
+   wire tlb_reload_req;
+   reg tlb_reload_ack;
+   wire [OPTION_OPERAND_WIDTH-1:0] tlb_reload_addr;
+   reg [OPTION_OPERAND_WIDTH-1:0] tlb_reload_data;
+   wire tlb_reload_pagefault;
+   wire tlb_reload_busy;
+
+   reg fetching_brcond;
+   reg fetching_mispredicted_branch;
+   wire mispredict_stall;
+
+   reg exception_while_tlb_reload;
+   wire except_ipagefault_clear;
+
+   // An access counts as done when it was acked, errored or answered by
+   // an inserted nop, and the IMMU is neither busy nor reloading its TLB.
+   assign bus_access_done = (imem_ack | imem_err | nop_ack) & !immu_busy &
+                            !tlb_reload_busy;
+   // Rising edge of ctrl_branch_exception_i
+   assign ctrl_branch_exception_edge = ctrl_branch_exception_i &
+                                       !ctrl_branch_exception_r;
+
+   /* used to keep fetch_valid_o high during stall */
+   assign stall_fetch_valid = !padv_i & fetch_valid_o;
+
+   // addr_valid allows pc_fetch to take over pc_addr (see below)
+   assign addr_valid = bus_access_done & padv_i &
+                       !(except_itlb_miss | except_ipagefault) |
+                       decode_except_itlb_miss_o & ctrl_branch_exception_i |
+                       decode_except_ipagefault_o & ctrl_branch_exception_i |
+                       doing_rfe_i;
+
+   assign except_itlb_miss = tlb_miss & immu_enable_i & bus_access_done &
+                             !mispredict_stall & !doing_rfe_i;
+   assign except_ipagefault = pagefault & immu_enable_i & bus_access_done &
+                              !mispredict_stall & !doing_rfe_i |
+                              tlb_reload_pagefault;
+
+   // Register file read addresses are taken straight from the incoming
+   // instruction word, before it is registered into decode_insn_o.
+   assign fetch_rfa_adr_o = imem_dat[`OR1K_RA_SELECT];
+   assign fetch_rfb_adr_o = imem_dat[`OR1K_RB_SELECT];
+   assign fetch_rf_adr_valid_o = bus_access_done & padv_i;
+
+   // Signal to indicate that the ongoing bus access should be flushed
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       flush <= 0;
+     else if (bus_access_done & padv_i | du_restart_i)
+       flush <= 0;
+     else if (pipeline_flush_i)
+       flush <= 1;
+
+   // pipeline_flush_i comes on the same edge as branch_except_occur during
+   // rfe, but on an edge later when an exception occurs, but we always need
+   // to keep on flushing when the branch signal comes in.
+   assign flushing = pipeline_flush_i | ctrl_branch_exception_edge | flush;
+
+   // Branch misprediction stall logic
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       fetching_brcond <= 0;
+     else if (pipeline_flush_i)
+       fetching_brcond <= 0;
+     else if (decode_op_brcond_i & addr_valid)
+       fetching_brcond <= 1;
+     else if (bus_access_done & padv_i | du_restart_i)
+       fetching_brcond <= 0;
+
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       fetching_mispredicted_branch <= 0;
+     else if (pipeline_flush_i)
+       fetching_mispredicted_branch <= 0;
+     else if (bus_access_done & padv_i | du_restart_i)
+       fetching_mispredicted_branch <= 0;
+     else if (fetching_brcond & branch_mispredict_i & padv_i)
+       fetching_mispredicted_branch <= 1;
+
+   assign mispredict_stall = fetching_mispredicted_branch |
+                             branch_mispredict_i & fetching_brcond;
+
+   // One-cycle delayed copy, used for the edge detect above
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       ctrl_branch_exception_r <= 1'b0;
+     else
+       ctrl_branch_exception_r <= ctrl_branch_exception_i;
+
+   // calculate address stage pc
+   // Priority, highest first: reset, debug-unit restart, exception vector,
+   // mispredict target, decode-stage branch target, sequential (pc_fetch+4).
+   always @(*)
+     if (rst)
+       pc_addr = OPTION_RESET_PC;
+     else if (du_restart_i)
+       pc_addr = du_restart_pc_i;
+     else if (ctrl_branch_exception_i & !fetch_exception_taken_o)
+       pc_addr = ctrl_branch_except_pc_i;
+     else if (branch_mispredict_i | fetching_mispredicted_branch)
+       pc_addr = execute_mispredict_target_i;
+     else if (decode_branch_i)
+       pc_addr = decode_branch_target_i;
+     else
+       pc_addr = pc_fetch + 4;
+
+   // Register fetch pc from address stage
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       pc_fetch <= OPTION_RESET_PC;
+     else if (addr_valid | du_restart_i)
+       pc_fetch <= pc_addr;
+
+   // fetch_exception_taken_o generation
+   // (the output is never high for two consecutive cycles)
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       fetch_exception_taken_o <= 1'b0;
+     else if (fetch_exception_taken_o)
+       fetch_exception_taken_o <= 1'b0;
+     else if (ctrl_branch_exception_i & bus_access_done & padv_i)
+       fetch_exception_taken_o <= 1'b1;
+     else
+       fetch_exception_taken_o <= 1'b0;
+
+   // fetch_valid_o generation
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       fetch_valid_o <= 1'b0;
+     else if (pipeline_flush_i)
+       fetch_valid_o <= 1'b0;
+     else if (bus_access_done & padv_i & !mispredict_stall & !immu_busy &
+              !tlb_reload_busy | stall_fetch_valid)
+       fetch_valid_o <= 1'b1;
+     else
+       fetch_valid_o <= 1'b0;
+
+   // Register instruction coming in
+   // (a nop is inserted on ibus error or while flushing)
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0};
+     else if (imem_err | flushing)
+       decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0};
+     else if (bus_access_done & padv_i & !mispredict_stall)
+       decode_insn_o <= imem_dat;
+
+   // Register PC for later stages
+   always @(posedge clk)
+     if (bus_access_done & padv_i & !mispredict_stall)
+       pc_decode_o <= pc_fetch;
+
+   // Exception flags towards decode: set when the condition is seen,
+   // cleared again once ctrl_branch_exception_i is asserted (or on
+   // du_restart_i).
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       decode_except_ibus_err_o <= 0;
+     else if (du_restart_i)
+       decode_except_ibus_err_o <= 0;
+     else if (imem_err)
+       decode_except_ibus_err_o <= 1;
+     else if (decode_except_ibus_err_o & ctrl_branch_exception_i)
+       decode_except_ibus_err_o <= 0;
+
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       decode_except_itlb_miss_o <= 0;
+     else if (du_restart_i)
+       decode_except_itlb_miss_o <= 0;
+     else if (tlb_reload_busy)
+       decode_except_itlb_miss_o <= 0;
+     else if (except_itlb_miss)
+       decode_except_itlb_miss_o <= 1;
+     else if (decode_except_itlb_miss_o & ctrl_branch_exception_i)
+       decode_except_itlb_miss_o <= 0;
+
+   assign except_ipagefault_clear = decode_except_ipagefault_o &
+                                    ctrl_branch_exception_i;
+
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       decode_except_ipagefault_o <= 0;
+     else if (du_restart_i)
+       decode_except_ipagefault_o <= 0;
+     else if (except_ipagefault)
+       decode_except_ipagefault_o <= 1;
+     else if (except_ipagefault_clear)
+       decode_except_ipagefault_o <= 0;
+
+   // Bus access logic
+   localparam [2:0]
+     IDLE = 0,
+     READ = 1,
+     TLB_RELOAD = 2,
+     IC_REFILL = 3;
+
+   reg [2:0] state;
+
+   reg [OPTION_OPERAND_WIDTH-1:0] ibus_adr;
+   wire [OPTION_OPERAND_WIDTH-1:0] next_ibus_adr;
+   reg [`OR1K_INSN_WIDTH-1:0] ibus_dat;
+   reg ibus_req;
+   reg ibus_ack;
+
+   wire ibus_access;
+
+   //
+   // Under certain circumstances, there is a need to insert an nop
+   // into the pipeline in order for it to move forward.
+   // Here those conditions are handled and an acknowledged signal
+   // is generated.
+   //
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       nop_ack <= 0;
+     else
+       nop_ack <= padv_i & !bus_access_done & !(ibus_req & ibus_access) &
+                  ((immu_enable_i & (tlb_miss | pagefault) &
+                    !tlb_reload_busy) |
+                   ctrl_branch_exception_edge & !tlb_reload_busy |
+                   exception_while_tlb_reload & !tlb_reload_busy |
+                   tlb_reload_pagefault |
+                   mispredict_stall);
+
+   // ibus_access is high when the instruction is to be delivered by the
+   // ibus state machine below rather than by the instruction cache.
+   assign ibus_access = (!ic_access | tlb_reload_busy | ic_invalidate) &
+                        !ic_refill |
+                        (state != IDLE) & (state != IC_REFILL) |
+                        ibus_ack;
+   assign imem_ack = ibus_access ? ibus_ack : ic_ack;
+   assign imem_dat = (nop_ack | except_itlb_miss | except_ipagefault) ?
+                     {`OR1K_OPCODE_NOP,26'd0} :
+                     ibus_access ? ibus_dat : ic_dat;
+   assign ibus_adr_o = ibus_adr;
+   assign ibus_req_o = ibus_req;
+   assign ibus_burst_o = !ibus_access & ic_refill & !ic_refill_done;
+
+   // Next refill address: only the low bits are incremented, so the
+   // address wraps inside the cache block.
+   // NOTE(review): the bit ranges are hard-coded for a 32-bit ibus_adr.
+   assign next_ibus_adr = (OPTION_ICACHE_BLOCK_WIDTH == 5) ?
+                          {ibus_adr[31:5], ibus_adr[4:0] + 5'd4} : // 32 byte
+                          {ibus_adr[31:4], ibus_adr[3:0] + 4'd4};  // 16 byte
+
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       imem_err <= 0;
+     else
+       imem_err <= ibus_err_i;
+
+   // ibus state machine.
+   // The three assignments at the top are defaults that are overridden
+   // further down where needed; the reset at the bottom is synchronous and,
+   // being the last assignment, wins over the case statement.
+   always @(posedge clk) begin
+      ibus_ack <= 0;
+      exception_while_tlb_reload <= 0;
+      tlb_reload_ack <= 0;
+
+      case (state)
+        IDLE: begin
+           ibus_req <= 0;
+           if (padv_i & ibus_access & !ibus_ack & !imem_err & !nop_ack) begin
+              if (tlb_reload_req) begin
+                 ibus_adr <= tlb_reload_addr;
+                 ibus_req <= 1;
+                 state <= TLB_RELOAD;
+              end else if (immu_enable_i) begin
+                 ibus_adr <= immu_phys_addr;
+                 if (!tlb_miss & !pagefault & !immu_busy) begin
+                    ibus_req <= 1;
+                    state <= READ;
+                 end
+              end else if (!ctrl_branch_exception_i | doing_rfe_i) begin
+                 ibus_adr <= pc_fetch;
+                 ibus_req <= 1;
+                 state <= READ;
+              end
+           end else if (ic_refill_req) begin
+              ibus_adr <= ic_addr_match;
+              ibus_req <= 1;
+              state <= IC_REFILL;
+           end
+        end
+
+        IC_REFILL: begin
+           ibus_req <= 1;
+           if (ibus_ack_i) begin
+              ibus_adr <= next_ibus_adr;
+              if (ic_refill_done) begin
+                 ibus_req <= 0;
+                 state <= IDLE;
+              end
+           end
+           if (ibus_err_i) begin
+              ibus_req <= 0;
+              state <= IDLE;
+           end
+        end
+
+        READ: begin
+           ibus_ack <= ibus_ack_i;
+           ibus_dat <= ibus_dat_i;
+           if (ibus_ack_i | ibus_err_i) begin
+              ibus_req <= 0;
+              state <= IDLE;
+           end
+        end
+
+        TLB_RELOAD: begin
+           if (ctrl_branch_exception_i)
+             exception_while_tlb_reload <= 1;
+
+           ibus_adr <= tlb_reload_addr;
+           tlb_reload_data <= ibus_dat_i;
+           tlb_reload_ack <= ibus_ack_i & tlb_reload_req;
+
+           if (!tlb_reload_req)
+             state <= IDLE;
+
+           ibus_req <= tlb_reload_req;
+           if (ibus_ack_i | tlb_reload_ack)
+             ibus_req <= 0;
+        end
+
+        default:
+          state <= IDLE;
+      endcase // case (state)
+
+      if (rst) begin
+         ibus_req <= 0;
+         state <= IDLE;
+      end
+   end
+
+
+   // ic_addr already carries the next address while pc_fetch is being
+   // updated; ic_addr_match is the address of the access in flight.
+   assign ic_addr = (addr_valid | du_restart_i) ? pc_addr : pc_fetch;
+   assign ic_addr_match = immu_enable_i ? immu_phys_addr : pc_fetch;
+
+generate
+if (FEATURE_INSTRUCTIONCACHE!="NONE") begin : icache_gen
+   // ic_enable_r follows ic_enable, but only turns on while no ibus
+   // request is pending and only turns off while no refill is running.
+   reg ic_enable_r;
+   always @(posedge clk `OR_ASYNC_RST)
+     if (rst)
+       ic_enable_r <= 0;
+     else if (ic_enable & !ibus_req)
+       ic_enable_r <= 1;
+     else if (!ic_enable & !ic_refill)
+       ic_enable_r <= 0;
+   wire ic_enabled = ic_enable & ic_enable_r;
+   wire ic_refill_allowed = (!((tlb_miss | pagefault) & immu_enable_i) &
+                             !ctrl_branch_exception_i & !pipeline_flush_i &
+                             !mispredict_stall | doing_rfe_i) &
+                            !tlb_reload_busy & !immu_busy;
+   wire ic_req = padv_i & !decode_except_ibus_err_o &
+                 !decode_except_itlb_miss_o & !except_itlb_miss &
+                 !decode_except_ipagefault_o & !except_ipagefault &
+                 ic_access & ic_refill_allowed;
+
+   // Addresses with any bit set at or above OPTION_ICACHE_LIMIT_WIDTH
+   // bypass the cache, as do pages the IMMU marks as cache inhibited.
+   if (OPTION_ICACHE_LIMIT_WIDTH == OPTION_OPERAND_WIDTH) begin
+      assign ic_access = ic_enabled &
+                         !(immu_cache_inhibit & immu_enable_i);
+   end else if (OPTION_ICACHE_LIMIT_WIDTH < OPTION_OPERAND_WIDTH) begin
+      assign ic_access = ic_enabled &
+                         ic_addr_match[OPTION_OPERAND_WIDTH-1:
+                                       OPTION_ICACHE_LIMIT_WIDTH] == 0 &
+                         !(immu_cache_inhibit & immu_enable_i);
+   end else begin
+      initial begin
+         $display("ERROR: OPTION_ICACHE_LIMIT_WIDTH > OPTION_OPERAND_WIDTH");
+         $finish();
+      end
+   end
+
+   /* mor1kx_icache AUTO_TEMPLATE (
+    // Outputs
+    .cpu_ack_o (ic_ack),
+    .cpu_dat_o (ic_dat[OPTION_OPERAND_WIDTH-1:0]),
+    .spr_bus_dat_o (spr_bus_dat_ic_o),
+    .spr_bus_ack_o (spr_bus_ack_ic_o),
+    .refill_o (ic_refill),
+    .refill_req_o (ic_refill_req),
+    .refill_done_o (ic_refill_done),
+    .invalidate_o (ic_invalidate),
+    // Inputs
+    .rst (rst),
+    .ic_imem_err_i (imem_err),
+    .ic_access_i (ic_access),
+    .cpu_adr_i (ic_addr),
+    .cpu_adr_match_i (ic_addr_match),
+    .cpu_req_i (ic_req),
+    .wradr_i (ibus_adr),
+    .wrdat_i (ibus_dat_i),
+    .we_i (ibus_ack_i),
+    );*/
+
+   mor1kx_icache
+     #(
+       .OPTION_ICACHE_BLOCK_WIDTH(OPTION_ICACHE_BLOCK_WIDTH),
+       .OPTION_ICACHE_SET_WIDTH(OPTION_ICACHE_SET_WIDTH),
+       .OPTION_ICACHE_WAYS(OPTION_ICACHE_WAYS),
+       .OPTION_ICACHE_LIMIT_WIDTH(OPTION_ICACHE_LIMIT_WIDTH)
+       )
+   mor1kx_icache
+     (/*AUTOINST*/
+      // Outputs
+      .refill_o (ic_refill), // Templated
+      .refill_req_o (ic_refill_req), // Templated
+      .refill_done_o (ic_refill_done), // Templated
+      .invalidate_o (ic_invalidate), // Templated
+      .cpu_ack_o (ic_ack), // Templated
+      .cpu_dat_o (ic_dat[OPTION_OPERAND_WIDTH-1:0]), // Templated
+      .spr_bus_dat_o (spr_bus_dat_ic_o), // Templated
+      .spr_bus_ack_o (spr_bus_ack_ic_o), // Templated
+      .cache_hit_o (ic_hit_o),
+      // Inputs
+      .clk (clk),
+      .rst (rst), // Templated
+      .ic_imem_err_i (imem_err),
+      .ic_access_i (ic_access), // Templated
+      .cpu_adr_i (ic_addr), // Templated
+      .cpu_adr_match_i (ic_addr_match), // Templated
+      .cpu_req_i (ic_req), // Templated
+      .wradr_i (ibus_adr), // Templated
+      .wrdat_i (ibus_dat_i), // Templated
+      .we_i (ibus_ack_i), // Templated
+      .spr_bus_addr_i (spr_bus_addr_i[15:0]),
+      .spr_bus_we_i (spr_bus_we_i),
+      .spr_bus_stb_i (spr_bus_stb_i),
+      .spr_bus_dat_i (spr_bus_dat_i[OPTION_OPERAND_WIDTH-1:0]));
+end else begin // block: icache_gen
+   // No instruction cache: tie off everything the cache would drive
+   assign ic_access = 0;
+   assign ic_refill = 0;
+   assign ic_refill_req = 1'b0;
+   assign ic_refill_done = 0;
+   assign ic_ack = 0;
+   assign ic_hit_o = 0;
+   assign ic_dat = 0;
+   assign ic_invalidate = 0;
+   assign spr_bus_dat_ic_o = 0;
+   assign spr_bus_ack_ic_o = 0;
+end
+endgenerate
+
+generate
+if (FEATURE_IMMU!="NONE") begin : immu_gen
+   wire [OPTION_OPERAND_WIDTH-1:0] virt_addr = ic_addr;
+   wire immu_spr_bus_stb;
+   wire immu_enable;
+   // small hack to delay immu spr reads by one cycle
+   // ideally the spr accesses should work so that the address is presented
+   // in execute stage and the delayed data should be available in control
+   // stage, but this is not how things currently work.
+   assign immu_spr_bus_stb = spr_bus_stb_i & (!padv_ctrl_i | spr_bus_we_i);
+
+   // Translation is switched off while flushing or stalled on a mispredict
+   assign immu_enable = immu_enable_i & !pipeline_flush_i & !mispredict_stall;
+
+   /* mor1kx_immu AUTO_TEMPLATE (
+    .enable_i (immu_enable),
+    .busy_o (immu_busy),
+    .phys_addr_o (immu_phys_addr),
+    .cache_inhibit_o (immu_cache_inhibit),
+    .tlb_miss_o (tlb_miss),
+    .tlb_reload_req_o (tlb_reload_req),
+    .tlb_reload_addr_o (tlb_reload_addr),
+    .tlb_reload_pagefault_o (tlb_reload_pagefault),
+    .tlb_reload_ack_i (tlb_reload_ack),
+    .tlb_reload_data_i (tlb_reload_data),
+    .tlb_reload_busy_o (tlb_reload_busy),
+    .tlb_reload_pagefault_clear_i (except_ipagefault_clear),
+    .pagefault_o (pagefault),
+    .spr_bus_dat_o (spr_bus_dat_immu_o),
+    .spr_bus_ack_o (spr_bus_ack_immu_o),
+    .spr_bus_stb_i (immu_spr_bus_stb),
+    .virt_addr_i (virt_addr),
+    .virt_addr_match_i (pc_fetch),
+    ); */
+   mor1kx_immu
+     #(
+       .FEATURE_IMMU_HW_TLB_RELOAD(FEATURE_IMMU_HW_TLB_RELOAD),
+       .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH),
+       .OPTION_IMMU_SET_WIDTH(OPTION_IMMU_SET_WIDTH),
+       .OPTION_IMMU_WAYS(OPTION_IMMU_WAYS)
+       )
+   mor1kx_immu
+     (/*AUTOINST*/
+      // Outputs
+      .busy_o (immu_busy), // Templated
+      .phys_addr_o (immu_phys_addr), // Templated
+      .cache_inhibit_o (immu_cache_inhibit), // Templated
+      .tlb_miss_o (tlb_miss), // Templated
+      .pagefault_o (pagefault), // Templated
+      .tlb_reload_req_o (tlb_reload_req), // Templated
+      .tlb_reload_addr_o (tlb_reload_addr), // Templated
+      .tlb_reload_pagefault_o (tlb_reload_pagefault), // Templated
+      .tlb_reload_busy_o (tlb_reload_busy), // Templated
+      .spr_bus_dat_o (spr_bus_dat_immu_o), // Templated
+      .spr_bus_ack_o (spr_bus_ack_immu_o), // Templated
+      // Inputs
+      .clk (clk),
+      .rst (rst),
+      .enable_i (immu_enable), // Templated
+      .virt_addr_i (virt_addr), // Templated
+      .virt_addr_match_i (pc_fetch), // Templated
+      .supervisor_mode_i (supervisor_mode_i),
+      .tlb_reload_ack_i (tlb_reload_ack), // Templated
+      .tlb_reload_data_i (tlb_reload_data), // Templated
+      .tlb_reload_pagefault_clear_i (except_ipagefault_clear), // Templated
+      .spr_bus_addr_i (spr_bus_addr_i[15:0]),
+      .spr_bus_we_i (spr_bus_we_i),
+      .spr_bus_stb_i (immu_spr_bus_stb), // Templated
+      .spr_bus_dat_i (spr_bus_dat_i[OPTION_OPERAND_WIDTH-1:0]));
+end else begin
+   // No IMMU: tie off the translation status signals.
+   // NOTE(review): immu_phys_addr, tlb_reload_addr, spr_bus_dat_immu_o and
+   // spr_bus_ack_immu_o are not driven in this branch - confirm that the
+   // instantiating module does not rely on them when FEATURE_IMMU is "NONE".
+   assign immu_cache_inhibit = 0;
+   assign immu_busy = 0;
+   assign tlb_miss = 0;
+   assign pagefault = 0;
+   assign tlb_reload_busy = 0;
+   assign tlb_reload_req = 0;
+   assign tlb_reload_pagefault = 0;
+end
+endgenerate
+
+endmodule // mor1kx_fetch_cappuccino
diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_fetch_espresso.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_fetch_espresso.v
new file mode 100644
index 0000000..e44913e
--- /dev/null
+++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_fetch_espresso.v
@@ -0,0 +1,287 @@
+/* ****************************************************************************
+  This Source Code Form is subject to the terms of the
+  Open Hardware Description License, v. 1.0. If a copy
+  of the OHDL was not distributed with this file, You
+  can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt
+
+  Description: mor1kx espresso fetch unit
+
+  Fetch insn, advance PC (or take new branch address) on padv_i.
+ + What we might want to do is have a 1-insn buffer here, so when the current + insn is fetched, but the main pipeline doesn't want it yet + + indicate ibus errors + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_fetch_espresso + (/*AUTOARG*/ + // Outputs + ibus_adr_o, ibus_req_o, ibus_burst_o, decode_insn_o, + next_fetch_done_o, fetch_rfa_adr_o, fetch_rfb_adr_o, pc_fetch_o, + pc_fetch_next_o, decode_except_ibus_err_o, fetch_advancing_o, + // Inputs + clk, rst, ibus_err_i, ibus_ack_i, ibus_dat_i, padv_i, + branch_occur_i, branch_dest_i, du_restart_i, du_restart_pc_i, + fetch_take_exception_branch_i, execute_waiting_i, du_stall_i, + stepping_i + ); + + parameter OPTION_OPERAND_WIDTH = 32; + parameter OPTION_RF_ADDR_WIDTH = 5; + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}; + + + input clk, rst; + + // interface to ibus + output [OPTION_OPERAND_WIDTH-1:0] ibus_adr_o; + output ibus_req_o; + output ibus_burst_o; + input ibus_err_i; + input ibus_ack_i; + input [`OR1K_INSN_WIDTH-1:0] ibus_dat_i; + + // pipeline control input + input padv_i; + + // interface to decode unit + output reg [`OR1K_INSN_WIDTH-1:0] decode_insn_o; + // Indication to pipeline control that the fetch is valid + output next_fetch_done_o; + + output [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfa_adr_o; + output [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfb_adr_o; + + // Signal back to the control + output [OPTION_OPERAND_WIDTH-1:0] pc_fetch_o; + output [OPTION_OPERAND_WIDTH-1:0] pc_fetch_next_o; + + + // branch/jump indication + input branch_occur_i; + input [OPTION_OPERAND_WIDTH-1:0] branch_dest_i; + + // restart signals from debug unit + input du_restart_i; + input [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_i; + + input fetch_take_exception_branch_i; + + input execute_waiting_i; + + // CPU is stalled + input du_stall_i; + + // We're 
single stepping - this should cause us to fetch only a single insn + input stepping_i; + + + // instruction ibus error indication out + output reg decode_except_ibus_err_o; + + output fetch_advancing_o; + + // registers + reg [OPTION_OPERAND_WIDTH-1:0] pc_fetch; + reg fetch_req; + reg next_insn_buffered; + reg [OPTION_OPERAND_WIDTH-1:0] insn_buffer; /* NOTE(review): declared with operand width, but it is loaded from ibus_dat_i, which is OR1K_INSN_WIDTH bits wide */ + reg branch_occur_r; + reg bus_access_done_re_r; + reg advancing_into_branch; + reg bus_access_done_r; + reg wait_for_exception_after_ibus_err; + + wire [OPTION_OPERAND_WIDTH-1:0] pc_fetch_next; + wire bus_access_done; + wire bus_access_done_fe; + wire branch_occur_re; + wire awkward_transition_to_branch_target; + wire taking_branch; + wire jal_buffered; + wire retain_fetch_pc; + + assign taking_branch = branch_occur_i & padv_i; /* a branch is indicated in a cycle where the pipeline advances */ + + assign bus_access_done = (ibus_ack_i | ibus_err_i) & !(taking_branch); /* ack or error completes the access; masked while a branch is being taken */ + + assign pc_fetch_next = pc_fetch + 4; + + assign ibus_adr_o = pc_fetch; + assign ibus_req_o = fetch_req; + assign ibus_burst_o = 0; /* burst output is tied low in this unit */ + + assign fetch_advancing_o = (padv_i | fetch_take_exception_branch_i | + stepping_i) & + next_fetch_done_o; + + // Early RF address fetch + assign fetch_rfa_adr_o = insn_buffer[`OR1K_RA_SELECT]; + assign fetch_rfb_adr_o = insn_buffer[`OR1K_RB_SELECT]; + + assign jal_buffered = insn_buffer[`OR1K_OPCODE_SELECT]==`OR1K_OPCODE_JALR || + insn_buffer[`OR1K_OPCODE_SELECT]==`OR1K_OPCODE_JAL; + + assign retain_fetch_pc = jal_buffered & bus_access_done; /* blocks the normal pc_fetch update and the re-assertion of fetch_req below */ + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + pc_fetch <= OPTION_RESET_PC; + else if (fetch_take_exception_branch_i | + (((bus_access_done & !ibus_err_i) | taking_branch) & + (!execute_waiting_i | !next_insn_buffered) & + !retain_fetch_pc) | + awkward_transition_to_branch_target | + du_restart_i) + // next PC - are we going somewhere else or advancing? + pc_fetch <= du_restart_i ? du_restart_pc_i : + (fetch_take_exception_branch_i | taking_branch) ? 
+ branch_dest_i : pc_fetch_next; + + // Actually goes to pipeline control + assign pc_fetch_o = pc_fetch; + assign pc_fetch_next_o = pc_fetch_next; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + fetch_req <= 1; + else if (fetch_take_exception_branch_i | du_restart_i) + fetch_req <= 1; + else if (padv_i) + // Force de-assert of req signal when branching. + // This is to stop (ironically) the case where we've got the + // instruction we're branching to already coming in on the bus, + // which we usually don't assume will happen. + // TODO: fix things so that we don't have to force a penalty to make + // it work properly. + fetch_req <= !branch_occur_i & !du_stall_i; + else if (du_stall_i) + fetch_req <= fetch_req & !bus_access_done; + else if (!fetch_req & !execute_waiting_i & + !wait_for_exception_after_ibus_err & !retain_fetch_pc & + !du_stall_i & !stepping_i) + fetch_req <= 1; + else if (bus_access_done & (fetch_take_exception_branch_i | + execute_waiting_i | ibus_err_i | stepping_i)) + fetch_req <= 0; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + begin + bus_access_done_r <= 0; + branch_occur_r <= 0; + end + else + begin + bus_access_done_r <= bus_access_done; + branch_occur_r <= branch_occur_i; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + advancing_into_branch <= 0; + else + advancing_into_branch <= fetch_advancing_o & branch_occur_i; + + assign next_fetch_done_o = (bus_access_done_r | next_insn_buffered) & + // Whenever we've just changed the fetch PC to + // take a branch this will gate off any ACKs we + // might get (legit or otherwise) from where we're + // getting our instructions from (bus/cache). + !(advancing_into_branch); + + assign branch_occur_re = branch_occur_i & !branch_occur_r; + + /* When this occurs we had the insn burst stream finish just as we + had a new branch address requested. Because the control logic will + immediately continue onto the delay slot instruction, the branch target + is only valid for 1 cycle. 
The PC out to the bus/cache will then need + to change 1 cycle after it requested the insn after the delay slot. + This is annoying for the bus control/cache logic, but should result in + fewer cycles wasted fetching something we don't need, and as well reduce + the number of flops as we don't need to save the target PC which we had + for only 1 cycle */ + assign awkward_transition_to_branch_target = branch_occur_re & + bus_access_done_fe; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + else if (fetch_take_exception_branch_i | (du_stall_i & !execute_waiting_i)) + // Put a NOP in the pipeline when starting exception - remove any state + // which may be causing the exception + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + else if ((padv_i & ( + bus_access_done_r | + bus_access_done | + next_insn_buffered + ) & + !branch_occur_r ) | + // This case is when we stalled to get the delay-slot instruction + // and we don't get enough padv to push it through the buffer + (branch_occur_i & padv_i & bus_access_done_re_r) | + (bus_access_done_fe & stepping_i)) + decode_insn_o <= insn_buffer; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + decode_except_ibus_err_o <= 0; + else if ((padv_i | fetch_take_exception_branch_i) & branch_occur_i | + du_stall_i) + decode_except_ibus_err_o <= 0; + else if (fetch_req) + decode_except_ibus_err_o <= ibus_err_i; + + // Register rising edge on bus_access_done + always @(posedge clk `OR_ASYNC_RST) + if (rst) + bus_access_done_re_r <= 0; + else + bus_access_done_re_r <= bus_access_done & !bus_access_done_r; + + assign bus_access_done_fe = !bus_access_done & bus_access_done_r; + + /* If insn_buffer contains the next insn we need, save that information + here */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + next_insn_buffered <= 0; + else if (fetch_take_exception_branch_i) + next_insn_buffered <= 0; + else if (padv_i) + // Next instruction is usually buffered when we've got bus ack and + // 
pipeline advance, except when we're branching (usually throw + // away the fetch when branch is being indicated) + next_insn_buffered <= ibus_ack_i & !branch_occur_i; + else if (ibus_ack_i & execute_waiting_i) + next_insn_buffered <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + insn_buffer <= {`OR1K_OPCODE_NOP,26'd0}; + else if (ibus_ack_i & (!execute_waiting_i | !next_insn_buffered) & + // Don't buffer instruction after delay slot instruction + // (usually we're receiving it as taking branch is asserted) + // it could be another jump instruction and having it in + // the insn_buffer has annoying side-effects. + !taking_branch) + insn_buffer <= ibus_dat_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + wait_for_exception_after_ibus_err <= 0; + else if (fetch_take_exception_branch_i) + wait_for_exception_after_ibus_err <= 0; + else if (ibus_err_i) + wait_for_exception_after_ibus_err <= 1; /* latched on a bus error and held until the exception branch is taken */ + +endmodule // mor1kx_fetch_espresso diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_fetch_prontoespresso.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_fetch_prontoespresso.v new file mode 100644 index 0000000..ec187e2 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_fetch_prontoespresso.v @@ -0,0 +1,549 @@ + /* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx pronto espresso fetch unit + + Fetch insn, advance PC (or take new branch address) on padv_i. 
+ + What we might want to do is have a 1-insn buffer here, so when the current + insn is fetched, but the main pipeline doesn't want it yet + + indicate ibus errors + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_fetch_prontoespresso + (/*AUTOARG*/ + // Outputs + ibus_adr_o, ibus_req_o, ibus_burst_o, decode_insn_o, fetched_pc_o, + fetch_ready_o, fetch_rfa_adr_o, fetch_rfb_adr_o, fetch_rf_re_o, + pc_fetch_next_o, decode_except_ibus_err_o, fetch_sleep_o, + fetch_quick_branch_o, spr_bus_dat_ic_o, spr_bus_ack_ic_o, + // Inputs + clk, rst, ibus_err_i, ibus_ack_i, ibus_dat_i, ic_enable, padv_i, + branch_occur_i, branch_dest_i, ctrl_insn_done_i, du_restart_i, + du_restart_pc_i, fetch_take_exception_branch_i, execute_waiting_i, + du_stall_i, stepping_i, flag_i, flag_clear_i, flag_set_i, + spr_bus_addr_i, spr_bus_we_i, spr_bus_stb_i, spr_bus_dat_i + ); + + parameter OPTION_OPERAND_WIDTH = 32; + parameter OPTION_RF_ADDR_WIDTH = 5; + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}; + // Mini cache registers, signals + parameter FEATURE_INSTRUCTIONCACHE = "NONE"; + parameter OPTION_ICACHE_BLOCK_WIDTH = 3; // 3 for 8 words + parameter FEATURE_QUICK_BRANCH_DETECTION = "NONE"; + + input clk, rst; + + // interface to ibus + output [OPTION_OPERAND_WIDTH-1:0] ibus_adr_o; + output ibus_req_o; + output ibus_burst_o; + input ibus_err_i; + input ibus_ack_i; + input [`OR1K_INSN_WIDTH-1:0] ibus_dat_i; + input ic_enable; + + // pipeline control input + input padv_i; + + // interface to decode unit + output reg [`OR1K_INSN_WIDTH-1:0] decode_insn_o; + + // PC of the current instruction, SPR_PPC basically + output [OPTION_OPERAND_WIDTH-1:0] fetched_pc_o; + + // Indication to pipeline control that the fetch stage is ready + output fetch_ready_o; + + // Signals going to register file to do the read access as we 
+ // register the instruction out to the decode stage + output [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfa_adr_o; + output [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfb_adr_o; + output fetch_rf_re_o; + + // Signal back to the control + output [OPTION_OPERAND_WIDTH-1:0] pc_fetch_next_o; + + + // branch/jump indication + input branch_occur_i; + input [OPTION_OPERAND_WIDTH-1:0] branch_dest_i; + + // Instruction "retire" indication from control stage + input ctrl_insn_done_i; + + // restart signals from debug unit + input du_restart_i; + input [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_i; + + input fetch_take_exception_branch_i; + + input execute_waiting_i; + + // CPU is stalled + input du_stall_i; + + // We're single stepping - this should cause us to fetch only a single insn + input stepping_i; + + // Flag status information + input flag_i, flag_clear_i, flag_set_i; + + // instruction ibus error indication out + output reg decode_except_ibus_err_o; + + // fetch sleep mode enabled (due to jump-to-self instruction) + output fetch_sleep_o; + + // Indicate to the control stage that we had zero delay fetching + // the branch target address + output fetch_quick_branch_o; + + // SPR interface + input [15:0] spr_bus_addr_i; + input spr_bus_we_i; + input spr_bus_stb_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_i; + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_ic_o; + output spr_bus_ack_ic_o; + + + // Registers + reg [OPTION_OPERAND_WIDTH-1:0] pc; + reg [OPTION_OPERAND_WIDTH-1:0] fetched_pc; + reg fetch_req; + reg next_insn_will_branch; + reg have_early_pc_next; + reg jump_insn_in_decode; + reg took_early_calc_pc; + reg [1:0] took_early_calc_pc_r; + reg padv_r; + reg took_branch; + reg took_branch_r; + reg execute_waiting_r; + reg sleep; + reg complete_current_req; + reg no_rf_read; + reg new_insn_wasnt_ready; + reg took_early_pc_onto_cache_hit; + reg waited_with_early_pc_onto_cache_hit; + + // Wires + wire [`OR1K_INSN_WIDTH-1:0] new_insn; + wire new_insn_ready; + wire 
[OPTION_OPERAND_WIDTH-1:0] pc_fetch_next; + wire [OPTION_OPERAND_WIDTH-1:0] pc_plus_four; + wire [OPTION_OPERAND_WIDTH-1:0] early_pc_next; + wire padv_deasserted; + wire padv_asserted; + wire [`OR1K_OPCODE_WIDTH-1:0] next_insn_opcode; + wire will_go_to_sleep; + wire mini_cache_hit; + wire mini_cache_hit_ungated; + wire [`OR1K_INSN_WIDTH-1:0] mini_cache_insn; + wire hold_decode_output; + wire next_instruction_to_decode_condition; + + assign pc_plus_four = pc + 4; + + assign pc_fetch_next = have_early_pc_next ? + early_pc_next : pc_plus_four; + + assign ibus_adr_o = pc; + assign ibus_req_o = (fetch_req & !(fetch_take_exception_branch_i/* | branch_occur_i*/) + // This is needed in the case that: + // 1. a burst just finished and ack in went low because of this + // 2. the instruction we just ACKed is a multicycle insn so the + // execute_waiting_i goes high, but the bus interface will have + // already put out the request onto the bus. It causes a bug + // if we deassert the req from here 1 cycle later, so put this + // signal into the assign logic so that the first cycle of it + // causes req to go low, after which fetch_req is deasserted + // and should handle it + & !(execute_waiting_i & fetch_req) + & !mini_cache_hit_ungated) | + complete_current_req; + assign ibus_burst_o = 0; + + assign fetch_ready_o = new_insn_ready | jump_insn_in_decode | ibus_err_i; + + assign pc_fetch_next_o = pc_fetch_next; + + assign new_insn = mini_cache_hit ? mini_cache_insn : ibus_dat_i; + + assign new_insn_ready = mini_cache_hit | ibus_ack_i; + + // Register file control + assign fetch_rfa_adr_o = new_insn_ready ? new_insn[`OR1K_RA_SELECT] : 0; + assign fetch_rfb_adr_o = new_insn_ready ? 
new_insn[`OR1K_RB_SELECT] : 0; + assign fetch_rf_re_o = new_insn_ready & (padv_i | stepping_i) & + !(no_rf_read | hold_decode_output); + + // Pick out opcode of next instruction to go to decode stage + assign next_insn_opcode = new_insn[`OR1K_OPCODE_SELECT]; + + // Can calculate next PC based on instruction coming in + assign early_pc_next = {OPTION_OPERAND_WIDTH{have_early_pc_next}} & + ({{4{new_insn[25]}}, + new_insn[`OR1K_JUMPBRANCH_IMMEDIATE_SELECT], + 2'b00} + pc); + + assign will_go_to_sleep = have_early_pc_next & + (early_pc_next == pc); /* the early-calculated target equals the current PC, i.e. a jump to self */ + + assign fetch_sleep_o = sleep; + + // The pipeline advance signal deasserted for the instruction + // we just put out, and we're still attempting to fetch. This should + // result in a deassert cycle on the request signal out to the bus. + // But, we don't want this to indicate when padv_i was deasserted for + // a branch, because we will know about that, we just want this to + // indicate it was deasserted for other reasons. + assign padv_deasserted = padv_r & !padv_i & fetch_req & !took_branch; + + assign padv_asserted = !padv_r & padv_i; + + // This makes us hold the decode stage output for an additional + // cycle when we've already got the next instruction in the + // register output to the decode stage, but the pipeline has + // stalled. 
+ assign hold_decode_output = (padv_asserted & + mini_cache_hit & took_branch_r & + !new_insn_wasnt_ready & + took_early_calc_pc_r[1]) || + waited_with_early_pc_onto_cache_hit; + always @* /* pre-decode of the incoming opcode: sets have_early_pc_next, next_insn_will_branch and no_rf_read; all three are 0 when no new instruction is ready */ + if (new_insn_ready) + case (next_insn_opcode) + `OR1K_OPCODE_J, + `OR1K_OPCODE_JAL: begin + have_early_pc_next = 1; + next_insn_will_branch = 1; + no_rf_read = 1; + end + `OR1K_OPCODE_JR, + `OR1K_OPCODE_JALR: begin + have_early_pc_next = 0; + next_insn_will_branch = 1; + no_rf_read = 0; + end + `OR1K_OPCODE_BNF: begin + have_early_pc_next = !(flag_i | flag_set_i) | flag_clear_i; + next_insn_will_branch = !(flag_i | flag_set_i) | flag_clear_i; + no_rf_read = 1; + end + `OR1K_OPCODE_BF: begin + have_early_pc_next = !(!flag_i | flag_clear_i) |flag_set_i; + next_insn_will_branch = !(!flag_i | flag_clear_i) |flag_set_i; + no_rf_read = 1; + end + `OR1K_OPCODE_SYSTRAPSYNC, + `OR1K_OPCODE_RFE: begin + have_early_pc_next = 0; + next_insn_will_branch = 1; + no_rf_read = 1; + end + default: begin + have_early_pc_next = 0; + next_insn_will_branch = 0; + no_rf_read = 0; + end + endcase // case (next_insn_opcode) + else + begin + have_early_pc_next = 0; + next_insn_will_branch = 0; + no_rf_read = 0; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + begin + pc <= OPTION_RESET_PC; + fetched_pc <= OPTION_RESET_PC; + end + else if (branch_occur_i & !took_early_calc_pc) + begin + pc <= branch_dest_i; + end + else if (fetch_take_exception_branch_i & !du_stall_i) + begin + pc <= branch_dest_i; + end + else if (new_insn_ready & (padv_i | stepping_i) & + !hold_decode_output) + begin + pc <= pc_fetch_next_o; + fetched_pc <= pc; + end + else if (du_restart_i) + begin + pc <= du_restart_pc_i; + end + else if (fetch_take_exception_branch_i & du_stall_i) + begin + pc <= du_restart_pc_i; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + new_insn_wasnt_ready <= 0; + else if (branch_occur_i & !took_early_calc_pc) + new_insn_wasnt_ready <= !new_insn_ready; + else if (new_insn_ready & (padv_i 
| stepping_i) & !padv_deasserted) + new_insn_wasnt_ready <= 0; + + assign fetched_pc_o = fetched_pc; + + assign next_instruction_to_decode_condition = new_insn_ready & + (padv_i | stepping_i) & + !padv_deasserted & + !hold_decode_output & + !((branch_occur_i & padv_i & + !took_early_calc_pc) | + fetch_take_exception_branch_i); + + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + else if (sleep | du_stall_i) + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + else if (next_instruction_to_decode_condition) + decode_insn_o <= new_insn; + else if (branch_occur_i & padv_i) + // We've just taken a branch, put a nop on the + // instruction to the rest of the pipeline + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + else if (fetch_take_exception_branch_i) + // Exception was just taken, get rid of whatever + // we're outputting + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + else if (took_early_calc_pc) + // This covers the case where, for some reason, + // we don't get the branch_occur_i + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + else if (ctrl_insn_done_i & !new_insn_ready) + // If the current instruction in the decode stage is retired + // then let's put a no-op back in the pipeline + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + fetch_req <= 1'b1; + else if (fetch_req & stepping_i & new_insn_ready) + // Deassert on ack + fetch_req <= 1'b0; + else if (!fetch_req & du_stall_i) + fetch_req <= 1'b0; + else if (ibus_err_i) + fetch_req <= 1'b0; + else if (sleep) + fetch_req <= 1'b0; + else if (next_insn_will_branch) + fetch_req <= 1'b0; + else if (execute_waiting_i) + /* + Put the execute wait signal through this register to break any long + chains of logic from the execute stage (LSU, ALU) which could result + from using it to just gate the req signal out. 
+ TODO - actually check the impact of gating fetch_req_o with + execute_waiting_i + */ + fetch_req <= 1'b0; + else if (padv_deasserted) + fetch_req <= 1'b0; + else if (mini_cache_hit_ungated) + // We'll get this ungated signal immediately after we've + // terminated a burst, so we'll know if we really should + // fetch the branch target or whether it's in cache. + fetch_req <= 1'b0; + else + fetch_req <= 1'b1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + took_early_pc_onto_cache_hit <= 0; + else if (padv_i) + took_early_pc_onto_cache_hit <= took_early_calc_pc & mini_cache_hit & + !fetch_take_exception_branch_i; + else if (ctrl_insn_done_i) + took_early_pc_onto_cache_hit <= 0; + + // This register signifies when: + // a) we had a branch to somewhere where we took the early calculated PC and + // that branch location was a hit in the cache + // b) the subsequent instruction wasn't in the cache, so we put the + // insn out to the decode stage, but wasn't immediately retired by the + // control stage, so we must wait until the next instruction is ready + // before it will be completed by the control stage + always @(posedge clk `OR_ASYNC_RST) + if (rst) + waited_with_early_pc_onto_cache_hit <= 0; + else if (took_branch_r | padv_i) + waited_with_early_pc_onto_cache_hit <= took_early_pc_onto_cache_hit & + !fetch_ready_o; + else if (ctrl_insn_done_i) + waited_with_early_pc_onto_cache_hit <= 0; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + jump_insn_in_decode <= 0; + else if (sleep) + jump_insn_in_decode <= 0; + else if (!jump_insn_in_decode & next_insn_will_branch & new_insn_ready & padv_i) + jump_insn_in_decode <= 1; + else + jump_insn_in_decode <= 0; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + took_early_calc_pc <= 0; + else if (sleep) + took_early_calc_pc <= 0; + else if (next_insn_will_branch & have_early_pc_next & padv_i) + took_early_calc_pc <= 1; + else + took_early_calc_pc <= 0; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + 
took_early_calc_pc_r <= 0; + else + took_early_calc_pc_r <= {took_early_calc_pc_r[0], took_early_calc_pc}; + + always @(posedge clk) + padv_r <= padv_i; /* NOTE(review): this register and took_branch/took_branch_r below have no reset branch, unlike the other registers here - confirm this is intentional */ + + /* Whether it was early branch or not, we've branched, and this + signal will be asserted the cycle after. */ + always @(posedge clk) + begin + took_branch <= (branch_occur_i | fetch_take_exception_branch_i) & + fetch_ready_o; + took_branch_r <= took_branch; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + decode_except_ibus_err_o <= 0; + else if ((padv_i | fetch_take_exception_branch_i) & + branch_occur_i | du_stall_i) + decode_except_ibus_err_o <= 0; + else if (fetch_req) + decode_except_ibus_err_o <= ibus_err_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + sleep <= 1'b0; + else if (fetch_take_exception_branch_i | du_stall_i) + sleep <= 1'b0; + else if (will_go_to_sleep & !stepping_i) + sleep <= 1'b1; + + // A signal to make sure the request out line stays high + // if we've already issued an instruction request and padv_i + // goes low. 
+ always @(posedge clk `OR_ASYNC_RST) + if (rst) + complete_current_req <= 0; + else if (fetch_req & padv_deasserted & !new_insn_ready) + complete_current_req <= 1; + else if (new_insn_ready & complete_current_req) + complete_current_req <= 0; + + // Mini cache logic + genvar i; + generate + /* verilator lint_off WIDTH */ + if (FEATURE_INSTRUCTIONCACHE != "ENABLED") + /* verilator lint_on WIDTH */ + begin : no_mini_cache + assign mini_cache_hit = 0; + assign mini_cache_hit_ungated = 0; + assign mini_cache_insn = {`OR1K_INSN_WIDTH{1'b0}}; + assign fetch_quick_branch_o = 0; + end + else + begin : mini_cache + localparam NUMBER_MINI_CACHE_WORDS = (1< + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_fetch_tcm_prontoespresso + (/*AUTOARG*/ + // Outputs + ibus_adr_o, ibus_req_o, decode_insn_o, fetched_pc_o, fetch_ready_o, + fetch_rfa_adr_o, fetch_rfb_adr_o, fetch_rf_re_o, pc_fetch_next_o, + decode_except_ibus_err_o, fetch_sleep_o, + // Inputs + clk, rst, ibus_err_i, ibus_ack_i, ibus_dat_i, padv_i, + branch_occur_i, branch_dest_i, du_restart_i, du_restart_pc_i, + fetch_take_exception_branch_i, execute_waiting_i, du_stall_i, + stepping_i, flag_i, flag_clear_i, flag_set_i + ); + + parameter OPTION_OPERAND_WIDTH = 32; + parameter OPTION_RF_ADDR_WIDTH = 5; + parameter OPTION_RESET_PC = {{(OPTION_OPERAND_WIDTH-13){1'b0}}, + `OR1K_RESET_VECTOR,8'd0}; + + input clk, rst; + + // interface to ibus + output [OPTION_OPERAND_WIDTH-1:0] ibus_adr_o; + output ibus_req_o; + input ibus_err_i; + input ibus_ack_i; + input [`OR1K_INSN_WIDTH-1:0] ibus_dat_i; + + // pipeline control input + input padv_i; + + // interface to decode unit + output reg [`OR1K_INSN_WIDTH-1:0] decode_insn_o; + + // PC of the current instruction, SPR_PPC basically + output reg [OPTION_OPERAND_WIDTH-1:0] fetched_pc_o; + + // Indication to pipeline control that the fetch stage is ready + output fetch_ready_o; + + // Signals going to 
register file to do the read access as we + // register the instruction out to the decode stage + output [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfa_adr_o; + output [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfb_adr_o; + output fetch_rf_re_o; + + // Signal back to the control which pc we're going to + // deliver next + output [OPTION_OPERAND_WIDTH-1:0] pc_fetch_next_o; + + + // branch/jump indication + input branch_occur_i; + input [OPTION_OPERAND_WIDTH-1:0] branch_dest_i; + + // restart signals from debug unit + input du_restart_i; + input [OPTION_OPERAND_WIDTH-1:0] du_restart_pc_i; + + input fetch_take_exception_branch_i; + + input execute_waiting_i; + + // CPU is stalled + input du_stall_i; + + // We're single stepping - this should cause us to fetch only a single insn + input stepping_i; + + // Flag status information + input flag_i, flag_clear_i, flag_set_i; + + // instruction ibus error indication out + output reg decode_except_ibus_err_o; + + // fetch sleep mode enabled (due to jump-to-self instruction) + output fetch_sleep_o; + + + reg [OPTION_OPERAND_WIDTH-1:0] current_bus_pc; + wire [OPTION_OPERAND_WIDTH-1:0] next_bus_pc; + reg [OPTION_OPERAND_WIDTH-1:0] insn_buffer; /* NOTE(review): declared with operand width, but it is loaded from ibus_dat_i, which is OR1K_INSN_WIDTH bits wide */ + + wire first_bus_req_cycle; + reg addr_pipelined; + reg bus_req, bus_req_r; + wire [`OR1K_OPCODE_WIDTH-1:0] next_insn_opcode; + reg next_insn_will_branch; + reg jump_insn_in_decode; + reg just_took_branch_addr; + wire taking_branch_addr; + reg insn_from_branch_on_input; + reg insn_from_branch_in_pipeline; + reg execute_waiting_r; + wire execute_waiting_deasserted; + wire execute_waiting_asserted; + reg execute_waiting_asserted_r; + wire execute_waited_single_cycle; + reg just_waited_single_cycle; + reg just_waited_single_cycle_r; + reg insn_buffered; + wire buffered_insn_is_jump; + reg push_buffered_jump_through_pipeline; + wire will_go_to_sleep; + reg sleep; + reg fetch_take_exception_branch_r; + reg [3:0] padv_r; + wire long_stall; + + + assign next_bus_pc = current_bus_pc + 4; + assign ibus_adr_o = 
addr_pipelined ? next_bus_pc : current_bus_pc; + + assign pc_fetch_next_o = ibus_adr_o; + + assign ibus_req_o = bus_req & !(stepping_i & ibus_ack_i) | + (execute_waiting_deasserted & + !(insn_buffered & next_insn_will_branch)) | + fetch_take_exception_branch_r; + + // Signal rising edge on bus request signal + assign first_bus_req_cycle = ibus_req_o & !bus_req_r; + + assign taking_branch_addr = (branch_occur_i & padv_i) | + fetch_take_exception_branch_i; + + assign buffered_insn_is_jump = insn_buffered & next_insn_will_branch; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + begin + current_bus_pc <= OPTION_RESET_PC; + just_took_branch_addr <= 0; + end + else if (du_restart_i) + begin + current_bus_pc <= du_restart_pc_i; + just_took_branch_addr <= 0; + end + else if (fetch_take_exception_branch_i) + begin + current_bus_pc <= branch_dest_i; + just_took_branch_addr <= 1; + end + else if (branch_occur_i & padv_i) + begin + current_bus_pc <= branch_dest_i; + just_took_branch_addr <= 1; + end + else if (ibus_ack_i & (padv_i | (just_waited_single_cycle_r && + !({padv_r[0],padv_i}==2'b00))) & + !execute_waited_single_cycle & !stepping_i) + begin + current_bus_pc <= next_bus_pc; + just_took_branch_addr <= 0; + end + else if (execute_waiting_asserted & ibus_ack_i & !just_took_branch_addr) + begin + current_bus_pc <= next_bus_pc; + end + else if (just_took_branch_addr) + begin + just_took_branch_addr <= 0; + end + + else if (long_stall) + begin + // Long wait - this is a work around for an annoying bug which + // I can't solve any other way! + current_bus_pc <= fetched_pc_o + 4; + end + + // BIG assumptions here - that the read only takes a single cycle!! 
+ always @(posedge clk `OR_ASYNC_RST) + if (rst) + begin + insn_from_branch_on_input <= 0; + insn_from_branch_in_pipeline <= 0; + end + else + begin + insn_from_branch_on_input <= just_took_branch_addr; + insn_from_branch_in_pipeline <= insn_from_branch_on_input; + end + + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + bus_req <= 1'b0; + else if (stepping_i & ibus_ack_i) + // Deassert on ack of stepping + bus_req <= 1'b0; + else if (du_stall_i) + bus_req <= 1'b0; + else if (ibus_err_i | decode_except_ibus_err_o) + bus_req <= 1'b0; + else if (sleep) + bus_req <= 1'b0; + else if (execute_waiting_i) + bus_req <= 1'b0; + else if (buffered_insn_is_jump) + bus_req <= 1'b0; + else + bus_req <= 1'b1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + bus_req_r <= 0; + else + bus_req_r <= ibus_req_o; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + addr_pipelined <= 0; + else if (ibus_err_i | decode_except_ibus_err_o | + fetch_take_exception_branch_i) + addr_pipelined <= 0; + else if (first_bus_req_cycle) + addr_pipelined <= 1; + else if (taking_branch_addr) + addr_pipelined <= 0; + else if (just_took_branch_addr) + addr_pipelined <= 1; + else if (just_waited_single_cycle) + addr_pipelined <= 1; + else if (!bus_req) + addr_pipelined <= 0; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + begin + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + fetched_pc_o <= 0; + end + else if (sleep | (du_stall_i & !execute_waiting_i)) + begin + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + end + else if (fetch_take_exception_branch_i & !du_stall_i) + begin + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + end + else if ((padv_i | stepping_i) & ibus_ack_i & (ibus_req_o | stepping_i) & + ((!jump_insn_in_decode & !just_took_branch_addr) | + (insn_from_branch_on_input)) + & !(execute_waited_single_cycle | just_waited_single_cycle)) + begin + decode_insn_o <= ibus_dat_i; + fetched_pc_o <= current_bus_pc; + end + else if (just_waited_single_cycle_r & !execute_waiting_i) + begin + 
decode_insn_o <= ibus_dat_i; + fetched_pc_o <= current_bus_pc; + end + else if (execute_waiting_deasserted & insn_buffered) + begin + decode_insn_o <= insn_buffer; + fetched_pc_o <= fetched_pc_o + 4; + end + else if ((jump_insn_in_decode | branch_occur_i) & padv_i) + // About to jump - remove this instruction from the pipeline + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + else if (fetch_take_exception_branch_i) + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + else if (push_buffered_jump_through_pipeline) + decode_insn_o <= {`OR1K_OPCODE_NOP,26'd0}; + + reg fetch_ready_r; /* NOTE(review): registered copy of fetch_ready_o; it is written below but never read anywhere in this module */ + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + fetch_ready_r <= 0; + else + fetch_ready_r <= fetch_ready_o; + + assign fetch_ready_o = (ibus_ack_i | insn_buffered ) & + !(just_took_branch_addr) & + !(just_waited_single_cycle) & + !du_stall_i | + push_buffered_jump_through_pipeline ; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + decode_except_ibus_err_o <= 0; + else if ((padv_i | fetch_take_exception_branch_i) & + branch_occur_i | du_stall_i) + decode_except_ibus_err_o <= 0; + else if (bus_req) + decode_except_ibus_err_o <= ibus_err_i; + + assign fetch_sleep_o = sleep; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_waiting_r <= 0; + else + execute_waiting_r <= execute_waiting_i; + + assign execute_waiting_deasserted = !execute_waiting_i & execute_waiting_r; + assign execute_waiting_asserted = execute_waiting_i & !execute_waiting_r; + + + // Register file control + assign fetch_rfa_adr_o = insn_buffered ? insn_buffer[`OR1K_RA_SELECT] : + ibus_dat_i[`OR1K_RA_SELECT]; + assign fetch_rfb_adr_o = insn_buffered ? insn_buffer[`OR1K_RB_SELECT] : + ibus_dat_i[`OR1K_RB_SELECT]; + assign fetch_rf_re_o = (ibus_ack_i | execute_waiting_deasserted) & + (padv_i | stepping_i); + + // Pick out opcode of next instruction to go to decode stage + assign next_insn_opcode = insn_buffered ? 
+ insn_buffer[`OR1K_OPCODE_SELECT] : + ibus_dat_i[`OR1K_OPCODE_SELECT]; + + always @* + if ((ibus_ack_i & !just_took_branch_addr) | insn_buffered) + case (next_insn_opcode) + `OR1K_OPCODE_J, + `OR1K_OPCODE_JAL: begin + next_insn_will_branch = 1; + end + `OR1K_OPCODE_JR, + `OR1K_OPCODE_JALR: begin + next_insn_will_branch = 1; + end + `OR1K_OPCODE_BNF: begin + next_insn_will_branch = !(flag_i | flag_set_i) | flag_clear_i; + end + `OR1K_OPCODE_BF: begin + next_insn_will_branch = !(!flag_i | flag_clear_i) |flag_set_i; + end + `OR1K_OPCODE_SYSTRAPSYNC, + `OR1K_OPCODE_RFE: begin + next_insn_will_branch = 1; + end + default: begin + next_insn_will_branch = 0; + end + endcase // case (next_insn_opcode) + else + begin + next_insn_will_branch = 0; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + jump_insn_in_decode <= 0; + else if (sleep) + jump_insn_in_decode <= 0; + else if (!jump_insn_in_decode & next_insn_will_branch & ibus_ack_i) + jump_insn_in_decode <= 1; + else + jump_insn_in_decode <= 0; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + insn_buffer <= 0; + else if (execute_waiting_asserted & ibus_ack_i & !just_took_branch_addr) + insn_buffer <= ibus_dat_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + insn_buffered <= 0; + else if (execute_waiting_asserted & ibus_ack_i & !just_took_branch_addr) + insn_buffered <= 1; + else if (execute_waiting_deasserted) + insn_buffered <= 0; + else if (fetch_take_exception_branch_i) + insn_buffered <= 0; + else if (long_stall) + insn_buffered <= 0; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + push_buffered_jump_through_pipeline <= 0; + else + push_buffered_jump_through_pipeline <= buffered_insn_is_jump & + execute_waiting_deasserted; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + fetch_take_exception_branch_r <= 0; + else + fetch_take_exception_branch_r <= fetch_take_exception_branch_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + sleep <= 1'b0; + else if (fetch_take_exception_branch_i) + 
sleep <= 1'b0; + else if (will_go_to_sleep) + sleep <= 1'b1; + + assign will_go_to_sleep = ibus_dat_i==0 & padv_i & ibus_ack_i & + ibus_req_o & ((!jump_insn_in_decode & + !just_took_branch_addr) | + (insn_from_branch_on_input)); /* triggers on an all-zero instruction word; NOTE(review): presumably the encoding of a jump with zero offset (jump to self) - confirm against the opcode defines */ + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_waiting_asserted_r <= 0; + else + execute_waiting_asserted_r <= execute_waiting_asserted; + + assign execute_waited_single_cycle = execute_waiting_asserted_r & + !execute_waiting_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + begin + just_waited_single_cycle <= 0; + just_waited_single_cycle_r <= 0; + end + else + begin + just_waited_single_cycle <= execute_waited_single_cycle; + just_waited_single_cycle_r <= just_waited_single_cycle; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + padv_r <= 4'd0; + else + padv_r <= {padv_r[2:0],padv_i}; + + assign long_stall = {padv_r,padv_i}==5'b10000 && execute_waiting_i; /* padv_i was high four cycles ago and has been low in every cycle since, including this one, while execute is waiting */ + +endmodule // mor1kx_fetch_tcm_prontoespresso diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_icache.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_icache.v new file mode 100644 index 0000000..e48b754 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_icache.v @@ -0,0 +1,486 @@ +/****************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: Instruction cache implementation + + Copyright (C) 2012-2013 + Stefan Kristiansson + Stefan Wallentowitz + + ******************************************************************************/ + +`include "mor1kx-defines.v" + +module mor1kx_icache + #( + parameter OPTION_OPERAND_WIDTH = 32, + parameter OPTION_ICACHE_BLOCK_WIDTH = 5, + parameter OPTION_ICACHE_SET_WIDTH = 9, + parameter OPTION_ICACHE_WAYS = 2, + parameter OPTION_ICACHE_LIMIT_WIDTH = 32 + ) + ( + input clk, + input rst, + + input ic_imem_err_i, + input ic_access_i, + output refill_o, + output refill_req_o, + output refill_done_o, + output invalidate_o, + output cache_hit_o, + + // CPU Interface + output cpu_ack_o, + output reg [`OR1K_INSN_WIDTH-1:0] cpu_dat_o, + input [OPTION_OPERAND_WIDTH-1:0] cpu_adr_i, + input [OPTION_OPERAND_WIDTH-1:0] cpu_adr_match_i, + input cpu_req_i, + + input [OPTION_OPERAND_WIDTH-1:0] wradr_i, + input [`OR1K_INSN_WIDTH-1:0] wrdat_i, + input we_i, + + // SPR interface + input [15:0] spr_bus_addr_i, + input spr_bus_we_i, + input spr_bus_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_i, + + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o, + output reg spr_bus_ack_o + ); + + // States + localparam IDLE = 4'b0001; + localparam READ = 4'b0010; + localparam REFILL = 4'b0100; + localparam INVALIDATE = 4'b1000; + + // Address space in bytes for a way + localparam WAY_WIDTH = OPTION_ICACHE_BLOCK_WIDTH + OPTION_ICACHE_SET_WIDTH; + /* + * Tag memory layout + * +---------------------------------------------------------+ + * (index) -> | LRU | wayN valid | wayN tag |...| way0 valid | way0 tag | + * +---------------------------------------------------------+ + */ + + // The tag is the part left of the index + localparam TAG_WIDTH = (OPTION_ICACHE_LIMIT_WIDTH - WAY_WIDTH); + + // The tag memory contains entries with OPTION_ICACHE_WAYS parts of + // 
each TAGMEM_WAY_WIDTH. Each of those is tag and a valid flag. + localparam TAGMEM_WAY_WIDTH = TAG_WIDTH + 1; + localparam TAGMEM_WAY_VALID = TAGMEM_WAY_WIDTH - 1; + + // Additionally, the tag memory entry contains an LRU value. The + // width of this is actually 0 for OPTION_ICACHE_WAYS==1 + localparam TAG_LRU_WIDTH = OPTION_ICACHE_WAYS*(OPTION_ICACHE_WAYS-1) >> 1; + + // We have signals for the LRU which are not used for one way + // caches. To avoid signal width [-1:0] this generates [0:0] + // vectors for them, which are removed automatically then. + localparam TAG_LRU_WIDTH_BITS = (OPTION_ICACHE_WAYS >= 2) ? TAG_LRU_WIDTH : 1; + + // Compute the total sum of the entry elements + localparam TAGMEM_WIDTH = TAGMEM_WAY_WIDTH * OPTION_ICACHE_WAYS + TAG_LRU_WIDTH; + + // For convenience we define the position of the LRU in the tag + // memory entries + localparam TAG_LRU_MSB = TAGMEM_WIDTH - 1; + localparam TAG_LRU_LSB = TAG_LRU_MSB - TAG_LRU_WIDTH + 1; + + // FSM state signals + reg [3:0] state; + wire read; + wire refill; + wire invalidate; + + reg [WAY_WIDTH-1:OPTION_ICACHE_BLOCK_WIDTH] invalidate_adr; + wire [31:0] next_refill_adr; + wire refill_done; + wire refill_hit; + reg [(1<<(OPTION_ICACHE_BLOCK_WIDTH-2))-1:0] refill_valid; + reg [(1<<(OPTION_ICACHE_BLOCK_WIDTH-2))-1:0] refill_valid_r; + + // The index we read and write from tag memory + wire [OPTION_ICACHE_SET_WIDTH-1:0] tag_rindex; + wire [OPTION_ICACHE_SET_WIDTH-1:0] tag_windex; + + // The data from the tag memory + wire [TAGMEM_WIDTH-1:0] tag_dout; + wire [TAG_LRU_WIDTH_BITS-1:0] tag_lru_out; + wire [TAGMEM_WAY_WIDTH-1:0] tag_way_out [OPTION_ICACHE_WAYS-1:0]; + + // The data to the tag memory + wire [TAGMEM_WIDTH-1:0] tag_din; + reg [TAG_LRU_WIDTH_BITS-1:0] tag_lru_in; + reg [TAGMEM_WAY_WIDTH-1:0] tag_way_in [OPTION_ICACHE_WAYS-1:0]; + + reg [TAGMEM_WAY_WIDTH-1:0] tag_way_save [OPTION_ICACHE_WAYS-1:0]; + + // Whether to write to the tag memory in this cycle + reg tag_we; + + // This is the tag we
need to write to the tag memory during refill + wire [TAG_WIDTH-1:0] tag_wtag; + + // This is the tag we check against + wire [TAG_WIDTH-1:0] tag_tag; + + // Access to the way memories + wire [WAY_WIDTH-3:0] way_raddr[OPTION_ICACHE_WAYS-1:0]; + wire [WAY_WIDTH-3:0] way_waddr[OPTION_ICACHE_WAYS-1:0]; + wire [OPTION_OPERAND_WIDTH-1:0] way_din[OPTION_ICACHE_WAYS-1:0]; + wire [OPTION_OPERAND_WIDTH-1:0] way_dout[OPTION_ICACHE_WAYS-1:0]; + reg [OPTION_ICACHE_WAYS-1:0] way_we; + + // Does any way hit? + wire hit; + wire [OPTION_ICACHE_WAYS-1:0] way_hit; + + // This is the least recently used way before accessing the memory. + // It is one-hot encoded. + wire [OPTION_ICACHE_WAYS-1:0] lru; + + // Register that stores the LRU value from lru + reg [OPTION_ICACHE_WAYS-1:0] tag_save_lru; + + // The access vector to update the LRU history is the way that has + // a hit or is refilled. It is also one-hot encoded. + reg [OPTION_ICACHE_WAYS-1:0] access; + + // The current LRU history as read from tag memory and the update + // value after we accessed it to write back to tag memory. + wire [TAG_LRU_WIDTH_BITS-1:0] current_lru_history; + wire [TAG_LRU_WIDTH_BITS-1:0] next_lru_history; + + // Intermediate signals to ease debugging + wire [TAG_WIDTH-1:0] check_way_tag [OPTION_ICACHE_WAYS-1:0]; + wire check_way_match [OPTION_ICACHE_WAYS-1:0]; + wire check_way_valid [OPTION_ICACHE_WAYS-1:0]; + + genvar i; + + // Allowing (out of the cache line being refilled) accesses during refill + // exposes a bug somewhere, causing the Linux kernel to end up with a + // bus error UNHANDLED EXCEPTION. + // Until that is sorted out, disable it. + assign cpu_ack_o = (read /*| refill & ic_access_i*/) & hit | + refill_hit & ic_access_i; + + assign tag_rindex = cpu_adr_i[WAY_WIDTH-1:OPTION_ICACHE_BLOCK_WIDTH]; + /* + * The tag mem is written during reads to write the lru info and during + * refill and invalidate + */ + assign tag_windex = read ?
+ cpu_adr_match_i[WAY_WIDTH-1:OPTION_ICACHE_BLOCK_WIDTH] : + invalidate ? invalidate_adr : + wradr_i[WAY_WIDTH-1:OPTION_ICACHE_BLOCK_WIDTH]; + assign tag_tag = cpu_adr_match_i[OPTION_ICACHE_LIMIT_WIDTH-1:WAY_WIDTH]; + assign tag_wtag = wradr_i[OPTION_ICACHE_LIMIT_WIDTH-1:WAY_WIDTH]; + + generate + if (OPTION_ICACHE_WAYS >= 2) begin + // Multiplex the LRU history from and to tag memory + assign current_lru_history = tag_dout[TAG_LRU_MSB:TAG_LRU_LSB]; + assign tag_din[TAG_LRU_MSB:TAG_LRU_LSB] = tag_lru_in; + assign tag_lru_out = tag_dout[TAG_LRU_MSB:TAG_LRU_LSB]; + end + + for (i = 0; i < OPTION_ICACHE_WAYS; i=i+1) begin : ways + assign way_raddr[i] = cpu_adr_i[WAY_WIDTH-1:2]; + assign way_waddr[i] = wradr_i[WAY_WIDTH-1:2]; + assign way_din[i] = wrdat_i; + + // compare stored tag with incoming tag and check valid bit + assign check_way_tag[i] = tag_way_out[i][TAG_WIDTH-1:0]; + assign check_way_match[i] = (check_way_tag[i] == tag_tag); + assign check_way_valid[i] = tag_way_out[i][TAGMEM_WAY_VALID]; + + assign way_hit[i] = check_way_valid[i] & check_way_match[i]; + + // Multiplex the way entries in the tag memory + assign tag_din[(i+1)*TAGMEM_WAY_WIDTH-1:i*TAGMEM_WAY_WIDTH] = tag_way_in[i]; + assign tag_way_out[i] = tag_dout[(i+1)*TAGMEM_WAY_WIDTH-1:i*TAGMEM_WAY_WIDTH]; + end + endgenerate + + assign hit = |way_hit; + assign cache_hit_o = hit; + + integer w0; + always @(*) begin + cpu_dat_o = {OPTION_OPERAND_WIDTH{1'bx}}; + + // Put correct way on the data port + for (w0 = 0; w0 < OPTION_ICACHE_WAYS; w0 = w0 + 1) begin + if (way_hit[w0] | (refill_hit & tag_save_lru[w0])) begin + cpu_dat_o = way_dout[w0]; + end + end + end + + assign next_refill_adr = (OPTION_ICACHE_BLOCK_WIDTH == 5) ? 
+ {wradr_i[31:5], wradr_i[4:0] + 5'd4} : // 32 byte + {wradr_i[31:4], wradr_i[3:0] + 4'd4}; // 16 byte + + assign refill_done_o = refill_done; + assign refill_done = refill_valid[next_refill_adr[OPTION_ICACHE_BLOCK_WIDTH-1:2]]; + assign refill_hit = refill_valid_r[cpu_adr_match_i[OPTION_ICACHE_BLOCK_WIDTH-1:2]] & + cpu_adr_match_i[OPTION_ICACHE_LIMIT_WIDTH-1: + OPTION_ICACHE_BLOCK_WIDTH] == + wradr_i[OPTION_ICACHE_LIMIT_WIDTH-1: + OPTION_ICACHE_BLOCK_WIDTH] & + refill; + + assign refill = (state == REFILL); + assign read = (state == READ); + assign invalidate = (state == INVALIDATE); + + assign refill_o = refill; + + assign refill_req_o = read & cpu_req_i & !hit | refill; + + /* + * SPR bus interface + */ + assign invalidate_o = spr_bus_stb_i & spr_bus_we_i & + (spr_bus_addr_i == `OR1K_SPR_ICBIR_ADDR); + + /* + * Cache FSM + */ + integer w1; + always @(posedge clk `OR_ASYNC_RST) begin + refill_valid_r <= refill_valid; + spr_bus_ack_o <= 0; + case (state) + IDLE: begin + if (cpu_req_i) + state <= READ; + end + + READ: begin + if (ic_access_i) begin + if (hit) begin + state <= READ; + end else if (cpu_req_i) begin + refill_valid <= 0; + refill_valid_r <= 0; + + // Store the LRU information for correct replacement + // on refill. Always one when only one way. 
+ tag_save_lru <= (OPTION_ICACHE_WAYS==1) | lru; + + for (w1 = 0; w1 < OPTION_ICACHE_WAYS; w1 = w1 + 1) begin + tag_way_save[w1] <= tag_way_out[w1]; + end + + state <= REFILL; + end + end else begin + state <= IDLE; + end + end + + REFILL: begin + if (we_i) begin + refill_valid[wradr_i[OPTION_ICACHE_BLOCK_WIDTH-1:2]] <= 1; + + if (refill_done) + state <= IDLE; + end + end + + INVALIDATE: begin + if (!invalidate_o) + state <= IDLE; + spr_bus_ack_o <= 1; + end + + default: + state <= IDLE; + endcase + + if (invalidate_o & !refill) begin + invalidate_adr <= spr_bus_dat_i[WAY_WIDTH-1:OPTION_ICACHE_BLOCK_WIDTH]; + spr_bus_ack_o <= 1; + state <= INVALIDATE; + end + + if (rst) + state <= IDLE; + else if(ic_imem_err_i) + state <= IDLE; + end + + integer w2; + always @(*) begin + // Default is to keep data, don't write and don't access + tag_lru_in = tag_lru_out; + for (w2 = 0; w2 < OPTION_ICACHE_WAYS; w2 = w2 + 1) begin + tag_way_in[w2] = tag_way_out[w2]; + end + + tag_we = 1'b0; + way_we = {(OPTION_ICACHE_WAYS){1'b0}}; + + access = {(OPTION_ICACHE_WAYS){1'b0}}; + + case (state) + READ: begin + if (hit) begin + // We got a hit. The LRU module gets the access + // information. Depending on this we update the LRU + // history in the tag. + access = way_hit; + + // This is the updated LRU history after hit + tag_lru_in = next_lru_history; + + tag_we = 1'b1; + end + end + + REFILL: begin + if (we_i) begin + // Write the data to the way that is replaced (which is + // the LRU) + way_we = tag_save_lru; + + // Access pattern + access = tag_save_lru; + + /* Invalidate the way on the first write */ + if (refill_valid == 0) begin + for (w2 = 0; w2 < OPTION_ICACHE_WAYS; w2 = w2 + 1) begin + if (tag_save_lru[w2]) begin + tag_way_in[w2][TAGMEM_WAY_VALID] = 1'b0; + end + end + + tag_we = 1'b1; + end + + // After refill update the tag memory entry of the + // filled way with the LRU history, the tag and set + // valid to 1. 
+ if (refill_done) begin + for (w2 = 0; w2 < OPTION_ICACHE_WAYS; w2 = w2 + 1) begin + tag_way_in[w2] = tag_way_save[w2]; + if (tag_save_lru[w2]) begin + tag_way_in[w2] = { 1'b1, tag_wtag }; + end + end + tag_lru_in = next_lru_history; + + tag_we = 1'b1; + end + end + end + + INVALIDATE: begin + // Lazy invalidation, invalidate everything that matches tag address + tag_lru_in = 0; + for (w2 = 0; w2 < OPTION_ICACHE_WAYS; w2 = w2 + 1) begin + tag_way_in[w2] = 0; + end + + tag_we = 1'b1; + end + + default: begin + end + endcase + end + + /* mor1kx_simple_dpram_sclk AUTO_TEMPLATE ( + // Outputs + .dout (way_dout[i][OPTION_OPERAND_WIDTH-1:0]), + // Inputs + .raddr (way_raddr[i][WAY_WIDTH-3:0]), + .re (1'b1), + .waddr (way_waddr[i][WAY_WIDTH-3:0]), + .we (way_we[i]), + .din (way_din[i][31:0])); + */ + generate + for (i = 0; i < OPTION_ICACHE_WAYS; i=i+1) begin : way_memories + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH(WAY_WIDTH-2), + .DATA_WIDTH(OPTION_OPERAND_WIDTH), + .ENABLE_BYPASS(0) + ) + way_data_ram + (/*AUTOINST*/ + // Outputs + .dout (way_dout[i][OPTION_OPERAND_WIDTH-1:0]), // Templated + // Inputs + .clk (clk), + .raddr (way_raddr[i][WAY_WIDTH-3:0]), // Templated + .re (1'b1), // Templated + .waddr (way_waddr[i][WAY_WIDTH-3:0]), // Templated + .we (way_we[i]), // Templated + .din (way_din[i][31:0])); // Templated + + end // block: way_memories + + if (OPTION_ICACHE_WAYS >= 2) begin : gen_u_lru + /* mor1kx_cache_lru AUTO_TEMPLATE( + .current (current_lru_history), + .update (next_lru_history), + .lru_pre (lru), + .lru_post (), + .access (access), + ); */ + + mor1kx_cache_lru + #(.NUMWAYS(OPTION_ICACHE_WAYS)) + u_lru(/*AUTOINST*/ + // Outputs + .update (next_lru_history), // Templated + .lru_pre (lru), // Templated + .lru_post (), // Templated + // Inputs + .current (current_lru_history), // Templated + .access (access)); // Templated + end // if (OPTION_ICACHE_WAYS >= 2) + endgenerate + + /* mor1kx_simple_dpram_sclk AUTO_TEMPLATE ( + // Outputs + .dout 
(tag_dout[TAGMEM_WIDTH-1:0]), + // Inputs + .raddr (tag_rindex), + .re (1'b1), + .waddr (tag_windex), + .we (tag_we), + .din (tag_din)); + */ + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH(OPTION_ICACHE_SET_WIDTH), + .DATA_WIDTH(TAGMEM_WIDTH), + .ENABLE_BYPASS(0) + ) + tag_ram + (/*AUTOINST*/ + // Outputs + .dout (tag_dout[TAGMEM_WIDTH-1:0]), // Templated + // Inputs + .clk (clk), + .raddr (tag_rindex), // Templated + .re (1'b1), // Templated + .waddr (tag_windex), // Templated + .we (tag_we), // Templated + .din (tag_din)); // Templated + +endmodule diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_immu.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_immu.v new file mode 100644 index 0000000..d05537d --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_immu.v @@ -0,0 +1,459 @@ +/****************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: Instruction MMU implementation + + Copyright (C) 2013 Stefan Kristiansson + + ******************************************************************************/ + +`include "mor1kx-defines.v" + +module mor1kx_immu + #( + parameter FEATURE_IMMU_HW_TLB_RELOAD = "NONE", + parameter OPTION_OPERAND_WIDTH = 32, + parameter OPTION_IMMU_SET_WIDTH = 6, + parameter OPTION_IMMU_WAYS = 1 + ) + ( + input clk, + input rst, + + input enable_i, + + output busy_o, + + input [OPTION_OPERAND_WIDTH-1:0] virt_addr_i, + input [OPTION_OPERAND_WIDTH-1:0] virt_addr_match_i, + output reg [OPTION_OPERAND_WIDTH-1:0] phys_addr_o, + output reg cache_inhibit_o, + + input supervisor_mode_i, + + output reg tlb_miss_o, + output pagefault_o, + + output reg tlb_reload_req_o, + input tlb_reload_ack_i, + output reg [OPTION_OPERAND_WIDTH-1:0] tlb_reload_addr_o, + input [OPTION_OPERAND_WIDTH-1:0] tlb_reload_data_i, + output tlb_reload_pagefault_o, + input tlb_reload_pagefault_clear_i, + output tlb_reload_busy_o, + + // SPR interface + input [15:0] spr_bus_addr_i, + input spr_bus_we_i, + input spr_bus_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_i, + + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_o, + output spr_bus_ack_o + ); + + localparam WAYS_WIDTH = (OPTION_IMMU_WAYS < 2) ? 
1 : 2; + + wire [OPTION_OPERAND_WIDTH-1:0] itlb_match_dout[OPTION_IMMU_WAYS-1:0]; + wire [OPTION_IMMU_SET_WIDTH-1:0] itlb_match_addr; + reg [OPTION_IMMU_WAYS-1:0] itlb_match_we; + wire [OPTION_OPERAND_WIDTH-1:0] itlb_match_din; + + wire [OPTION_OPERAND_WIDTH-1:0] itlb_match_huge_dout[OPTION_IMMU_WAYS-1:0]; + wire [OPTION_IMMU_SET_WIDTH-1:0] itlb_match_huge_addr; + wire itlb_match_huge_we; + + wire [OPTION_OPERAND_WIDTH-1:0] itlb_trans_dout[OPTION_IMMU_WAYS-1:0]; + wire [OPTION_IMMU_SET_WIDTH-1:0] itlb_trans_addr; + reg [OPTION_IMMU_WAYS-1:0] itlb_trans_we; + wire [OPTION_OPERAND_WIDTH-1:0] itlb_trans_din; + + wire [OPTION_OPERAND_WIDTH-1:0] itlb_trans_huge_dout[OPTION_IMMU_WAYS-1:0]; + wire [OPTION_IMMU_SET_WIDTH-1:0] itlb_trans_huge_addr; + wire itlb_trans_huge_we; + + reg itlb_match_reload_we; + reg [OPTION_OPERAND_WIDTH-1:0] itlb_match_reload_din; + + reg itlb_trans_reload_we; + reg [OPTION_OPERAND_WIDTH-1:0] itlb_trans_reload_din; + + wire itlb_match_spr_cs; + reg itlb_match_spr_cs_r; + wire itlb_trans_spr_cs; + reg itlb_trans_spr_cs_r; + + wire immucr_spr_cs; + reg immucr_spr_cs_r; + reg [OPTION_OPERAND_WIDTH-1:0] immucr; + + wire [1:0] spr_way_idx_full; + wire [WAYS_WIDTH-1:0] spr_way_idx; + reg [WAYS_WIDTH-1:0] spr_way_idx_r; + + wire [OPTION_IMMU_WAYS-1:0] way_huge; + + wire [OPTION_IMMU_WAYS-1:0] way_hit; + wire [OPTION_IMMU_WAYS-1:0] way_huge_hit; + + reg tlb_reload_pagefault; + reg tlb_reload_huge; + + // sxe: supervisor execute enable + // uxe: user execute enable + reg sxe; + reg uxe; + + reg spr_bus_ack; + reg spr_bus_ack_r; + wire [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat; + reg [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_r; + + genvar i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_bus_ack <= 0; + else if (spr_bus_stb_i & spr_bus_addr_i[15:11] == 5'd2) + spr_bus_ack <= 1; + else + spr_bus_ack <= 0; + + always @(posedge clk) + spr_bus_ack_r <= spr_bus_ack; + + always @(posedge clk) + if (spr_bus_ack & !spr_bus_ack_r) + spr_bus_dat_r <=
spr_bus_dat; + + assign spr_bus_ack_o = spr_bus_ack & spr_bus_stb_i & + spr_bus_addr_i[15:11] == 5'd2; + +generate +for (i = 0; i < OPTION_IMMU_WAYS; i=i+1) begin : ways + assign way_huge[i] = &itlb_match_huge_dout[i][1:0]; // huge & valid + + assign way_hit[i] = (itlb_match_dout[i][31:13] == virt_addr_match_i[31:13]) & + itlb_match_dout[i][0]; // valid bit + + assign way_huge_hit[i] = (itlb_match_huge_dout[i][31:24] == + virt_addr_match_i[31:24]) & + itlb_match_huge_dout[i][0]; +end +endgenerate + + integer j; + always @(*) begin + tlb_miss_o = !tlb_reload_pagefault & !busy_o; + phys_addr_o = {OPTION_OPERAND_WIDTH{1'b0}}; + phys_addr_o[23:0] = virt_addr_match_i[23:0]; + sxe = 0; + uxe = 0; + cache_inhibit_o = 0; + + for (j = 0; j < OPTION_IMMU_WAYS; j=j+1) begin + if (way_huge[j] & way_huge_hit[j] | !way_huge[j] & way_hit[j]) + tlb_miss_o = 0; + + if (way_huge[j] & way_huge_hit[j]) begin + phys_addr_o = {itlb_trans_huge_dout[j][31:24], virt_addr_match_i[23:0]}; + sxe = itlb_trans_huge_dout[j][6]; + uxe = itlb_trans_huge_dout[j][7]; + cache_inhibit_o = itlb_trans_huge_dout[j][1]; + end else if (!way_huge[j] & way_hit[j])begin + phys_addr_o = {itlb_trans_dout[j][31:13], virt_addr_match_i[12:0]}; + sxe = itlb_trans_dout[j][6]; + uxe = itlb_trans_dout[j][7]; + cache_inhibit_o = itlb_trans_dout[j][1]; + end + + itlb_match_we[j] = 0; + if (itlb_match_reload_we & !tlb_reload_huge) + itlb_match_we[j] = 1; + if (j[WAYS_WIDTH-1:0] == spr_way_idx) + itlb_match_we[j] = itlb_match_spr_cs & spr_bus_we_i & !spr_bus_ack; + + itlb_trans_we[j] = 0; + if (itlb_trans_reload_we & !tlb_reload_huge) + itlb_trans_we[j] = 1; + if (j[WAYS_WIDTH-1:0] == spr_way_idx) + itlb_trans_we[j] = itlb_trans_spr_cs & spr_bus_we_i & !spr_bus_ack; + end + end + + assign pagefault_o = (supervisor_mode_i ? 
!sxe : !uxe) & + !tlb_reload_busy_o & !busy_o; + + assign busy_o = ((itlb_match_spr_cs | itlb_trans_spr_cs) & !spr_bus_ack | + (itlb_match_spr_cs_r | itlb_trans_spr_cs_r) & + spr_bus_ack & !spr_bus_ack_r) & enable_i; + + assign spr_way_idx_full = {spr_bus_addr_i[10], spr_bus_addr_i[8]}; + assign spr_way_idx = spr_way_idx_full[WAYS_WIDTH-1:0]; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) begin + itlb_match_spr_cs_r <= 0; + itlb_trans_spr_cs_r <= 0; + immucr_spr_cs_r <= 0; + spr_way_idx_r <= 0; + end else begin + itlb_match_spr_cs_r <= itlb_match_spr_cs; + itlb_trans_spr_cs_r <= itlb_trans_spr_cs; + immucr_spr_cs_r <= immucr_spr_cs; + spr_way_idx_r <= spr_way_idx; + end + +generate /* verilator lint_off WIDTH */ +if (FEATURE_IMMU_HW_TLB_RELOAD == "ENABLED") begin +/* verilator lint_on WIDTH */ + assign immucr_spr_cs = spr_bus_stb_i & + spr_bus_addr_i == `OR1K_SPR_IMMUCR_ADDR; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + immucr <= 0; + else if (immucr_spr_cs & spr_bus_we_i) + immucr <= spr_bus_dat_i; + +end else begin + assign immucr_spr_cs = 0; + always @(posedge clk) + immucr <= 0; +end +endgenerate + + assign itlb_match_spr_cs = spr_bus_stb_i & (spr_bus_addr_i[15:11] == 5'd2) & + |spr_bus_addr_i[10:9] & !spr_bus_addr_i[7]; + assign itlb_trans_spr_cs = spr_bus_stb_i & (spr_bus_addr_i[15:11] == 5'd2) & + |spr_bus_addr_i[10:9] & spr_bus_addr_i[7]; + + assign itlb_match_addr = itlb_match_spr_cs & !spr_bus_ack ? + spr_bus_addr_i[OPTION_IMMU_SET_WIDTH-1:0] : + virt_addr_i[13+(OPTION_IMMU_SET_WIDTH-1):13]; + assign itlb_trans_addr = itlb_trans_spr_cs & !spr_bus_ack ? + spr_bus_addr_i[OPTION_IMMU_SET_WIDTH-1:0] : + virt_addr_i[13+(OPTION_IMMU_SET_WIDTH-1):13]; + + assign itlb_match_din = itlb_match_spr_cs & spr_bus_we_i & !spr_bus_ack ? + spr_bus_dat_i : itlb_match_reload_din; + assign itlb_trans_din = itlb_trans_spr_cs & spr_bus_we_i & !spr_bus_ack ? 
+ spr_bus_dat_i : itlb_trans_reload_din; + + assign itlb_match_huge_addr = virt_addr_i[24+(OPTION_IMMU_SET_WIDTH-1):24]; + assign itlb_trans_huge_addr = virt_addr_i[24+(OPTION_IMMU_SET_WIDTH-1):24]; + + assign itlb_match_huge_we = itlb_match_reload_we & tlb_reload_huge; + assign itlb_trans_huge_we = itlb_trans_reload_we & tlb_reload_huge; + + assign spr_bus_dat = itlb_match_spr_cs_r ? itlb_match_dout[spr_way_idx_r] : + itlb_trans_spr_cs_r ? itlb_trans_dout[spr_way_idx_r] : + immucr_spr_cs_r ? immucr : 0; + + // Use registered value on all but the first cycle spr_bus_ack is asserted + assign spr_bus_dat_o = spr_bus_ack & !spr_bus_ack_r ? spr_bus_dat : + spr_bus_dat_r; + + localparam TLB_IDLE = 2'd0; + localparam TLB_GET_PTE_POINTER = 2'd1; + localparam TLB_GET_PTE = 2'd2; + localparam TLB_READ = 2'd3; + +generate /* verilator lint_off WIDTH */ +if (FEATURE_IMMU_HW_TLB_RELOAD == "ENABLED") begin + /* verilator lint_on WIDTH */ + + // Hardware TLB reload + // Compliant with the suggestions outlined in this thread: + // http://lists.openrisc.net/pipermail/openrisc/2013-July/001806.html + // + // PTE layout: + // | 31 ... 13 | 12 | 11 | 10 | 9 | 8 | 7 | 6 | 5 | 4 | 3 | 2 | 1 | 0 | + // | PPN | Reserved |PRESENT| L | X | W | U | D | A |WOM|WBC|CI |CC | + // + // Where X/W/U maps into SXE/UXE like this: + // X | W | U SXE | UXE + // --------- --------- + // 0 | x | 0 = 0 | 0 + // 0 | x | 1 = 0 | 0 + // ... 
+ // 1 | x | 0 = 1 | 0 + // 1 | x | 1 = 1 | 1 + + + + reg [1:0] tlb_reload_state = TLB_IDLE; + wire do_reload; + + assign do_reload = enable_i & tlb_miss_o & (immucr[31:10] != 0); + assign tlb_reload_busy_o = (tlb_reload_state != TLB_IDLE) | do_reload; + assign tlb_reload_pagefault_o = tlb_reload_pagefault & + !tlb_reload_pagefault_clear_i; + + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + tlb_reload_pagefault <= 0; + else if(tlb_reload_pagefault_clear_i) + tlb_reload_pagefault <= 0; + itlb_trans_reload_we <= 0; + itlb_trans_reload_din <= 0; + itlb_match_reload_we <= 0; + itlb_match_reload_din <= 0; + + case (tlb_reload_state) + TLB_IDLE: begin + tlb_reload_huge <= 0; + tlb_reload_req_o <= 0; + if (do_reload) begin + tlb_reload_req_o <= 1; + tlb_reload_addr_o <= {immucr[31:10], + virt_addr_match_i[31:24], 2'b00}; + tlb_reload_state <= TLB_GET_PTE_POINTER; + end + end + + // + // Here we get the pointer to the PTE table, next is to fetch + // the actual PTE from the offset in the table. + // The offset is calculated by: + // ((virt_addr_match >> PAGE_BITS) & (PTE_CNT-1)) << 2 + // Where PAGE_BITS is 13 (8 KiB page) and PTE_CNT is 2048 + // (number of PTEs in the PTE table) + // + TLB_GET_PTE_POINTER: begin + tlb_reload_huge <= 0; + if (tlb_reload_ack_i) begin + if (tlb_reload_data_i[31:13] == 0) begin + tlb_reload_pagefault <= 1; + tlb_reload_req_o <= 0; + tlb_reload_state <= TLB_IDLE; + end else if (tlb_reload_data_i[9]) begin + tlb_reload_huge <= 1; + tlb_reload_req_o <= 0; + tlb_reload_state <= TLB_GET_PTE; + end else begin + tlb_reload_addr_o <= {tlb_reload_data_i[31:13], + virt_addr_match_i[23:13], 2'b00}; + tlb_reload_state <= TLB_GET_PTE; + end + end + end + + // + // Here we get the actual PTE, left to do is to translate the + // PTE data into our translate and match registers.
+ // + TLB_GET_PTE: begin + if (tlb_reload_ack_i) begin + tlb_reload_req_o <= 0; + // Check PRESENT bit + if (!tlb_reload_data_i[10]) begin + tlb_reload_pagefault <= 1; + tlb_reload_state <= TLB_IDLE; + end else begin + // Translate register generation. + // PPN + itlb_trans_reload_din[31:13] <= tlb_reload_data_i[31:13]; + // UXE = X & U + itlb_trans_reload_din[7] <= tlb_reload_data_i[8] & + tlb_reload_data_i[6]; + // SXE = X + itlb_trans_reload_din[6] <= tlb_reload_data_i[8]; + // Dirty, Accessed, Weakly-Ordered-Memory, Writeback cache, + // Cache inhibit, Cache coherent + itlb_trans_reload_din[5:0] <= tlb_reload_data_i[5:0]; + itlb_trans_reload_we <= 1; + + // Match register generation. + // VPN + itlb_match_reload_din[31:13] <= virt_addr_match_i[31:13]; + // PL1 + itlb_match_reload_din[1] <= tlb_reload_huge; + // Valid + itlb_match_reload_din[0] <= 1; + itlb_match_reload_we <= 1; + + tlb_reload_state <= TLB_READ; + end + end + end + + // Let the just written values propagate out on the read ports + TLB_READ: begin + tlb_reload_state <= TLB_IDLE; + end + + default: + tlb_reload_state <= TLB_IDLE; + + endcase + end +end else begin // if (FEATURE_IMMU_HW_TLB_RELOAD == "ENABLED") + assign tlb_reload_pagefault_o = 0; + assign tlb_reload_busy_o = 0; + always @(posedge clk) begin + tlb_reload_req_o <= 0; + tlb_reload_addr_o <= 0; + tlb_reload_huge <= 1'b0; + tlb_reload_pagefault <= 0; + itlb_trans_reload_we <= 0; + itlb_trans_reload_din <= 0; + itlb_match_reload_we <= 0; + itlb_match_reload_din <= 0; + end +end +endgenerate + +generate +for (i = 0; i < OPTION_IMMU_WAYS; i=i+1) begin : itlb + // ITLB match registers + mor1kx_true_dpram_sclk + #( + .ADDR_WIDTH(OPTION_IMMU_SET_WIDTH), + .DATA_WIDTH(OPTION_OPERAND_WIDTH) + ) + itlb_match_regs + ( + // Outputs + .dout_a (itlb_match_dout[i]), + .dout_b (itlb_match_huge_dout[i]), + // Inputs + .clk_a (clk), + .addr_a (itlb_match_addr), + .we_a (itlb_match_we[i]), + .din_a (itlb_match_din), + .clk_b (clk), + .addr_b 
(itlb_match_huge_addr), + .we_b (itlb_match_huge_we), + .din_b (itlb_match_reload_din) + ); + + + // ITLB translate registers + mor1kx_true_dpram_sclk + #( + .ADDR_WIDTH(OPTION_IMMU_SET_WIDTH), + .DATA_WIDTH(OPTION_OPERAND_WIDTH) + ) + itlb_translate_regs + ( + // Outputs + .dout_a (itlb_trans_dout[i]), + .dout_b (itlb_trans_huge_dout[i]), + // Inputs + .clk_a (clk), + .addr_a (itlb_trans_addr), + .we_a (itlb_trans_we[i]), + .din_a (itlb_trans_din), + .clk_b (clk), + .addr_b (itlb_trans_huge_addr), + .we_b (itlb_trans_huge_we), + .din_b (itlb_trans_reload_din) + ); +end +endgenerate + +endmodule diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_lsu_cappuccino.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_lsu_cappuccino.v new file mode 100644 index 0000000..2ffd152 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_lsu_cappuccino.v @@ -0,0 +1,870 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: Data bus interface + + All combinatorial outputs to pipeline + Dbus interface request signal out synchronous + + 32-bit specific + + Copyright (C) 2012 Julius Baxter + Copyright (C) 2013 Stefan Kristiansson + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_lsu_cappuccino + #( + parameter FEATURE_DATACACHE = "NONE", + parameter OPTION_OPERAND_WIDTH = 32, + parameter OPTION_DCACHE_BLOCK_WIDTH = 5, + parameter OPTION_DCACHE_SET_WIDTH = 9, + parameter OPTION_DCACHE_WAYS = 2, + parameter OPTION_DCACHE_LIMIT_WIDTH = 32, + parameter OPTION_DCACHE_SNOOP = "NONE", + parameter FEATURE_DMMU = "NONE", + parameter FEATURE_DMMU_HW_TLB_RELOAD = "NONE", + parameter OPTION_DMMU_SET_WIDTH = 6, + parameter OPTION_DMMU_WAYS = 1, + parameter FEATURE_STORE_BUFFER = "ENABLED", + parameter OPTION_STORE_BUFFER_DEPTH_WIDTH = 8, + parameter FEATURE_ATOMIC = "ENABLED" + ) + ( + input clk, + input rst, + + input padv_execute_i, + input padv_ctrl_i, // needed for dmmu spr + input decode_valid_i, + // calculated address from ALU + input [OPTION_OPERAND_WIDTH-1:0] exec_lsu_adr_i, + input [OPTION_OPERAND_WIDTH-1:0] ctrl_lsu_adr_i, + + // register file B in (store operand) + input [OPTION_OPERAND_WIDTH-1:0] ctrl_rfb_i, + + // from decode stage regs, indicate if load or store + input exec_op_lsu_load_i, + input exec_op_lsu_store_i, + input exec_op_lsu_atomic_i, + input ctrl_op_lsu_load_i, + input ctrl_op_lsu_store_i, + input ctrl_op_lsu_atomic_i, + input ctrl_op_msync_i, + input [1:0] ctrl_lsu_length_i, + input ctrl_lsu_zext_i, + + // From control stage, exception PC for the store buffer input + input [OPTION_OPERAND_WIDTH-1:0] ctrl_epcr_i, + // The exception PC as it has went through the store buffer + output [OPTION_OPERAND_WIDTH-1:0] store_buffer_epcr_o, + + output 
[OPTION_OPERAND_WIDTH-1:0] lsu_result_o, + output lsu_valid_o, + // exception output + output lsu_except_dbus_o, + output lsu_except_align_o, + output lsu_except_dtlb_miss_o, + output lsu_except_dpagefault_o, + + // Indicator that the dbus exception came via the store buffer + output reg store_buffer_err_o, + + // Atomic operation flag set/clear logic + output atomic_flag_set_o, + output atomic_flag_clear_o, + + // stall signal for msync logic + output msync_stall_o, + + // SPR interface + input [15:0] spr_bus_addr_i, + input spr_bus_we_i, + input spr_bus_stb_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_i, + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dc_o, + output spr_bus_ack_dc_o, + output [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_dmmu_o, + output spr_bus_ack_dmmu_o, + + input dc_enable_i, + input dmmu_enable_i, + input supervisor_mode_i, + output dc_hit_o, + + // interface to data bus + output [OPTION_OPERAND_WIDTH-1:0] dbus_adr_o, + output reg dbus_req_o, + output [OPTION_OPERAND_WIDTH-1:0] dbus_dat_o, + output reg [3:0] dbus_bsel_o, + output dbus_we_o, + output dbus_burst_o, + input dbus_err_i, + input dbus_ack_i, + input [OPTION_OPERAND_WIDTH-1:0] dbus_dat_i, + input pipeline_flush_i, + + input [31:0] snoop_adr_i, + input snoop_en_i + ); + + reg [OPTION_OPERAND_WIDTH-1:0] dbus_dat_aligned; // comb. + reg [OPTION_OPERAND_WIDTH-1:0] dbus_dat_extended; // comb. 
+ + reg access_done; + + wire align_err_word; + wire align_err_short; + + wire align_err; + + wire except_align; + + reg except_dbus; + + reg dbus_ack; + reg dbus_err; + reg [OPTION_OPERAND_WIDTH-1:0] dbus_dat; + reg [OPTION_OPERAND_WIDTH-1:0] dbus_adr; + wire [OPTION_OPERAND_WIDTH-1:0] next_dbus_adr; + reg dbus_we; + reg [3:0] dbus_bsel; + wire dbus_access; + wire dbus_stall; + + wire [OPTION_OPERAND_WIDTH-1:0] lsu_ldat; + wire [OPTION_OPERAND_WIDTH-1:0] lsu_sdat; + wire lsu_ack; + + wire dc_ack; + wire dc_err; + wire [31:0] dc_ldat; + wire [31:0] dc_sdat; + wire [31:0] dc_adr; + wire [31:0] dc_adr_match; + wire dc_we; + wire [3:0] dc_bsel; + + wire dc_access; + wire dc_hit; + wire dc_refill_allowed; + wire dc_refill; + wire dc_refill_req; + wire dc_refill_done; + + reg dc_enable_r; + wire dc_enabled; + + wire ctrl_op_lsu; + + // DMMU + wire tlb_miss; + wire pagefault; + wire [OPTION_OPERAND_WIDTH-1:0] dmmu_phys_addr; + wire except_dtlb_miss; + reg except_dtlb_miss_r; + wire except_dpagefault; + reg except_dpagefault_r; + wire dmmu_cache_inhibit; + + wire tlb_reload_req; + wire tlb_reload_busy; + wire [OPTION_OPERAND_WIDTH-1:0] tlb_reload_addr; + wire tlb_reload_pagefault; + reg tlb_reload_ack; + reg [OPTION_OPERAND_WIDTH-1:0] tlb_reload_data; + wire tlb_reload_pagefault_clear; + reg tlb_reload_done; + + // Store buffer + wire store_buffer_write; + wire store_buffer_read; + wire store_buffer_full; + wire store_buffer_empty; + wire [OPTION_OPERAND_WIDTH-1:0] store_buffer_radr; + wire [OPTION_OPERAND_WIDTH-1:0] store_buffer_wadr; + wire [OPTION_OPERAND_WIDTH-1:0] store_buffer_dat; + wire [OPTION_OPERAND_WIDTH/8-1:0] store_buffer_bsel; + wire store_buffer_atomic; + reg store_buffer_write_pending; + + reg dbus_atomic; + + reg last_write; + reg write_done; + + // Atomic operations + reg [OPTION_OPERAND_WIDTH-1:0] atomic_addr; + reg atomic_reserve; + wire swa_success; + + wire snoop_valid; + wire dc_snoop_hit; + + // We have to mask out our snooped bus accesses + assign 
snoop_valid = (OPTION_DCACHE_SNOOP != "NONE") ? + snoop_en_i & !((snoop_adr_i == dbus_adr_o) & dbus_ack_i) : + 0; + + assign ctrl_op_lsu = ctrl_op_lsu_load_i | ctrl_op_lsu_store_i; + + assign lsu_sdat = (ctrl_lsu_length_i == 2'b00) ? // byte access + {ctrl_rfb_i[7:0],ctrl_rfb_i[7:0], + ctrl_rfb_i[7:0],ctrl_rfb_i[7:0]} : + (ctrl_lsu_length_i == 2'b01) ? // halfword access + {ctrl_rfb_i[15:0],ctrl_rfb_i[15:0]} : + ctrl_rfb_i; // word access + + assign align_err_word = |ctrl_lsu_adr_i[1:0]; + assign align_err_short = ctrl_lsu_adr_i[0]; + + + assign lsu_valid_o = (lsu_ack | access_done) & !tlb_reload_busy & !dc_snoop_hit; + + assign lsu_except_dbus_o = except_dbus | store_buffer_err_o; + + assign align_err = (ctrl_lsu_length_i == 2'b10) & align_err_word | + (ctrl_lsu_length_i == 2'b01) & align_err_short; + + assign except_align = ctrl_op_lsu & align_err; + + assign lsu_except_align_o = except_align & !pipeline_flush_i; + + assign except_dtlb_miss = ctrl_op_lsu & tlb_miss & dmmu_enable_i & + !tlb_reload_busy; + + assign lsu_except_dtlb_miss_o = except_dtlb_miss & !pipeline_flush_i; + + assign except_dpagefault = ctrl_op_lsu & pagefault & dmmu_enable_i & + !tlb_reload_busy | tlb_reload_pagefault; + + assign lsu_except_dpagefault_o = except_dpagefault & !pipeline_flush_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + access_done <= 0; + else if (padv_execute_i) + access_done <= 0; + else if (lsu_ack) + access_done <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + except_dbus <= 0; + else if (padv_execute_i | pipeline_flush_i) + except_dbus <= 0; + else if (dbus_err_i) + except_dbus <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + except_dtlb_miss_r <= 0; + else if (padv_execute_i) + except_dtlb_miss_r <= 0; + else if (except_dtlb_miss) + except_dtlb_miss_r <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + except_dpagefault_r <= 0; + else if (padv_execute_i) + except_dpagefault_r <= 0; + else if (except_dpagefault) + except_dpagefault_r 
<= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + store_buffer_err_o <= 0; + else if (pipeline_flush_i) + store_buffer_err_o <= 0; + else if (dbus_err_i & dbus_we_o) + store_buffer_err_o <= 1; + + // Big endian bus mapping + always @(*) + case (ctrl_lsu_length_i) + 2'b00: // byte access + case(ctrl_lsu_adr_i[1:0]) + 2'b00: + dbus_bsel = 4'b1000; + 2'b01: + dbus_bsel = 4'b0100; + 2'b10: + dbus_bsel = 4'b0010; + 2'b11: + dbus_bsel = 4'b0001; + endcase + 2'b01: // halfword access + case(ctrl_lsu_adr_i[1]) + 1'b0: + dbus_bsel = 4'b1100; + 1'b1: + dbus_bsel = 4'b0011; + endcase + 2'b10, + 2'b11: + dbus_bsel = 4'b1111; + endcase + + // Select part of read word + always @* + case(ctrl_lsu_adr_i[1:0]) + 2'b00: + dbus_dat_aligned = lsu_ldat; + 2'b01: + dbus_dat_aligned = {lsu_ldat[23:0],8'd0}; + 2'b10: + dbus_dat_aligned = {lsu_ldat[15:0],16'd0}; + 2'b11: + dbus_dat_aligned = {lsu_ldat[7:0],24'd0}; + endcase // case (ctrl_lsu_adr_i[1:0]) + + // Do appropriate extension + always @(*) + case({ctrl_lsu_zext_i, ctrl_lsu_length_i}) + 3'b100: // lbz + dbus_dat_extended = {24'd0,dbus_dat_aligned[31:24]}; + 3'b101: // lhz + dbus_dat_extended = {16'd0,dbus_dat_aligned[31:16]}; + 3'b000: // lbs + dbus_dat_extended = {{24{dbus_dat_aligned[31]}}, + dbus_dat_aligned[31:24]}; + 3'b001: // lhs + dbus_dat_extended = {{16{dbus_dat_aligned[31]}}, + dbus_dat_aligned[31:16]}; + default: + dbus_dat_extended = dbus_dat_aligned; + endcase + + assign lsu_result_o = dbus_dat_extended; + + // Bus access logic + localparam [2:0] + IDLE = 3'd0, + READ = 3'd1, + WRITE = 3'd2, + TLB_RELOAD = 3'd3, + DC_REFILL = 3'd4; + + reg [2:0] state; + + assign dbus_access = (!dc_access | tlb_reload_busy | ctrl_op_lsu_store_i) & + (state != DC_REFILL) | (state == WRITE); + reg dc_refill_r; + + always @(posedge clk) + dc_refill_r <= dc_refill; + + wire store_buffer_ack; + assign store_buffer_ack = (FEATURE_STORE_BUFFER!="NONE") ? 
+ store_buffer_write : + write_done; + + assign lsu_ack = (ctrl_op_lsu_store_i | state == WRITE) ? + (store_buffer_ack & !ctrl_op_lsu_atomic_i | + write_done & ctrl_op_lsu_atomic_i) : + (dbus_access ? dbus_ack : dc_ack); + + assign lsu_ldat = dbus_access ? dbus_dat : dc_ldat; + assign dbus_adr_o = dbus_adr; + + assign dbus_dat_o = dbus_dat; + + assign dbus_burst_o = (state == DC_REFILL) & !dc_refill_done; + + // + // Slightly subtle, but if there is an atomic store coming out from the + // store buffer, and the link has been broken while it was waiting there, + // the bus access is still performed as a (discarded) read. + // + assign dbus_we_o = dbus_we & (!dbus_atomic | atomic_reserve); + + assign next_dbus_adr = (OPTION_DCACHE_BLOCK_WIDTH == 5) ? + {dbus_adr[31:5], dbus_adr[4:0] + 5'd4} : // 32 byte + {dbus_adr[31:4], dbus_adr[3:0] + 4'd4}; // 16 byte + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + dbus_err <= 0; + else + dbus_err <= dbus_err_i; + + always @(posedge clk) begin + dbus_ack <= 0; + write_done <= 0; + tlb_reload_ack <= 0; + tlb_reload_done <= 0; + case (state) + IDLE: begin + dbus_req_o <= 0; + dbus_we <= 0; + dbus_adr <= 0; + dbus_bsel_o <= 4'hf; + dbus_atomic <= 0; + last_write <= 0; + if (store_buffer_write | !store_buffer_empty) begin + state <= WRITE; + end else if (ctrl_op_lsu & dbus_access & !dc_refill & !dbus_ack & + !dbus_err & !except_dbus & !access_done & + !pipeline_flush_i) begin + if (tlb_reload_req) begin + dbus_adr <= tlb_reload_addr; + dbus_req_o <= 1; + state <= TLB_RELOAD; + end else if (dmmu_enable_i) begin + dbus_adr <= dmmu_phys_addr; + if (!tlb_miss & !pagefault & !except_align) begin + if (ctrl_op_lsu_load_i) begin + dbus_req_o <= 1; + dbus_bsel_o <= dbus_bsel; + state <= READ; + end + end + end else if (!except_align) begin + dbus_adr <= ctrl_lsu_adr_i; + if (ctrl_op_lsu_load_i) begin + dbus_req_o <= 1; + dbus_bsel_o <= dbus_bsel; + state <= READ; + end + end + end else if (dc_refill_req) begin + dbus_req_o <= 1; + 
dbus_adr <= dc_adr_match; + state <= DC_REFILL; + end + end + + DC_REFILL: begin + dbus_req_o <= 1; + if (dbus_ack_i) begin + dbus_adr <= next_dbus_adr; + if (dc_refill_done) begin + dbus_req_o <= 0; + state <= IDLE; + end + end + + // TODO: only abort on snoop-hits to refill address + if (dbus_err_i | dc_snoop_hit) begin + dbus_req_o <= 0; + state <= IDLE; + end + end + + READ: begin + dbus_ack <= dbus_ack_i; + dbus_dat <= dbus_dat_i; + if (dbus_ack_i | dbus_err_i) begin + dbus_req_o <= 0; + state <= IDLE; + end + end + + WRITE: begin + dbus_req_o <= 1; + dbus_we <= 1; + + if (!dbus_req_o | dbus_ack_i & !last_write) begin + dbus_bsel_o <= store_buffer_bsel; + dbus_adr <= store_buffer_radr; + dbus_dat <= store_buffer_dat; + dbus_atomic <= store_buffer_atomic; + last_write <= store_buffer_empty; + end + + if (store_buffer_write) + last_write <= 0; + + if (last_write & dbus_ack_i | dbus_err_i) begin + dbus_req_o <= 0; + dbus_we <= 0; + if (!store_buffer_write) begin + state <= IDLE; + write_done <= 1; + end + end + end + + TLB_RELOAD: begin + dbus_adr <= tlb_reload_addr; + tlb_reload_data <= dbus_dat_i; + tlb_reload_ack <= dbus_ack_i & tlb_reload_req; + + if (!tlb_reload_req | dbus_err_i) begin + state <= IDLE; + tlb_reload_done <= 1; + end + + dbus_req_o <= tlb_reload_req; + if (dbus_ack_i | tlb_reload_ack) + dbus_req_o <= 0; + end + + default: + state <= IDLE; + endcase + + if (rst) + state <= IDLE; + end + + assign dbus_stall = tlb_reload_busy | except_align | except_dbus | + except_dtlb_miss | except_dpagefault | + pipeline_flush_i; + + // Stall until the store buffer is empty + assign msync_stall_o = ctrl_op_msync_i & (state == WRITE); + +generate +if (FEATURE_ATOMIC!="NONE") begin : atomic_gen + // Atomic operations logic + reg atomic_flag_set; + reg atomic_flag_clear; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + atomic_reserve <= 0; + else if (pipeline_flush_i) + atomic_reserve <= 0; + else if (ctrl_op_lsu_store_i & ctrl_op_lsu_atomic_i & write_done || 
+ !ctrl_op_lsu_atomic_i & store_buffer_write & + (store_buffer_wadr == atomic_addr) || + (snoop_valid & (snoop_adr_i == atomic_addr))) + atomic_reserve <= 0; + else if (ctrl_op_lsu_load_i & ctrl_op_lsu_atomic_i & padv_ctrl_i) + atomic_reserve <= !(snoop_valid & (snoop_adr_i == dc_adr_match)); + + always @(posedge clk) + if (ctrl_op_lsu_load_i & ctrl_op_lsu_atomic_i & padv_ctrl_i) + atomic_addr <= dc_adr_match; + + assign swa_success = ctrl_op_lsu_store_i & ctrl_op_lsu_atomic_i & + atomic_reserve & (dbus_adr == atomic_addr); + + always @(posedge clk) + if (padv_ctrl_i) + atomic_flag_set <= 0; + else if (write_done) + atomic_flag_set <= swa_success & lsu_valid_o; + + always @(posedge clk) + if (padv_ctrl_i) + atomic_flag_clear <= 0; + else if (write_done) + atomic_flag_clear <= !swa_success & lsu_valid_o & + ctrl_op_lsu_atomic_i & ctrl_op_lsu_store_i; + + assign atomic_flag_set_o = atomic_flag_set; + assign atomic_flag_clear_o = atomic_flag_clear; + +end else begin + assign atomic_flag_set_o = 0; + assign atomic_flag_clear_o = 0; + assign swa_success = 0; + always @(posedge clk) begin + atomic_addr <= 0; + atomic_reserve <= 0; + end +end +endgenerate + + // Store buffer logic + always @(posedge clk) + if (rst) + store_buffer_write_pending <= 0; + else if (store_buffer_write | pipeline_flush_i) + store_buffer_write_pending <= 0; + else if (ctrl_op_lsu_store_i & padv_ctrl_i & !dbus_stall & + (store_buffer_full | dc_refill | dc_refill_r | dc_snoop_hit)) + store_buffer_write_pending <= 1; + + assign store_buffer_write = (ctrl_op_lsu_store_i & + (padv_ctrl_i | tlb_reload_done) | + store_buffer_write_pending) & + !store_buffer_full & !dc_refill & !dc_refill_r & + !dbus_stall & !dc_snoop_hit; + +generate +if (FEATURE_STORE_BUFFER!="NONE") begin : store_buffer_gen + assign store_buffer_read = (state == IDLE) & store_buffer_write | + (state == IDLE) & !store_buffer_empty | + (state == WRITE) & (dbus_ack_i | !dbus_req_o) & + (!store_buffer_empty | store_buffer_write) & + 
!last_write | + (state == WRITE) & last_write & + store_buffer_write; + + mor1kx_store_buffer + #( + .DEPTH_WIDTH(OPTION_STORE_BUFFER_DEPTH_WIDTH), + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH) + ) + mor1kx_store_buffer + ( + .clk (clk), + .rst (rst), + + .pc_i (ctrl_epcr_i), + .adr_i (store_buffer_wadr), + .dat_i (lsu_sdat), + .bsel_i (dbus_bsel), + .atomic_i (ctrl_op_lsu_atomic_i), + .write_i (store_buffer_write), + + .pc_o (store_buffer_epcr_o), + .adr_o (store_buffer_radr), + .dat_o (store_buffer_dat), + .bsel_o (store_buffer_bsel), + .atomic_o (store_buffer_atomic), + .read_i (store_buffer_read), + + .full_o (store_buffer_full), + .empty_o (store_buffer_empty) + ); +end else begin + assign store_buffer_epcr_o = ctrl_epcr_i; + assign store_buffer_radr = store_buffer_wadr; + assign store_buffer_dat = lsu_sdat; + assign store_buffer_bsel = dbus_bsel; + assign store_buffer_empty = 1'b1; + + reg store_buffer_full_r; + always @(posedge clk `OR_ASYNC_RST) + if (rst) + store_buffer_full_r <= 0; + else if (store_buffer_write) + store_buffer_full_r <= 1; + else if (write_done) + store_buffer_full_r <= 0; + + assign store_buffer_full = store_buffer_full_r & !write_done; +end +endgenerate + assign store_buffer_wadr = dc_adr_match; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + dc_enable_r <= 0; + else if (dc_enable_i & !dbus_req_o) + dc_enable_r <= 1; + else if (!dc_enable_i & !dc_refill) + dc_enable_r <= 0; + + assign dc_enabled = dc_enable_i & dc_enable_r; + assign dc_adr = padv_execute_i & + (exec_op_lsu_load_i | exec_op_lsu_store_i) ? + exec_lsu_adr_i : ctrl_lsu_adr_i; + assign dc_adr_match = dmmu_enable_i ? 
+ {dmmu_phys_addr[OPTION_OPERAND_WIDTH-1:2],2'b0} : + {ctrl_lsu_adr_i[OPTION_OPERAND_WIDTH-1:2],2'b0}; + + assign dc_refill_allowed = !(ctrl_op_lsu_store_i | state == WRITE) & + !dc_snoop_hit & !snoop_valid; + +generate +if (FEATURE_DATACACHE!="NONE") begin : dcache_gen + wire dc_req = ctrl_op_lsu & dc_access & !access_done & !dbus_stall & + !(dbus_atomic & dbus_we & !atomic_reserve); + if (OPTION_DCACHE_LIMIT_WIDTH == OPTION_OPERAND_WIDTH) begin + assign dc_access = ctrl_op_lsu_store_i | dc_enabled & + !(dmmu_cache_inhibit & dmmu_enable_i); + end else if (OPTION_DCACHE_LIMIT_WIDTH < OPTION_OPERAND_WIDTH) begin + assign dc_access = ctrl_op_lsu_store_i | dc_enabled & + dc_adr_match[OPTION_OPERAND_WIDTH-1: + OPTION_DCACHE_LIMIT_WIDTH] == 0 & + !(dmmu_cache_inhibit & dmmu_enable_i); + end else begin + initial begin + $display("ERROR: OPTION_DCACHE_LIMIT_WIDTH > OPTION_OPERAND_WIDTH"); + $finish(); + end + end + + assign dc_bsel = dbus_bsel; + assign dc_we = exec_op_lsu_store_i & !exec_op_lsu_atomic_i & padv_execute_i | + dbus_atomic & dbus_we_o & !write_done | + ctrl_op_lsu_store_i & tlb_reload_busy & !tlb_reload_req; + + /* mor1kx_dcache AUTO_TEMPLATE ( + .refill_o (dc_refill), + .refill_req_o (dc_refill_req), + .refill_done_o (dc_refill_done), + .cache_hit_o (dc_hit), + .cpu_err_o (dc_err), + .cpu_ack_o (dc_ack), + .cpu_dat_o (dc_ldat), + .spr_bus_dat_o (spr_bus_dat_dc_o), + .spr_bus_ack_o (spr_bus_ack_dc_o), + .snoop_hit_o (dc_snoop_hit), + // Inputs + .clk (clk), + .rst (rst), + .dc_dbus_err_i (dbus_err), + .dc_enable_i (dc_enabled), + .dc_access_i (dc_access), + .cpu_dat_i (lsu_sdat), + .cpu_adr_i (dc_adr), + .cpu_adr_match_i (dc_adr_match), + .cpu_req_i (dc_req), + .cpu_we_i (dc_we), + .cpu_bsel_i (dc_bsel), + .refill_allowed (dc_refill_allowed), + .wradr_i (dbus_adr), + .wrdat_i (dbus_dat_i), + .we_i (dbus_ack_i), + .snoop_valid_i (snoop_valid), + );*/ + + mor1kx_dcache + #( + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + 
.OPTION_DCACHE_BLOCK_WIDTH(OPTION_DCACHE_BLOCK_WIDTH), + .OPTION_DCACHE_SET_WIDTH(OPTION_DCACHE_SET_WIDTH), + .OPTION_DCACHE_WAYS(OPTION_DCACHE_WAYS), + .OPTION_DCACHE_LIMIT_WIDTH(OPTION_DCACHE_LIMIT_WIDTH), + .OPTION_DCACHE_SNOOP(OPTION_DCACHE_SNOOP) + ) + mor1kx_dcache + (/*AUTOINST*/ + // Outputs + .refill_o (dc_refill), // Templated + .refill_req_o (dc_refill_req), // Templated + .refill_done_o (dc_refill_done), // Templated + .cache_hit_o (dc_hit), // Templated + .cpu_err_o (dc_err), // Templated + .cpu_ack_o (dc_ack), // Templated + .cpu_dat_o (dc_ldat), // Templated + .snoop_hit_o (dc_snoop_hit), // Templated + .spr_bus_dat_o (spr_bus_dat_dc_o), // Templated + .spr_bus_ack_o (spr_bus_ack_dc_o), // Templated + // Inputs + .clk (clk), // Templated + .rst (rst), // Templated + .dc_dbus_err_i (dbus_err), // Templated + .dc_enable_i (dc_enabled), // Templated + .dc_access_i (dc_access), // Templated + .cpu_dat_i (lsu_sdat), // Templated + .cpu_adr_i (dc_adr), // Templated + .cpu_adr_match_i (dc_adr_match), // Templated + .cpu_req_i (dc_req), // Templated + .cpu_we_i (dc_we), // Templated + .cpu_bsel_i (dc_bsel), // Templated + .refill_allowed (dc_refill_allowed), // Templated + .wradr_i (dbus_adr), // Templated + .wrdat_i (dbus_dat_i), // Templated + .we_i (dbus_ack_i), // Templated + .snoop_adr_i (snoop_adr_i[31:0]), + .snoop_valid_i (snoop_valid), // Templated + .spr_bus_addr_i (spr_bus_addr_i[15:0]), + .spr_bus_we_i (spr_bus_we_i), + .spr_bus_stb_i (spr_bus_stb_i), + .spr_bus_dat_i (spr_bus_dat_i[OPTION_OPERAND_WIDTH-1:0])); +end else begin + assign dc_access = 0; + assign dc_refill = 0; + assign dc_refill_done = 0; + assign dc_refill_req = 0; + assign dc_ack = 0; + assign dc_err = 0; + assign dc_bsel = 0; + assign dc_we = 0; + assign dc_snoop_hit = 0; + assign dc_hit_o = 0; + assign dc_ldat = 0; +end + +endgenerate + +generate +if (FEATURE_DMMU!="NONE") begin : dmmu_gen + wire [OPTION_OPERAND_WIDTH-1:0] virt_addr; + wire dmmu_spr_bus_stb; + wire dmmu_enable; 
+ + assign virt_addr = dc_adr; + + // small hack to delay dmmu spr reads by one cycle + // ideally the spr accesses should work so that the address is presented + // in execute stage and the delayed data should be available in control + // stage, but this is not how things currently work. + assign dmmu_spr_bus_stb = spr_bus_stb_i & (!padv_ctrl_i | spr_bus_we_i); + + assign tlb_reload_pagefault_clear = !ctrl_op_lsu; // use pipeline_flush_i? + + assign dmmu_enable = dmmu_enable_i & !pipeline_flush_i; + + /* mor1kx_dmmu AUTO_TEMPLATE ( + .enable_i (dmmu_enable), + .phys_addr_o (dmmu_phys_addr), + .cache_inhibit_o (dmmu_cache_inhibit), + .op_store_i (ctrl_op_lsu_store_i), + .op_load_i (ctrl_op_lsu_load_i), + .tlb_miss_o (tlb_miss), + .pagefault_o (pagefault), + .tlb_reload_req_o (tlb_reload_req), + .tlb_reload_busy_o (tlb_reload_busy), + .tlb_reload_addr_o (tlb_reload_addr), + .tlb_reload_pagefault_o (tlb_reload_pagefault), + .tlb_reload_ack_i (tlb_reload_ack), + .tlb_reload_data_i (tlb_reload_data), + .tlb_reload_pagefault_clear_i (tlb_reload_pagefault_clear), + .spr_bus_dat_o (spr_bus_dat_dmmu_o), + .spr_bus_ack_o (spr_bus_ack_dmmu_o), + .spr_bus_stb_i (dmmu_spr_bus_stb), + .virt_addr_i (virt_addr), + .virt_addr_match_i (ctrl_lsu_adr_i), + ); */ + mor1kx_dmmu + #( + .FEATURE_DMMU_HW_TLB_RELOAD(FEATURE_DMMU_HW_TLB_RELOAD), + .OPTION_OPERAND_WIDTH(OPTION_OPERAND_WIDTH), + .OPTION_DMMU_SET_WIDTH(OPTION_DMMU_SET_WIDTH), + .OPTION_DMMU_WAYS(OPTION_DMMU_WAYS) + ) + mor1kx_dmmu + (/*AUTOINST*/ + // Outputs + .phys_addr_o (dmmu_phys_addr), // Templated + .cache_inhibit_o (dmmu_cache_inhibit), // Templated + .tlb_miss_o (tlb_miss), // Templated + .pagefault_o (pagefault), // Templated + .tlb_reload_req_o (tlb_reload_req), // Templated + .tlb_reload_busy_o (tlb_reload_busy), // Templated + .tlb_reload_addr_o (tlb_reload_addr), // Templated + .tlb_reload_pagefault_o (tlb_reload_pagefault), // Templated + .spr_bus_dat_o (spr_bus_dat_dmmu_o), // Templated + .spr_bus_ack_o 
(spr_bus_ack_dmmu_o), // Templated + // Inputs + .clk (clk), + .rst (rst), + .enable_i (dmmu_enable), // Templated + .virt_addr_i (virt_addr), // Templated + .virt_addr_match_i (ctrl_lsu_adr_i), // Templated + .op_store_i (ctrl_op_lsu_store_i), // Templated + .op_load_i (ctrl_op_lsu_load_i), // Templated + .supervisor_mode_i (supervisor_mode_i), + .tlb_reload_ack_i (tlb_reload_ack), // Templated + .tlb_reload_data_i (tlb_reload_data), // Templated + .tlb_reload_pagefault_clear_i (tlb_reload_pagefault_clear), // Templated + .spr_bus_addr_i (spr_bus_addr_i[15:0]), + .spr_bus_we_i (spr_bus_we_i), + .spr_bus_stb_i (dmmu_spr_bus_stb), // Templated + .spr_bus_dat_i (spr_bus_dat_i[OPTION_OPERAND_WIDTH-1:0])); +end else begin + assign dmmu_cache_inhibit = 0; + assign tlb_miss = 0; + assign pagefault = 0; + assign tlb_reload_busy = 0; + assign tlb_reload_req = 0; + assign tlb_reload_pagefault = 0; +end +endgenerate + +endmodule // mor1kx_lsu_cappuccino diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_lsu_espresso.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_lsu_espresso.v new file mode 100644 index 0000000..872013a --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_lsu_espresso.v @@ -0,0 +1,275 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: Load, store unit for espresso pipeline + + All combinatorial outputs to pipeline + Dbus interface request signal out synchronous + + 32-bit specific due to sign extension of results + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + /* Load/store unit: drives the data bus from lsu_adr_i and rfb_i, aligns and sign/zero-extends returned load data, and reports alignment and bus-error exceptions. */ +module mor1kx_lsu_espresso + (/*AUTOARG*/ + // Outputs + lsu_result_o, lsu_valid_o, lsu_except_dbus_o, lsu_except_align_o, + dbus_adr_o, dbus_req_o, dbus_dat_o, dbus_bsel_o, dbus_we_o, + dbus_burst_o, + // Inputs + clk, rst, padv_fetch_i, lsu_adr_i, rfb_i, op_lsu_load_i, + op_lsu_store_i, lsu_length_i, lsu_zext_i, exception_taken_i, + du_restart_i, stepping_i, next_fetch_done_i, dbus_err_i, + dbus_ack_i, dbus_dat_i + ); + + parameter OPTION_OPERAND_WIDTH = 32; + parameter OPTION_REGISTERED_IO = "NO"; + /* Any OPTION_REGISTERED_IO value other than "NO" selects the registered_io generate branch: dbus_err_i and the load/store decode are registered and dbus_adr_o is always dbus_adr_r. */ + input clk, rst; + + input padv_fetch_i; + // calculated address from ALU + input [OPTION_OPERAND_WIDTH-1:0] lsu_adr_i; + + // register file B in (store operand) + input [OPTION_OPERAND_WIDTH-1:0] rfb_i; + // from decode stage regs, indicate if load or store + input op_lsu_load_i; + input op_lsu_store_i; + input [1:0] lsu_length_i; + input lsu_zext_i; + + input exception_taken_i; + input du_restart_i; + input stepping_i; + input next_fetch_done_i; + + + output [OPTION_OPERAND_WIDTH-1:0] lsu_result_o; + output lsu_valid_o; + // exception output + output lsu_except_dbus_o; + output lsu_except_align_o; + + // interface to data bus + output [OPTION_OPERAND_WIDTH-1:0] dbus_adr_o; + output dbus_req_o; + output [OPTION_OPERAND_WIDTH-1:0] dbus_dat_o; + output [3:0] dbus_bsel_o; + output dbus_we_o; + output dbus_burst_o; + input dbus_err_i; + input dbus_ack_i; + input [OPTION_OPERAND_WIDTH-1:0] dbus_dat_i; + + reg [OPTION_OPERAND_WIDTH-1:0] dbus_dat_aligned; // comb. 
+ reg [OPTION_OPERAND_WIDTH-1:0] dbus_dat_extended; // comb. + + + reg [OPTION_OPERAND_WIDTH-1:0] dbus_adr_r; + + reg [3:0] dbus_bsel; + + reg dbus_err_r; + + reg access_done; + + reg [OPTION_OPERAND_WIDTH-1:0] lsu_result_r; + + reg op_lsu; + + wire align_err_word; + wire align_err_short; + + wire align_err; + + wire except_align; + reg except_align_r; + + reg except_dbus; + reg execute_go; + /* The store operand is replicated across the whole data word for byte and halfword accesses; dbus_bsel_o marks the addressed lanes. */ + assign dbus_dat_o = (lsu_length_i == 2'b00) ? // byte access + {rfb_i[7:0],rfb_i[7:0],rfb_i[7:0],rfb_i[7:0]} : + (lsu_length_i == 2'b01) ? // halfword access + {rfb_i[15:0],rfb_i[15:0]} : + rfb_i; // word access + + assign align_err_word = |dbus_adr_o[1:0]; + assign align_err_short = dbus_adr_o[0]; + + + assign lsu_valid_o = dbus_ack_i | dbus_err_r| access_done; + assign lsu_except_dbus_o = dbus_err_r | except_dbus; + /* Only lsu_length_i 2'b10 (checked against adr[1:0]) and 2'b01 (checked against adr[0]) can raise an alignment error; the other length encodings never do. */ + assign align_err = (lsu_length_i == 2'b10) & align_err_word | + (lsu_length_i == 2'b01) & align_err_short; + + assign lsu_except_align_o = except_align_r; + /* execute_go is the registered, one cycle delayed form of padv_fetch_i | (stepping_i & next_fetch_done_i). */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + execute_go <= 0; + else + execute_go <= padv_fetch_i | (stepping_i & next_fetch_done_i); + /* access_done latches once the access has finished (bus ack, bus error or alignment exception); padv_fetch_i or du_restart_i clear it and take priority over setting it. */ + always @(posedge clk `OR_ASYNC_RST) + if (rst) + access_done <= 0; + else if (padv_fetch_i | du_restart_i) + access_done <= 0; + else if (dbus_ack_i | dbus_err_r | lsu_except_align_o) + access_done <= 1; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + except_align_r <= 0; + else if (exception_taken_i) + except_align_r <= 0; + else + except_align_r <= except_align; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + except_dbus <= 0; + else if (exception_taken_i) + except_dbus <= 0; + else if (dbus_err_r) + except_dbus <= 1; + + // Need to register address due to behavior of RF when source register is + // same as destination register - value changes after one cycle to the + // forwarding register's value, which is incorrect. + // So we save it on first cycle. 
+ // TODO - perhaps detect in RF when this is the case, and make it keep the + // output steady to avoid an extra address registering stage here. + always @(posedge clk `OR_ASYNC_RST) + if (rst) + dbus_adr_r <= 0; + else if (execute_go & (op_lsu_load_i | op_lsu_store_i)) + dbus_adr_r <= lsu_adr_i; + + // Big endian bus mapping (byte address 0 sets dbus_bsel[3]) + always @(*) + case (lsu_length_i) + 2'b00: // byte access + case(dbus_adr_o[1:0]) + 2'b00: + dbus_bsel = 4'b1000; + 2'b01: + dbus_bsel = 4'b0100; + 2'b10: + dbus_bsel = 4'b0010; + 2'b11: + dbus_bsel = 4'b0001; + endcase + 2'b01: // halfword access + case(dbus_adr_o[1]) + 1'b0: + dbus_bsel = 4'b1100; + 1'b1: + dbus_bsel = 4'b0011; + endcase + 2'b10, + 2'b11: + dbus_bsel = 4'b1111; + endcase + + assign dbus_bsel_o = dbus_bsel; + + assign dbus_we_o = op_lsu_store_i; + + // Select part of read word + // Can use registered address here, as it'll take at least one cycle for + // the data to come back, and by that time dbus_adr_r has the address + always @* + case(dbus_adr_r[1:0]) + 2'b00: + dbus_dat_aligned = dbus_dat_i; + 2'b01: + dbus_dat_aligned = {dbus_dat_i[23:0],8'd0}; + 2'b10: + dbus_dat_aligned = {dbus_dat_i[15:0],16'd0}; + 2'b11: + dbus_dat_aligned = {dbus_dat_i[7:0],24'd0}; + endcase // case (dbus_adr_r[1:0]) + + // Do appropriate extension + always @(*) + case({lsu_zext_i, lsu_length_i}) + 3'b100: // lbz + dbus_dat_extended = {24'd0,dbus_dat_aligned[31:24]}; + 3'b101: // lhz + dbus_dat_extended = {16'd0,dbus_dat_aligned[31:16]}; + 3'b000: // lbs + dbus_dat_extended = {{24{dbus_dat_aligned[31]}}, + dbus_dat_aligned[31:24]}; + 3'b001: // lhs + dbus_dat_extended = {{16{dbus_dat_aligned[31]}}, + dbus_dat_aligned[31:16]}; + default: + dbus_dat_extended = dbus_dat_aligned; + endcase + + // Register result in case writeback doesn't occur for a few cycles + // TODO - remove this - we should write straight into the RF! 
+ always @(posedge clk) + if (dbus_ack_i & op_lsu_load_i) + lsu_result_r <= dbus_dat_extended; + /* This LSU never requests a burst. */ + assign dbus_burst_o = 0; + + // Break up paths of signals which are usually pretty long + generate + if (OPTION_REGISTERED_IO!="NO") + begin : registered_io + /* Registered path: the bus address always comes from the registered copy dbus_adr_r. */ + assign dbus_adr_o = dbus_adr_r; + + always @(posedge clk) + begin + dbus_err_r <= dbus_err_i; + op_lsu <= op_lsu_load_i | op_lsu_store_i; + end + + // Make sure padv_i isn't high because we'll be registering the + // fact that this cycle is an LSU op while it is + assign dbus_req_o = !execute_go & op_lsu & + !(except_align | except_align_r) & + !access_done; + + assign except_align = op_lsu & (op_lsu_load_i | op_lsu_store_i) & + align_err & !execute_go; + + end + else + begin : nonregistered_io + /* Combinational path: use lsu_adr_i directly while execute_go is high, the registered copy otherwise. */ + assign dbus_adr_o = execute_go ? lsu_adr_i : dbus_adr_r; + + always @* + begin + dbus_err_r = dbus_err_i; + op_lsu = op_lsu_load_i | op_lsu_store_i; + end + + assign dbus_req_o = op_lsu & !except_align & !access_done; + + assign except_align = op_lsu & align_err; + + end + endgenerate + /* Once access_done is set, present the captured load result; before that, pass the live extended bus data through. */ + assign lsu_result_o = access_done ? lsu_result_r : dbus_dat_extended; + +endmodule // mor1kx_lsu_espresso diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_pcu.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_pcu.v new file mode 100644 index 0000000..bf2ca7b --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_pcu.v @@ -0,0 +1,140 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx Perfomance Counters Unit + + Copyright (C) 2016 Authors + + Author(s): Alexey Baturo + +***************************************************************************** */ + +`include "mor1kx-defines.v" + /* Performance counters unit: a bank of count registers (pcu_pccr) and matching mode registers (pcu_pcmr), read and written over the SPR bus. Each mode register selects which event strobes increment its counter. */ +module mor1kx_pcu + #( + parameter OPTION_PERFCOUNTERS_NUM = 7 + ) + ( + input clk, + input rst, + + // SPR Bus interface + input spr_access_i, + input spr_we_i, + input spr_re_i, + input [15:0] spr_addr_i, + input [31:0] spr_dat_i, + output spr_bus_ack, + output [31:0] spr_dat_o, + + // Current cpu mode: user/supervisor + input spr_sys_mode_i, + // Events that can occur + input pcu_event_load_i, // track load insn + input pcu_event_store_i, // track store insn + input pcu_event_ifetch_i, // track insn fetch + input pcu_event_dcache_miss_i, // track data cache miss + input pcu_event_icache_miss_i, // track insn cache miss + input pcu_event_ifetch_stall_i, // track SOME stall + input pcu_event_lsu_stall_i, // track LSU stall + input pcu_event_brn_stall_i, // track brn miss + input pcu_event_dtlb_miss_i, // track data tlb miss + input pcu_event_itlb_miss_i, // track insn tlb miss + input pcu_event_datadep_stall_i // track SOME stall + ); + + // Registers: OPTION_PERFCOUNTERS_NUM is the highest index, so each array has OPTION_PERFCOUNTERS_NUM plus one entries + reg [31:0] pcu_pccr[0:OPTION_PERFCOUNTERS_NUM]; + reg [31:0] pcu_pcmr[0:OPTION_PERFCOUNTERS_NUM]; + + wire pcu_pccr_access; + wire pcu_pcmr_access; + + // check if we access pcu + // TODO: generate these signals according to present units + assign pcu_pccr_access = + spr_access_i & + ((`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCCR0_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCCR1_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCCR2_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCCR3_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCCR4_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == 
`SPR_OFFSET(`OR1K_SPR_PCCR5_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCCR6_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCCR7_ADDR))); + /* Same decode for the eight PCMR register addresses. */ + assign pcu_pcmr_access = + spr_access_i & + ((`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCMR0_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCMR1_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCMR2_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCMR3_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCMR4_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCMR5_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCMR6_ADDR)) | + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PCMR7_ADDR))); + + // put data on data bus: every access is acked in the same cycle; PCCR reads return data whenever spr_re_i is set, PCMR reads additionally require spr_sys_mode_i, anything else reads as 0 + assign spr_bus_ack = spr_access_i; + assign spr_dat_o = (spr_access_i & pcu_pccr_access & spr_re_i) ? pcu_pccr[spr_addr_i[2:0]] : + (spr_access_i & pcu_pcmr_access & spr_re_i & spr_sys_mode_i) ? pcu_pcmr[spr_addr_i[2:0]] : + 0; + genvar pcu_num; + generate + for(pcu_num = 0; pcu_num < OPTION_PERFCOUNTERS_NUM + 1; pcu_num = pcu_num + 1) begin: pcu_generate + wire [`OR1K_PCMR_DDS:`OR1K_PCMR_LA] pcu_events_active; + wire [`OR1K_PCMR_DDS:`OR1K_PCMR_LA] pcu_events_hit; + /* Event strobes are shifted to their OR1K_PCMR_* bit positions so they can be masked with this counter's pcu_pcmr. NOTE(review): the vector is declared [OR1K_PCMR_DDS:OR1K_PCMR_LA] while the shifts use absolute bit positions; if OR1K_PCMR_LA is nonzero, confirm against mor1kx-defines.v that the bits still line up with pcu_pcmr and with the pcu_events_hit indices summed below. */ + assign pcu_events_active = + (pcu_event_load_i << `OR1K_PCMR_LA) | + (pcu_event_store_i << `OR1K_PCMR_SA) | + (pcu_event_ifetch_i << `OR1K_PCMR_IF) | + (pcu_event_dcache_miss_i << `OR1K_PCMR_DCM) | + (pcu_event_icache_miss_i << `OR1K_PCMR_ICM) | + (pcu_event_ifetch_stall_i << `OR1K_PCMR_IFS) | + (pcu_event_lsu_stall_i << `OR1K_PCMR_LSUS) | + (pcu_event_brn_stall_i << `OR1K_PCMR_BS) | + (pcu_event_dtlb_miss_i << `OR1K_PCMR_DTLBM) | + (pcu_event_itlb_miss_i << `OR1K_PCMR_ITLBM) | + (pcu_event_datadep_stall_i << `OR1K_PCMR_DDS); + + assign pcu_events_hit = + pcu_events_active & pcu_pcmr[pcu_num]; + /* Update priority: reset (counter cleared, mode register left with only the OR1K_PCMR_CP bit set), then SPR writes, then counting. Because this is one else-if chain, no counter in the bank increments in a cycle where spr_we_i and spr_sys_mode_i are both high, whichever register is addressed. When counting, the counter advances by the number of enabled events that fired in that cycle. */ + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) begin + pcu_pccr[pcu_num] <= 32'd0; + pcu_pcmr[pcu_num] <= 32'd0 | 1 << 
`OR1K_PCMR_CP; + // pcu registers can only be written while spr_sys_mode_i is set + end else if (spr_we_i && spr_sys_mode_i) begin + if (pcu_pccr_access & (spr_addr_i[2:0] == pcu_num)) + pcu_pccr[pcu_num] <= spr_dat_i; + // WPE are not implemented, hence we do not update WPE part (only bits DDS:CISM are written) + if (pcu_pcmr_access && (spr_addr_i[2:0] == pcu_num)) begin + pcu_pcmr[pcu_num][`OR1K_PCMR_DDS:`OR1K_PCMR_CISM] <= + spr_dat_i[`OR1K_PCMR_DDS:`OR1K_PCMR_CISM]; + end + end else if (((pcu_pcmr[pcu_num][`OR1K_PCMR_CISM] & spr_sys_mode_i) | + (pcu_pcmr[pcu_num][`OR1K_PCMR_CIUM] & ~spr_sys_mode_i))) begin + pcu_pccr[pcu_num] <= pcu_pccr[pcu_num] + + pcu_events_hit[`OR1K_PCMR_LA] + + pcu_events_hit[`OR1K_PCMR_SA] + + pcu_events_hit[`OR1K_PCMR_IF] + + pcu_events_hit[`OR1K_PCMR_DCM] + + pcu_events_hit[`OR1K_PCMR_ICM] + + pcu_events_hit[`OR1K_PCMR_IFS] + + pcu_events_hit[`OR1K_PCMR_LSUS] + + pcu_events_hit[`OR1K_PCMR_BS] + + pcu_events_hit[`OR1K_PCMR_DTLBM] + + pcu_events_hit[`OR1K_PCMR_ITLBM] + + pcu_events_hit[`OR1K_PCMR_DDS]; + end + end + end + endgenerate + +endmodule // mor1kx_pcu diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_pic.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_pic.v new file mode 100644 index 0000000..d0e25b1 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_pic.v @@ -0,0 +1,142 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx PIC + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_pic + (/*AUTOARG*/ + // Outputs + spr_picmr_o, spr_picsr_o, spr_bus_ack, spr_dat_o, + // Inputs + clk, rst, irq_i, spr_access_i, spr_we_i, spr_addr_i, spr_dat_i + ); + + parameter OPTION_PIC_TRIGGER="LEVEL"; + parameter OPTION_PIC_NMI_WIDTH = 0; + + input clk; + input rst; + + input [31:0] irq_i; + + output [31:0] spr_picmr_o; + output [31:0] spr_picsr_o; + + // SPR Bus interface + input spr_access_i; + input spr_we_i; + input [15:0] spr_addr_i; + input [31:0] spr_dat_i; + output spr_bus_ack; + output [31:0] spr_dat_o; + + // Registers + reg [31:0] spr_picmr; + reg [31:0] spr_picsr; + + wire spr_picmr_access; + wire spr_picsr_access; + + wire [31:0] irq_unmasked; + + assign spr_picmr_o = spr_picmr; + assign spr_picsr_o = spr_picsr; + + assign spr_picmr_access = + spr_access_i & + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PICMR_ADDR)); + assign spr_picsr_access = + spr_access_i & + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_PICSR_ADDR)); + + assign spr_bus_ack = spr_access_i; + assign spr_dat_o = (spr_access_i & spr_picsr_access) ? spr_picsr : + (spr_access_i & spr_picmr_access) ? 
spr_picmr : + 0; + + assign irq_unmasked = spr_picmr & irq_i; + + generate + + genvar irqline; + + if (OPTION_PIC_TRIGGER=="EDGE") begin : edge_triggered + reg [31:0] irq_unmasked_r; + wire [31:0] irq_unmasked_edge; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + irq_unmasked_r <= 0; + else + irq_unmasked_r <= irq_unmasked; + + for(irqline=0;irqline<32;irqline=irqline+1) begin: picgenerate + assign irq_unmasked_edge[irqline] = irq_unmasked[irqline] & + !irq_unmasked_r[irqline]; + + // PIC status register + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_picsr[irqline] <= 0; + // Set + else if (irq_unmasked_edge[irqline]) + spr_picsr[irqline] <= 1; + // Clear + else if (spr_we_i & spr_picsr_access & spr_dat_i[irqline]) + spr_picsr[irqline] <= 0; + end + end else if (OPTION_PIC_TRIGGER=="LEVEL") begin : level_triggered + for(irqline=0;irqline<32;irqline=irqline+1) + begin: picsrlevelgenerate + // PIC status register + always @(*) + spr_picsr[irqline] = irq_unmasked[irqline]; + end + end // if (OPTION_PIC_TRIGGER=="LEVEL") + + else if (OPTION_PIC_TRIGGER=="LATCHED_LEVEL") begin : latched_level + for(irqline=0;irqline<32;irqline=irqline+1) + begin: piclatchedlevelgenerate + // PIC status register + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_picsr[irqline] <= 0; + else if (spr_we_i && spr_picsr_access) + spr_picsr[irqline] <= irq_unmasked[irqline] | + spr_dat_i[irqline]; + else + spr_picsr[irqline] <= spr_picsr[irqline] | + irq_unmasked[irqline]; + end // block: picgenerate + end // if (OPTION_PIC_TRIGGER=="EDGE") + + else begin : invalid + initial begin + $display("Error - invalid PIC level detection option %s", + OPTION_PIC_TRIGGER); + $finish; + end + end // else: !if(OPTION_PIC_TRIGGER=="LEVEL") + endgenerate + + // PIC (un)mask register + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_picmr <= {{(32-OPTION_PIC_NMI_WIDTH){1'b0}}, + {OPTION_PIC_NMI_WIDTH{1'b1}}}; + else if (spr_we_i && spr_picmr_access) + spr_picmr <= 
{spr_dat_i[31:OPTION_PIC_NMI_WIDTH], + {OPTION_PIC_NMI_WIDTH{1'b1}}}; + +endmodule // mor1kx_pic diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_rf_cappuccino.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_rf_cappuccino.v new file mode 100644 index 0000000..91096d3 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_rf_cappuccino.v @@ -0,0 +1,366 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: Register file for cappuccino pipeline + Handles reading the register file rams and register bypassing. + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + Stefan Kristiansson + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_rf_cappuccino + #( + parameter FEATURE_FASTCONTEXTS = "NONE", + parameter OPTION_RF_CLEAR_ON_INIT = 0, + parameter OPTION_RF_NUM_SHADOW_GPR = 0, + parameter OPTION_RF_ADDR_WIDTH = 5, + parameter OPTION_RF_WORDS = 32, + parameter FEATURE_DEBUGUNIT = "NONE", + parameter OPTION_OPERAND_WIDTH = 32 + ) + ( + input clk, + input rst, + + // pipeline control signal in + input padv_decode_i, + input padv_execute_i, + input padv_ctrl_i, + + + input decode_valid_i, + + input fetch_rf_adr_valid_i, + + // GPR numbers + input [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfa_adr_i, + input [OPTION_RF_ADDR_WIDTH-1:0] fetch_rfb_adr_i, + + input [OPTION_RF_ADDR_WIDTH-1:0] decode_rfa_adr_i, + input [OPTION_RF_ADDR_WIDTH-1:0] decode_rfb_adr_i, + + input [OPTION_RF_ADDR_WIDTH-1:0] execute_rfd_adr_i, + input [OPTION_RF_ADDR_WIDTH-1:0] ctrl_rfd_adr_i, + input [OPTION_RF_ADDR_WIDTH-1:0] wb_rfd_adr_i, + + // SPR interface + input [15:0] spr_bus_addr_i, + input spr_bus_stb_i, + input 
spr_bus_we_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_bus_dat_i, + output spr_gpr_ack_o, + output [OPTION_OPERAND_WIDTH-1:0] spr_gpr_dat_o, + + // Write back signal indications + input execute_rf_wb_i, + input ctrl_rf_wb_i, + input wb_rf_wb_i, + + input [OPTION_OPERAND_WIDTH-1:0] result_i, + input [OPTION_OPERAND_WIDTH-1:0] ctrl_alu_result_i, + + input pipeline_flush_i, + + output [OPTION_OPERAND_WIDTH-1:0] decode_rfa_o, + output [OPTION_OPERAND_WIDTH-1:0] decode_rfb_o, + output [OPTION_OPERAND_WIDTH-1:0] execute_rfa_o, + output [OPTION_OPERAND_WIDTH-1:0] execute_rfb_o + ); + +`include "mor1kx_utils.vh" + localparam RF_ADDR_WIDTH = calc_rf_addr_width(OPTION_RF_ADDR_WIDTH, + OPTION_RF_NUM_SHADOW_GPR); + + wire [OPTION_OPERAND_WIDTH-1:0] rfa_ram_o; + wire [OPTION_OPERAND_WIDTH-1:0] rfb_ram_o; + + reg [OPTION_OPERAND_WIDTH-1:0] wb_hazard_result; + reg [OPTION_OPERAND_WIDTH-1:0] execute_rfa; + reg [OPTION_OPERAND_WIDTH-1:0] execute_rfb; + + wire [RF_ADDR_WIDTH-1:0] rfa_rdad; + wire [RF_ADDR_WIDTH-1:0] rfb_rdad; + + wire rfa_rden; + wire rfb_rden; + + wire rf_wren; + wire [RF_ADDR_WIDTH-1:0] rf_wradr; + wire [OPTION_OPERAND_WIDTH-1:0] rf_wrdat; + + reg flushing; + + // Keep track of the flush signal, this is needed to not wrongly assert + // execute_hazard after an exception (or rfe) has happened. + // What happens in that case is that the instruction in execute stage is + // invalid until the next padv_decode, so it's execute_rfd_adr can not be + // used to evaluate the execute_hazard. 
+ always @(posedge clk) + if (pipeline_flush_i) + flushing <= 1; + else if (padv_decode_i) + flushing <= 0; + + // Detect hazards + reg execute_hazard_a; + reg execute_hazard_b; + always @(posedge clk) + if (pipeline_flush_i) begin + execute_hazard_a <= 0; + execute_hazard_b <= 0; + end else if (padv_decode_i & !flushing) begin + execute_hazard_a <= execute_rf_wb_i & + (execute_rfd_adr_i == decode_rfa_adr_i); + execute_hazard_b <= execute_rf_wb_i & + (execute_rfd_adr_i == decode_rfb_adr_i); + end + + reg [OPTION_OPERAND_WIDTH-1:0] execute_hazard_result_r; + always @(posedge clk) + if (decode_valid_i) + execute_hazard_result_r <= ctrl_alu_result_i; + + wire [OPTION_OPERAND_WIDTH-1:0] execute_hazard_result; + assign execute_hazard_result = decode_valid_i ? ctrl_alu_result_i : + execute_hazard_result_r; + + reg ctrl_hazard_a; + reg ctrl_hazard_b; + always @(posedge clk) + if (padv_decode_i) begin + ctrl_hazard_a <= ctrl_rf_wb_i & (ctrl_rfd_adr_i == decode_rfa_adr_i); + ctrl_hazard_b <= ctrl_rf_wb_i & (ctrl_rfd_adr_i == decode_rfb_adr_i); + end + + reg [OPTION_OPERAND_WIDTH-1:0] ctrl_hazard_result_r; + always @(posedge clk) + if (decode_valid_i) + ctrl_hazard_result_r <= result_i; + + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_hazard_result; + assign ctrl_hazard_result = decode_valid_i ? result_i : ctrl_hazard_result_r; + + reg wb_hazard_a; + reg wb_hazard_b; + always @(posedge clk `OR_ASYNC_RST) + if (rst) begin + wb_hazard_a <= 0; + wb_hazard_b <= 0; + end else if (padv_decode_i) begin + wb_hazard_a <= wb_rf_wb_i & (wb_rfd_adr_i == decode_rfa_adr_i); + wb_hazard_b <= wb_rf_wb_i & (wb_rfd_adr_i == decode_rfb_adr_i); + end + + always @(posedge clk) + if (padv_decode_i) + wb_hazard_result <= result_i; + + // Bypassing to decode stage + // + // Since the decode stage doesn't read from the register file, we have to + // save any writes to the current read addresses in decode stage until + // fetch latch in new values. 
+ // When fetch latch in the new values, and a writeback happens at the + // same time, we bypass that value too. + + // Port A + reg use_last_wb_a; + reg wb_to_decode_bypass_a; + reg [OPTION_OPERAND_WIDTH-1:0] wb_to_decode_result_a; + always @(posedge clk) + if (fetch_rf_adr_valid_i) begin + wb_to_decode_result_a <= result_i; + wb_to_decode_bypass_a <= wb_rf_wb_i & (wb_rfd_adr_i == fetch_rfa_adr_i); + use_last_wb_a <= 0; + end else if (wb_rf_wb_i) begin + if (decode_rfa_adr_i == wb_rfd_adr_i) begin + wb_to_decode_result_a <= result_i; + use_last_wb_a <= 1; + end + end + + wire execute_to_decode_bypass_a; + assign execute_to_decode_bypass_a = ctrl_rf_wb_i & + (ctrl_rfd_adr_i == decode_rfa_adr_i); + + wire ctrl_to_decode_bypass_a; + assign ctrl_to_decode_bypass_a = use_last_wb_a | wb_rf_wb_i & + (wb_rfd_adr_i == decode_rfa_adr_i); + + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_to_decode_result_a; + assign ctrl_to_decode_result_a = use_last_wb_a ? + wb_to_decode_result_a : result_i; + + // Port B + reg use_last_wb_b; + reg wb_to_decode_bypass_b; + reg [OPTION_OPERAND_WIDTH-1:0] wb_to_decode_result_b; + always @(posedge clk) + if (fetch_rf_adr_valid_i) begin + wb_to_decode_result_b <= result_i; + wb_to_decode_bypass_b <= wb_rf_wb_i & (wb_rfd_adr_i == fetch_rfb_adr_i); + use_last_wb_b <= 0; + end else if (wb_rf_wb_i) begin + if (decode_rfb_adr_i == wb_rfd_adr_i) begin + wb_to_decode_result_b <= result_i; + use_last_wb_b <= 1; + end + end + + wire execute_to_decode_bypass_b; + assign execute_to_decode_bypass_b = ctrl_rf_wb_i & + (ctrl_rfd_adr_i == decode_rfb_adr_i); + + wire ctrl_to_decode_bypass_b; + assign ctrl_to_decode_bypass_b = use_last_wb_b | wb_rf_wb_i & + (wb_rfd_adr_i == decode_rfb_adr_i); + + wire [OPTION_OPERAND_WIDTH-1:0] ctrl_to_decode_result_b; + assign ctrl_to_decode_result_b = use_last_wb_b ? + wb_to_decode_result_b : result_i; + + + assign decode_rfa_o = execute_to_decode_bypass_a ? ctrl_alu_result_i : + ctrl_to_decode_bypass_a ? 
ctrl_to_decode_result_a : + wb_to_decode_bypass_a ? wb_to_decode_result_a : + rfa_ram_o; + + assign decode_rfb_o = execute_to_decode_bypass_b ? ctrl_alu_result_i : + ctrl_to_decode_bypass_b ? ctrl_to_decode_result_b : + wb_to_decode_bypass_b ? wb_to_decode_result_b : + rfb_ram_o; + + assign execute_rfa_o = execute_hazard_a ? execute_hazard_result : + ctrl_hazard_a ? ctrl_hazard_result : + wb_hazard_a ? wb_hazard_result : + execute_rfa; + + assign execute_rfb_o = execute_hazard_b ? execute_hazard_result : + ctrl_hazard_b ? ctrl_hazard_result : + wb_hazard_b ? wb_hazard_result : + execute_rfb; + + always @(posedge clk) + if (padv_decode_i) begin + execute_rfa <= decode_rfa_o; + execute_rfb <= decode_rfb_o; + end + +generate +if (FEATURE_DEBUGUNIT!="NONE" || FEATURE_FASTCONTEXTS!="NONE" || + OPTION_RF_NUM_SHADOW_GPR > 0) begin + wire spr_gpr_we; + wire spr_gpr_re; + assign spr_gpr_we = (spr_bus_addr_i[15:9] == 7'h2) & + spr_bus_stb_i & spr_bus_we_i; + assign spr_gpr_re = (spr_bus_addr_i[15:9] == 7'h2) & + spr_bus_stb_i & !spr_bus_we_i & !padv_ctrl_i; + + reg spr_gpr_read_ack; + always @(posedge clk) + spr_gpr_read_ack <= spr_gpr_re; + + assign spr_gpr_ack_o = spr_gpr_we & !wb_rf_wb_i | + spr_gpr_re & spr_gpr_read_ack; + + wire [RF_ADDR_WIDTH-1:0] wb_rfd_adr_expand; + assign wb_rfd_adr_expand[OPTION_RF_ADDR_WIDTH-1:0] = wb_rfd_adr_i; + + assign rf_wren = wb_rf_wb_i | spr_gpr_we; + assign rf_wradr = wb_rf_wb_i ? wb_rfd_adr_expand : spr_bus_addr_i[RF_ADDR_WIDTH-1:0]; + assign rf_wrdat = wb_rf_wb_i ? 
result_i : spr_bus_dat_i; + + // Zero-pad unused parts of vector + if (OPTION_RF_NUM_SHADOW_GPR > 0) begin + assign wb_rfd_adr_expand[RF_ADDR_WIDTH-1:OPTION_RF_ADDR_WIDTH] = + {(RF_ADDR_WIDTH-OPTION_RF_ADDR_WIDTH){1'b0}}; + assign rfa_rdad[RF_ADDR_WIDTH-1:OPTION_RF_ADDR_WIDTH] = + {(RF_ADDR_WIDTH-OPTION_RF_ADDR_WIDTH){1'b0}}; + assign rfb_rdad[RF_ADDR_WIDTH-1:OPTION_RF_ADDR_WIDTH] = + {(RF_ADDR_WIDTH-OPTION_RF_ADDR_WIDTH){1'b0}}; + end + +end else begin + assign spr_gpr_ack_o = 1; + + assign rf_wren = wb_rf_wb_i; + assign rf_wradr = wb_rfd_adr_i; + assign rf_wrdat = result_i; +end +endgenerate + + assign rfa_rdad[OPTION_RF_ADDR_WIDTH-1:0] = fetch_rfa_adr_i; + assign rfb_rdad[OPTION_RF_ADDR_WIDTH-1:0] = fetch_rfb_adr_i; + assign rfa_rden = fetch_rf_adr_valid_i; + assign rfb_rden = fetch_rf_adr_valid_i; + + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH (RF_ADDR_WIDTH), + .DATA_WIDTH (OPTION_OPERAND_WIDTH), + .CLEAR_ON_INIT (OPTION_RF_CLEAR_ON_INIT), + .ENABLE_BYPASS (0) + ) + rfa + ( + .clk (clk), + .dout (rfa_ram_o), + .raddr (rfa_rdad), + .re (rfa_rden), + .waddr (rf_wradr), + .we (rf_wren), + .din (rf_wrdat) + ); + + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH (RF_ADDR_WIDTH), + .DATA_WIDTH (OPTION_OPERAND_WIDTH), + .CLEAR_ON_INIT (OPTION_RF_CLEAR_ON_INIT), + .ENABLE_BYPASS (0) + ) + rfb + ( + .clk (clk), + .dout (rfb_ram_o), + .raddr (rfb_rdad), + .re (rfb_rden), + .waddr (rf_wradr), + .we (rf_wren), + .din (rf_wrdat) + ); + +generate +if (FEATURE_DEBUGUNIT!="NONE" || FEATURE_FASTCONTEXTS!="NONE" || + OPTION_RF_NUM_SHADOW_GPR > 0) begin : rfspr_gen + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH (RF_ADDR_WIDTH), + .DATA_WIDTH (OPTION_OPERAND_WIDTH), + .CLEAR_ON_INIT (OPTION_RF_CLEAR_ON_INIT), + .ENABLE_BYPASS (0) + ) + rfspr + ( + .clk (clk), + .dout (spr_gpr_dat_o), + .raddr (spr_bus_addr_i[RF_ADDR_WIDTH-1:0]), + .re (1'b1), + .waddr (rf_wradr), + .we (rf_wren), + .din (rf_wrdat) + ); +end else begin + assign spr_gpr_dat_o = 0; +end +endgenerate + +endmodule // 
mor1kx_rf_cappuccino diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_rf_espresso.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_rf_espresso.v new file mode 100644 index 0000000..25a91d6 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_rf_espresso.v @@ -0,0 +1,180 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: Register file for espresso pipeline + + We get addresses for A and B read directly in from the instruction bus + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_rf_espresso + (/*AUTOARG*/ + // Outputs + rfa_o, rfb_o, + // Inputs + clk, rst, rfd_adr_i, rfa_adr_i, rfb_adr_i, rf_we_i, rf_re_i, + result_i + ); + + parameter OPTION_RF_ADDR_WIDTH = 5; + parameter OPTION_RF_WORDS = 32; + + parameter OPTION_OPERAND_WIDTH = 32; + + input clk, rst; + + + // GPR addresses + // These two directly from insn bus + input [OPTION_RF_ADDR_WIDTH-1:0] rfd_adr_i; + input [OPTION_RF_ADDR_WIDTH-1:0] rfa_adr_i; + // This one from the decode stage + input [OPTION_RF_ADDR_WIDTH-1:0] rfb_adr_i; + + // WE strobe from control stage + input rf_we_i; + + // Read enable strobe + input rf_re_i; + + input [OPTION_OPERAND_WIDTH-1:0] result_i; + + + output [OPTION_OPERAND_WIDTH-1:0] rfa_o; + output [OPTION_OPERAND_WIDTH-1:0] rfb_o; + + wire [OPTION_OPERAND_WIDTH-1:0] rfa_o_mux; + wire [OPTION_OPERAND_WIDTH-1:0] rfb_o_mux; + + + wire [OPTION_OPERAND_WIDTH-1:0] rfa_ram_o; + wire [OPTION_OPERAND_WIDTH-1:0] rfb_ram_o; + + reg [OPTION_OPERAND_WIDTH-1:0] result_last; + reg [OPTION_RF_ADDR_WIDTH-1:0] rfd_last; + reg [OPTION_RF_ADDR_WIDTH-1:0] rfd_r; + 
reg [OPTION_RF_ADDR_WIDTH-1:0] rfa_r; + reg [OPTION_RF_ADDR_WIDTH-1:0] rfb_r; + + wire rfa_o_use_last; + wire rfb_o_use_last; + reg rfa_o_using_last; + reg rfb_o_using_last; + + wire rfa_rden; + wire rfb_rden; + + wire rf_wren; + + // Read enables to make sure the last write-while-read propagates through + // once the use_last signal goes away (we might rely on the value remaining + // what it was, but the last registered result might get written again) so + // this forces a read to get that value out. + wire rfa_rden_for_last; + wire rfb_rden_for_last; + + // Avoid read-write + // Use when this instruction actually will write to its destination + // register. + assign rfa_o_use_last = (rfd_last == rfa_r); + assign rfb_o_use_last = (rfd_last == rfb_r); + + assign rfa_o = rfa_o_use_last ? result_last : rfa_ram_o; + + assign rfb_o = rfb_o_use_last ? result_last : rfb_ram_o; + + assign rfa_rden_for_last = (rfa_o_use_last & !rf_re_i); + assign rfb_rden_for_last = (rfb_o_use_last & !rf_re_i); + + assign rfa_rden = rf_re_i | rfa_rden_for_last; + assign rfb_rden = rf_re_i | rfb_rden_for_last; + + assign rf_wren = rf_we_i; + + + always @(posedge clk `OR_ASYNC_RST) + if (rst) begin + rfa_r <= 0; + rfb_r <= 0; + rfd_r <= 0; + end + else if (rf_re_i) + begin + rfa_r <= rfa_adr_i; + rfb_r <= rfb_adr_i; + rfd_r <= rfd_adr_i; + end + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + rfd_last <= 0; + else if (rf_wren) + rfd_last <= rfd_adr_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + result_last <= 0; + else if (rf_wren) + result_last <= result_i; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) begin + rfa_o_using_last <= 0; + rfb_o_using_last <= 0; + end + else begin + if (!rfa_o_using_last) + rfa_o_using_last <= rfa_o_use_last & !rfa_rden; + else if (rfa_rden) + rfa_o_using_last <= 0; + + if (!rfb_o_using_last) + rfb_o_using_last <= rfb_o_use_last & !rfb_rden; + else if (rfb_rden) + rfb_o_using_last <= 0; + end + + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH 
(OPTION_RF_ADDR_WIDTH), + .DATA_WIDTH (OPTION_OPERAND_WIDTH), + .ENABLE_BYPASS (0) + ) + rfa + ( + .clk (clk), + .dout (rfa_ram_o), + .raddr (rfa_adr_i), + .re (rfa_rden), + .waddr (rfd_adr_i), + .we (rf_wren), + .din (result_i) + ); + + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH (OPTION_RF_ADDR_WIDTH), + .DATA_WIDTH (OPTION_OPERAND_WIDTH), + .ENABLE_BYPASS (0) + ) + rfb + ( + .clk (clk), + .dout (rfb_ram_o), + .raddr (rfb_adr_i), + .re (rfb_rden), + .waddr (rfd_adr_i), + .we (rf_wren), + .din (result_i) + ); + +endmodule // mor1kx_rf_espresso diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_simple_dpram_sclk.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_simple_dpram_sclk.v new file mode 100644 index 0000000..8af763f --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_simple_dpram_sclk.v @@ -0,0 +1,72 @@ +/****************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: + Simple single clocked dual port ram (separate read and write ports), + with optional bypass logic. 
+ + Copyright (C) 2012 Stefan Kristiansson + + ******************************************************************************/ + +module mor1kx_simple_dpram_sclk + #( + parameter ADDR_WIDTH = 32, + parameter DATA_WIDTH = 32, + parameter CLEAR_ON_INIT = 0, + parameter ENABLE_BYPASS = 1 + ) + ( + input clk, + input [ADDR_WIDTH-1:0] raddr, + input re, + input [ADDR_WIDTH-1:0] waddr, + input we, + input [DATA_WIDTH-1:0] din, + output [DATA_WIDTH-1:0] dout + ); + + reg [DATA_WIDTH-1:0] mem[(1< + + ******************************************************************************/ +`include "mor1kx-defines.v" + +module mor1kx_store_buffer + #( + parameter DEPTH_WIDTH = 4, + parameter OPTION_OPERAND_WIDTH = 32 + ) + ( + input clk, + input rst, + + input [OPTION_OPERAND_WIDTH-1:0] pc_i, + input [OPTION_OPERAND_WIDTH-1:0] adr_i, + input [OPTION_OPERAND_WIDTH-1:0] dat_i, + input [OPTION_OPERAND_WIDTH/8-1:0] bsel_i, + input atomic_i, + input write_i, + + output [OPTION_OPERAND_WIDTH-1:0] pc_o, + output [OPTION_OPERAND_WIDTH-1:0] adr_o, + output [OPTION_OPERAND_WIDTH-1:0] dat_o, + output [OPTION_OPERAND_WIDTH/8-1:0] bsel_o, + output atomic_o, + input read_i, + + output full_o, + output empty_o + ); + + // The fifo stores address + data + byte sel + pc + atomic + localparam FIFO_DATA_WIDTH = OPTION_OPERAND_WIDTH*3 + + OPTION_OPERAND_WIDTH/8 + 1; + + wire [FIFO_DATA_WIDTH-1:0] fifo_dout; + wire [FIFO_DATA_WIDTH-1:0] fifo_din; + + reg [DEPTH_WIDTH:0] write_pointer; + reg [DEPTH_WIDTH:0] read_pointer; + + assign fifo_din = {adr_i, dat_i, bsel_i, pc_i, atomic_i}; + assign {adr_o, dat_o, bsel_o, pc_o, atomic_o} = fifo_dout; + + assign full_o = (write_pointer[DEPTH_WIDTH] != read_pointer[DEPTH_WIDTH]) && + (write_pointer[DEPTH_WIDTH-1:0] == read_pointer[DEPTH_WIDTH-1:0]); + assign empty_o = write_pointer == read_pointer; + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + write_pointer <= 0; + else if (write_i) + write_pointer <= write_pointer + 1'd1; + + always @(posedge clk 
`OR_ASYNC_RST) + if (rst) + read_pointer <= 0; + else if (read_i) + read_pointer <= read_pointer + 1'd1; + + mor1kx_simple_dpram_sclk + #( + .ADDR_WIDTH(DEPTH_WIDTH), + .DATA_WIDTH(FIFO_DATA_WIDTH), + .ENABLE_BYPASS(1) + ) + fifo_ram + ( + .clk (clk), + .dout (fifo_dout), + .raddr (read_pointer[DEPTH_WIDTH-1:0]), + .re (read_i), + .waddr (write_pointer[DEPTH_WIDTH-1:0]), + .we (write_i), + .din (fifo_din) + ); + +endmodule diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ticktimer.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ticktimer.v new file mode 100644 index 0000000..f0eb5b5 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_ticktimer.v @@ -0,0 +1,90 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: mor1kx tick timer unit + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_ticktimer + ( + input clk, + input rst, + + output [31:0] spr_ttmr_o, + output [31:0] spr_ttcr_o, + + // SPR Bus interface + input spr_access_i, + input spr_we_i, + input [15:0] spr_addr_i, + input [31:0] spr_dat_i, + output spr_bus_ack, + output [31:0] spr_dat_o + ); + + // Registers + reg [31:0] spr_ttmr; + reg [31:0] spr_ttcr; + + wire spr_ttmr_access; + wire spr_ttcr_access; + + // ttcr control wires + wire ttcr_clear; + wire ttcr_run; + wire ttcr_match; + + assign spr_ttmr_o = spr_ttmr; + assign spr_ttcr_o = spr_ttcr; + + assign spr_ttmr_access = + spr_access_i & + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_TTMR_ADDR)); + assign spr_ttcr_access = + spr_access_i & + (`SPR_OFFSET(spr_addr_i) == `SPR_OFFSET(`OR1K_SPR_TTCR_ADDR)); + + 
assign spr_bus_ack = spr_access_i; + assign spr_dat_o = (spr_access_i & spr_ttcr_access) ? spr_ttcr : + (spr_access_i & spr_ttmr_access) ? spr_ttmr : 0; + + assign ttcr_match = spr_ttcr[27:0] == spr_ttmr[27:0]; + + // Timer SPR control + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_ttmr <= 0; + else if (spr_we_i & spr_ttmr_access) + spr_ttmr <= spr_dat_i[31:0]; + else if (ttcr_match & spr_ttmr[29]) + spr_ttmr[28] <= 1; // Generate interrupt + + // Modes (spr_ttmr[31:30]): + // 00 Tick timer is disabled. + // 01 Timer is restarted on ttcr_match. + // 10 Timer stops when ttcr_match is true. + // 11 Timer does not stop when ttcr_match is true + assign ttcr_clear = (spr_ttmr[31:30] == 2'b01) & ttcr_match; + assign ttcr_run = (spr_ttmr[31:30] != 2'b00) & !ttcr_match | + (spr_ttmr[31:30] == 2'b11); + + always @(posedge clk `OR_ASYNC_RST) + if (rst) + spr_ttcr <= 0; + else if (spr_we_i & spr_ttcr_access) + spr_ttcr <= spr_dat_i[31:0]; + else if (ttcr_clear) + spr_ttcr <= 0; + else if (ttcr_run) + spr_ttcr <= spr_ttcr + 1; + +endmodule // mor1kx_ticktimer diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_true_dpram_sclk.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_true_dpram_sclk.v new file mode 100644 index 0000000..ca2f02d --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_true_dpram_sclk.v @@ -0,0 +1,60 @@ +/****************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. 
If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: True dual port ram with dual clocks + + Copyright (C) 2013 Stefan Kristiansson + + ******************************************************************************/ + +module mor1kx_true_dpram_sclk + #( + parameter ADDR_WIDTH = 32, + parameter DATA_WIDTH = 32 + ) + ( + /* Port A */ + input clk_a, + input [ADDR_WIDTH-1:0] addr_a, + input we_a, + input [DATA_WIDTH-1:0] din_a, + output [DATA_WIDTH-1:0] dout_a, + + /* Port B */ + input clk_b, + input [ADDR_WIDTH-1:0] addr_b, + input we_b, + input [DATA_WIDTH-1:0] din_b, + output [DATA_WIDTH-1:0] dout_b + ); + + reg [DATA_WIDTH-1:0] mem[(1< + + ******************************************************************************/ + +`ifndef _MOR1KX_UTILS_VH_ +`define _MOR1KX_UTILS_VH_ 1 +// +// clog2 - replacement for $clog2 for tools that don't support verilog 2005. +// However, icarus doesn't support constant user functions, so it has to be +// implemented with a bit of `define trickery. 
+// +`ifdef __ICARUS__ +`define clog2 $clog2 +`else +`define clog2 clog2 +`endif + +`endif // _MOR1KX_UTILS_VH_ + +function integer clog2; +input integer in; +begin + in = in - 1; + for (clog2 = 0; in > 0; clog2=clog2+1) + in = in >> 1; +end +endfunction + +// +// Find First 1 - Start from MSB and count downwards, returns 0 when no bit set +// +function integer ff1; +input integer in; +input integer width; +integer i; +begin + ff1 = 0; + for (i = width-1; i >= 0; i=i-1) begin + if (in[i]) + ff1 = i; + end +end +endfunction + +// +// Find Last 1 - Start from LSB and count upwards, returns 0 when no bit set +// +function integer fl1; +input integer in; +input integer width; +integer i; +begin + fl1 = 0; + for (i = 0; i < width; i=i+1) begin + if (in[i]) + fl1 = i; + end +end +endfunction + +// +// Reverse the low 'width' bits of a vector: bit i moves to bit (width-1)-i +// +function integer reverse_bits; +input integer in; +input integer width; +integer i; +begin + for (i = 0; i < width; i=i+1) begin + reverse_bits[(width-1)-i] = in[i]; + end +end +endfunction + +// +// Reverse bytes in a vector +// +function integer reverse_bytes; +input integer in; +input integer width; +integer i; +begin + for (i = 0; i < width; i=i+8) begin + reverse_bytes[(width-1)-i-:8] = in[i+:8]; + end +end +endfunction + +// +// Calculate register file address width, considers shadow registers, used in +// rf and cpu. +// +function integer calc_rf_addr_width; +input integer rf_addr_width; +input integer rf_num_shadow_gpr; +begin + if (rf_num_shadow_gpr == 0) + calc_rf_addr_width = rf_addr_width; + else + calc_rf_addr_width = rf_addr_width + + ((rf_num_shadow_gpr == 1) ? 
1 : `clog2(rf_num_shadow_gpr)); +end +endfunction + + diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_wb_mux_cappuccino.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_wb_mux_cappuccino.v new file mode 100644 index 0000000..2d571b2 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_wb_mux_cappuccino.v @@ -0,0 +1,55 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: RF writeback mux + + Choose between ALU and LSU input. + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_wb_mux_cappuccino + #( + parameter OPTION_OPERAND_WIDTH = 32 + ) + ( + input clk, + input rst, + + input [OPTION_OPERAND_WIDTH-1:0] alu_result_i, + input [OPTION_OPERAND_WIDTH-1:0] lsu_result_i, + input [OPTION_OPERAND_WIDTH-1:0] mul_result_i, + input [OPTION_OPERAND_WIDTH-1:0] spr_i, + + output [OPTION_OPERAND_WIDTH-1:0] rf_result_o, + + input op_mul_i, + input op_lsu_load_i, + input op_mfspr_i + ); + + reg [OPTION_OPERAND_WIDTH-1:0] rf_result; + reg wb_op_mul; + + assign rf_result_o = wb_op_mul ? 
mul_result_i : rf_result; + + always @(posedge clk) + if (op_mfspr_i) + rf_result <= spr_i; + else if (op_lsu_load_i) + rf_result <= lsu_result_i; + else + rf_result <= alu_result_i; + + always @(posedge clk) + wb_op_mul <= op_mul_i; + +endmodule // mor1kx_wb_mux_cappuccino diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_wb_mux_espresso.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_wb_mux_espresso.v new file mode 100644 index 0000000..82a6996 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/mor1kx_wb_mux_espresso.v @@ -0,0 +1,53 @@ +/* **************************************************************************** + This Source Code Form is subject to the terms of the + Open Hardware Description License, v. 1.0. If a copy + of the OHDL was not distributed with this file, You + can obtain one at http://juliusbaxter.net/ohdl/ohdl.txt + + Description: RF writeback mux for espresso pipeline + + Choose between ALU and LSU input. All combinatorial + + Copyright (C) 2012 Authors + + Author(s): Julius Baxter + +***************************************************************************** */ + +`include "mor1kx-defines.v" + +module mor1kx_wb_mux_espresso + (/*AUTOARG*/ + // Outputs + rf_result_o, + // Inputs + clk, rst, alu_result_i, lsu_result_i, ppc_i, pc_fetch_next_i, + spr_i, op_jal_i, op_lsu_load_i, op_mfspr_i + ); + + parameter OPTION_OPERAND_WIDTH = 32; + + input clk, rst; + + input [OPTION_OPERAND_WIDTH-1:0] alu_result_i; + input [OPTION_OPERAND_WIDTH-1:0] lsu_result_i; + input [OPTION_OPERAND_WIDTH-1:0] ppc_i; + input [OPTION_OPERAND_WIDTH-1:0] pc_fetch_next_i; + input [OPTION_OPERAND_WIDTH-1:0] spr_i; + + output [OPTION_OPERAND_WIDTH-1:0] rf_result_o; + + input op_jal_i; + input op_lsu_load_i; + input op_mfspr_i; + + + assign rf_result_o = op_lsu_load_i ? lsu_result_i : + op_mfspr_i ? 
spr_i : + // Use the PC we've calcuated from the fetch unit + // to save inferring a 32-bit adder here like we + // would if we did "ppc_i + 8" + op_jal_i ? pc_fetch_next_i: + alu_result_i; + +endmodule // mor1kx_wb_mux_espresso diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_addsub.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_addsub.v new file mode 100644 index 0000000..9db4a46 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_addsub.v @@ -0,0 +1,338 @@ +////////////////////////////////////////////////////////////////////// +// // +// pfpu32_addsub // +// // +// This file is part of the mor1kx project // +// https://github.com/openrisc/mor1kx // +// // +// Description // +// addition/subtraction pipeline for single precision floating // +// point numbers // +// // +// Author(s): // +// - Original design (FPU100) - // +// Jidan Al-eryani, jidan@gmx.net // +// - Conv. to Verilog and inclusion in OR1200 - // +// Julius Baxter, julius@opencores.org // +// - Update for mor1kx, // +// bug fixing and further development - // +// Andrey Bacherov, avbacherov@opencores.org // +// // +////////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2006, 2010, 2014 // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY // +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED // +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS // +// FOR A PARTICULAR PURPOSE. 
IN NO EVENT SHALL THE AUTHOR // +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, // +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES // +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE // +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR // +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT // +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // +// POSSIBILITY OF SUCH DAMAGE. // +////////////////////////////////////////////////////////////////////// + +`include "mor1kx-defines.v" + + +module pfpu32_addsub +( + input clk, + input rst, + input flush_i, // flush pipe + input adv_i, // advance pipe: stage registers update only while this is set + input start_i, // start add/sub + input is_sub_i, // 1: subtraction, 0: addition + // input 'a' related values + input signa_i, + input [9:0] exp10a_i, + input [23:0] fract24a_i, + input infa_i, + // input 'b' related values + input signb_i, + input [9:0] exp10b_i, + input [23:0] fract24b_i, + input infb_i, + // 'a'/'b' related + input snan_i, + input qnan_i, + input anan_sign_i, + input addsub_agtb_i, // 1: 'a' is the non-shifted operand and 'b' is right-shifted; 0: the opposite + input addsub_aeqb_i, // combined with an effective subtraction it flags a zero result + // outputs + output reg add_rdy_o, // ready + output reg add_sign_o, // signum + output reg add_sub_0_o, // flag that actual subtraction is performed and result is zero + output reg [4:0] add_shl_o, // do left shift in align stage + output reg [9:0] add_exp10shl_o, // exponent for left shift align + output reg [9:0] add_exp10sh0_o, // exponent for no shift in align + output reg [27:0] add_fract28_o, // fractional with appended {r,s} bits + output reg add_inv_o, // invalid operation flag + output reg add_inf_o, // infinity output reg + output reg add_snan_o, // signaling NaN output reg + output reg add_qnan_o, // quiet NaN output reg + output reg add_anan_sign_o // signum for output nan +); + /* + Any stage's output is registered.
+ Definitions: + s??o_name - "S"tage number "??", "O"utput + s??t_name - "S"tage number "??", "T"emporary (internally) + */ + + /* Stage #1: pre addition / substruction align */ + + // detection of some exceptions + // inf - inf -> invalid operation; snan output + wire s1t_inv = infa_i & infb_i & + (signa_i ^ (is_sub_i ^ signb_i)); + // inf input + wire s1t_inf_i = infa_i | infb_i; + + // signums for calculation + wire s1t_calc_signa = signa_i; + wire s1t_calc_signb = (signb_i ^ is_sub_i); + + // not shifted operand and its signum + wire [23:0] s1t_fract24_nsh = + addsub_agtb_i ? fract24a_i : fract24b_i; + + // operand for right shift + wire [23:0] s1t_fract24_fsh = + addsub_agtb_i ? fract24b_i : fract24a_i; + + // shift amount + wire [9:0] s1t_exp_diff = + addsub_agtb_i ? (exp10a_i - exp10b_i) : + (exp10b_i - exp10a_i); + + // limiter by 31 + wire [4:0] s1t_shr = s1t_exp_diff[4:0] | {5{|s1t_exp_diff[9:5]}}; + + // stage #1 outputs + // input related + reg s1o_inv, s1o_inf_i, + s1o_snan_i, s1o_qnan_i, s1o_anan_i_sign; + // computation related + reg s1o_aeqb; + reg [4:0] s1o_shr; + reg s1o_sign_nsh; + reg s1o_op_sub; + reg [9:0] s1o_exp10c; + reg [23:0] s1o_fract24_nsh; + reg [23:0] s1o_fract24_fsh; + // registering + always @(posedge clk) begin + if(adv_i) begin + // input related + s1o_inv <= s1t_inv; + s1o_inf_i <= s1t_inf_i; + s1o_snan_i <= snan_i; + s1o_qnan_i <= qnan_i; + s1o_anan_i_sign <= anan_sign_i; + // computation related + s1o_aeqb <= addsub_aeqb_i; + s1o_shr <= s1t_shr & {5{~s1t_inf_i}}; + s1o_sign_nsh <= addsub_agtb_i ? s1t_calc_signa : s1t_calc_signb; + s1o_op_sub <= s1t_calc_signa ^ s1t_calc_signb; + s1o_exp10c <= addsub_agtb_i ? 
exp10a_i : exp10b_i; + s1o_fract24_nsh <= s1t_fract24_nsh & {24{~s1t_inf_i}}; + s1o_fract24_fsh <= s1t_fract24_fsh & {24{~s1t_inf_i}}; + end // advance + end // posedge clock + + // ready is special case + reg s1o_ready; + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + s1o_ready <= 0; + else if(flush_i) + s1o_ready <= 0; + else if(adv_i) + s1o_ready <= start_i; + end // posedge clock + + + /* Stage 2: multiplex and shift */ + + + // shifter + wire [25:0] s2t_fract26_fsh = {s1o_fract24_fsh,2'd0}; + wire [25:0] s2t_fract26_shr = s2t_fract26_fsh >> s1o_shr; + + // sticky + reg s2t_sticky; + always @(s1o_shr or s1o_fract24_fsh) begin + case(s1o_shr) + 5'd0, 5'd1, 5'd2 : s2t_sticky = 1'b0; // two added zero bits + 5'd3 : s2t_sticky = s1o_fract24_fsh[0]; + 5'd4 : s2t_sticky = |s1o_fract24_fsh[1:0]; + 5'd5 : s2t_sticky = |s1o_fract24_fsh[2:0]; + 5'd6 : s2t_sticky = |s1o_fract24_fsh[3:0]; + 5'd7 : s2t_sticky = |s1o_fract24_fsh[4:0]; + 5'd8 : s2t_sticky = |s1o_fract24_fsh[5:0]; + 5'd9 : s2t_sticky = |s1o_fract24_fsh[6:0]; + 5'd10: s2t_sticky = |s1o_fract24_fsh[7:0]; + 5'd11: s2t_sticky = |s1o_fract24_fsh[8:0]; + 5'd12: s2t_sticky = |s1o_fract24_fsh[9:0]; + 5'd13: s2t_sticky = |s1o_fract24_fsh[10:0]; + 5'd14: s2t_sticky = |s1o_fract24_fsh[11:0]; + 5'd15: s2t_sticky = |s1o_fract24_fsh[12:0]; + 5'd16: s2t_sticky = |s1o_fract24_fsh[13:0]; + 5'd17: s2t_sticky = |s1o_fract24_fsh[14:0]; + 5'd18: s2t_sticky = |s1o_fract24_fsh[15:0]; + 5'd19: s2t_sticky = |s1o_fract24_fsh[16:0]; + 5'd20: s2t_sticky = |s1o_fract24_fsh[17:0]; + 5'd21: s2t_sticky = |s1o_fract24_fsh[18:0]; + 5'd22: s2t_sticky = |s1o_fract24_fsh[19:0]; + 5'd23: s2t_sticky = |s1o_fract24_fsh[20:0]; + 5'd24: s2t_sticky = |s1o_fract24_fsh[21:0]; + 5'd25: s2t_sticky = |s1o_fract24_fsh[22:0]; + default: s2t_sticky = |s1o_fract24_fsh[23:0]; + endcase + end + + // add/sub of non-shifted and shifted operands + wire [27:0] s2t_fract28_shr = {1'b0,s2t_fract26_shr,s2t_sticky}; + + wire [27:0] s2t_fract28_add = 
{1'b0,s1o_fract24_nsh,3'd0} + + (s2t_fract28_shr ^ {28{s1o_op_sub}}) + + {27'd0,s1o_op_sub}; + + + // stage #2 outputs + // input related + reg s2o_inv, s2o_inf_i, + s2o_snan_i, s2o_qnan_i, s2o_anan_i_sign; + // computational related + reg s2o_signc; + reg [9:0] s2o_exp10c; + reg [26:0] s2o_fract27; + reg s2o_sub_0; // actual operation is subtraction and the result is zero + reg s2o_sticky; // rounding support + // registering + always @(posedge clk) begin + if(adv_i) begin + // input related + s2o_inv <= s1o_inv; + s2o_inf_i <= s1o_inf_i; + s2o_snan_i <= s1o_snan_i; + s2o_qnan_i <= s1o_qnan_i; + s2o_anan_i_sign <= s1o_anan_i_sign; + // computation related + s2o_signc <= s1o_sign_nsh; + s2o_exp10c <= s1o_exp10c; + s2o_fract27 <= s2t_fract28_add[27:1]; + s2o_sub_0 <= s1o_aeqb & s1o_op_sub; + s2o_sticky <= s2t_sticky; + end // advance + end // posedge clock + + // ready is special case + reg s2o_ready; + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + s2o_ready <= 0; + else if(flush_i) + s2o_ready <= 0; + else if(adv_i) + s2o_ready <= s1o_ready; + end // posedge clock + + + /* Stage 3: update exponent */ + + + // number of left-shift positions that bring the leading 1 of the stage #2 sum to bit [25] + // [26] bit is right shift flag + reg [4:0] s3t_nlz; + always @(s2o_fract27) begin // combinational logic: blocking assignments only + casez(s2o_fract27) + 27'b1??????????????????????????: s3t_nlz = 0; // [26] bit: shift right + 27'b01?????????????????????????: s3t_nlz = 0; // 1 is in place + 27'b001????????????????????????: s3t_nlz = 1; + 27'b0001???????????????????????: s3t_nlz = 2; + 27'b00001??????????????????????: s3t_nlz = 3; + 27'b000001?????????????????????: s3t_nlz = 4; + 27'b0000001????????????????????: s3t_nlz = 5; + 27'b00000001???????????????????: s3t_nlz = 6; + 27'b000000001??????????????????: s3t_nlz = 7; + 27'b0000000001?????????????????: s3t_nlz = 8; + 27'b00000000001????????????????: s3t_nlz = 9; + 27'b000000000001???????????????: s3t_nlz = 10; + 27'b0000000000001??????????????: s3t_nlz = 11; + 27'b00000000000001?????????????: s3t_nlz = 12; +
27'b000000000000001????????????: s3t_nlz = 13; + 27'b0000000000000001???????????: s3t_nlz = 14; + 27'b00000000000000001??????????: s3t_nlz = 15; + 27'b000000000000000001?????????: s3t_nlz = 16; + 27'b0000000000000000001????????: s3t_nlz = 17; + 27'b00000000000000000001???????: s3t_nlz = 18; + 27'b000000000000000000001??????: s3t_nlz = 19; + 27'b0000000000000000000001?????: s3t_nlz = 20; + 27'b00000000000000000000001????: s3t_nlz = 21; + 27'b000000000000000000000001???: s3t_nlz = 22; + 27'b0000000000000000000000001??: s3t_nlz = 23; + 27'b00000000000000000000000001?: s3t_nlz = 24; + 27'b000000000000000000000000001: s3t_nlz = 25; + 27'b000000000000000000000000000: s3t_nlz = 0; // zero result + endcase + end // always + + // left shift amount and corrected exponent + wire [4:0] s3t_nlz_m1 = (s3t_nlz - 5'd1); + wire [9:0] s3t_exp10c_m1 = s2o_exp10c - 10'd1; + wire [9:0] s3t_exp10c_mz = s2o_exp10c - {5'd0,s3t_nlz}; + wire [4:0] s3t_shl; + wire [9:0] s3t_exp10shl; + assign {s3t_shl,s3t_exp10shl} = + // shift isn't needed or impossible + (~(|s3t_nlz) | (s2o_exp10c == 10'd1)) ? + {5'd0,s2o_exp10c} : + // normalization is possible + (s2o_exp10c > s3t_nlz) ? {s3t_nlz,s3t_exp10c_mz} : + // denormalized cases + (s2o_exp10c == s3t_nlz) ?
{s3t_nlz_m1,10'd1} : + {s3t_exp10c_m1[4:0],10'd1}; + + + // registering output + always @(posedge clk) begin + if(adv_i) begin + // input related + add_inv_o <= s2o_inv; + add_inf_o <= s2o_inf_i; + add_snan_o <= s2o_snan_i; + add_qnan_o <= s2o_qnan_i; + add_anan_sign_o <= s2o_anan_i_sign; + // computation related + add_sign_o <= s2o_signc; + add_sub_0_o <= s2o_sub_0; + add_shl_o <= s3t_shl; + add_exp10shl_o <= s3t_exp10shl; + add_exp10sh0_o <= s2o_exp10c; + add_fract28_o <= {s2o_fract27,s2o_sticky}; + end // advance + end // posedge clock + + // ready is special case + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + add_rdy_o <= 0; + else if(flush_i) + add_rdy_o <= 0; + else if(adv_i) + add_rdy_o <= s2o_ready; + end // posedge clock + +endmodule // pfpu32_addsub diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_cmp.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_cmp.v new file mode 100644 index 0000000..7b51987 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_cmp.v @@ -0,0 +1,202 @@ +///////////////////////////////////////////////////////////////////// +//// //// +//// pfpu32_cmp //// +//// 32-bit floating point comparision //// +//// //// +//// Author: Rudolf Usselmann //// +//// rudi@asics.ws //// +//// //// +//// Modified by Julius Baxter, July, 2010 //// +//// julius.baxter@orsoc.se //// +//// //// +//// Update for mor1kx, bug fixing and further development //// +//// Andrey Bacherov, 2014, //// +//// avbacherov@opencores.org //// +//// //// +///////////////////////////////////////////////////////////////////// +//// //// +//// Copyright (C) 2000 Rudolf Usselmann //// +//// rudi@asics.ws //// +//// //// +//// This source file may be used and distributed without //// +//// restriction provided that this copyright statement is not //// +//// removed from the file and that any derivative work contains //// +//// the original copyright notice and the associated disclaimer.//// +//// //// +//// THIS 
SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //// +//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //// +//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //// +//// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR //// +//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //// +//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //// +//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //// +//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //// +//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //// +//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //// +//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //// +//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //// +//// POSSIBILITY OF SUCH DAMAGE. //// +//// //// +///////////////////////////////////////////////////////////////////// + +`include "mor1kx-defines.v" + +/* completely combinatorial module */ + +module pfpu32_fcmp +( + input fpu_op_is_comp_i, + input [`OR1K_FPUOP_GENERIC_CMP_WIDTH-1:0] generic_cmp_opc_i, // ordered/unordered + input unordered_cmp_bit_i, // is unorderd + // operand 'a' related inputs + input signa_i, + input [9:0] exp10a_i, + input [23:0] fract24a_i, + input snana_i, + input qnana_i, + input infa_i, + input zeroa_i, + // operand 'b' related inputs + input signb_i, + input [9:0] exp10b_i, + input [23:0] fract24b_i, + input snanb_i, + input qnanb_i, + input infb_i, + input zerob_i, + // support addsub + output addsub_agtb_o, + output addsub_aeqb_o, + // outputs + output cmp_flag_o, inv_o, inf_o, ready_o +); + +// Full length ordered comparison opcodes +localparam [`OR1K_FPUOP_WIDTH-1:0] FPCOP_SFEQ = `OR1K_FPCOP_SFEQ; +localparam [`OR1K_FPUOP_WIDTH-1:0] FPCOP_SFNE = `OR1K_FPCOP_SFNE; +localparam [`OR1K_FPUOP_WIDTH-1:0] FPCOP_SFGT = `OR1K_FPCOP_SFGT; +localparam [`OR1K_FPUOP_WIDTH-1:0] FPCOP_SFGE = `OR1K_FPCOP_SFGE; +localparam [`OR1K_FPUOP_WIDTH-1:0] FPCOP_SFLT = 
`OR1K_FPCOP_SFLT; +localparam [`OR1K_FPUOP_WIDTH-1:0] FPCOP_SFLE = `OR1K_FPCOP_SFLE; + +// For ordered / unordered comparison +localparam [`OR1K_FPUOP_GENERIC_CMP_WIDTH-1:0] GENERIC_SFEQ = FPCOP_SFEQ[`OR1K_FPUOP_GENERIC_CMP_SELECT]; +localparam [`OR1K_FPUOP_GENERIC_CMP_WIDTH-1:0] GENERIC_SFNE = FPCOP_SFNE[`OR1K_FPUOP_GENERIC_CMP_SELECT]; +localparam [`OR1K_FPUOP_GENERIC_CMP_WIDTH-1:0] GENERIC_SFGT = FPCOP_SFGT[`OR1K_FPUOP_GENERIC_CMP_SELECT]; +localparam [`OR1K_FPUOP_GENERIC_CMP_WIDTH-1:0] GENERIC_SFGE = FPCOP_SFGE[`OR1K_FPUOP_GENERIC_CMP_SELECT]; +localparam [`OR1K_FPUOP_GENERIC_CMP_WIDTH-1:0] GENERIC_SFLT = FPCOP_SFLT[`OR1K_FPUOP_GENERIC_CMP_SELECT]; +localparam [`OR1K_FPUOP_GENERIC_CMP_WIDTH-1:0] GENERIC_SFLE = FPCOP_SFLE[`OR1K_FPUOP_GENERIC_CMP_SELECT]; + +//////////////////////////////////////////////////////////////////////// +// +// Exception Logic +// + +// Analysis of operands +wire qnan = qnana_i | qnanb_i; +wire snan = snana_i | snanb_i; +wire anan = qnan | snan; + +// Comparison is ordered/unordered EQ/NE +wire eqne = (generic_cmp_opc_i == GENERIC_SFEQ) | + (generic_cmp_opc_i == GENERIC_SFNE); + +// Comparison is invalid if: +// 1) sNaN is an operand of ordered/unordered EQ/NE comparison +// 2) NaN is an operand of ordered LT/LE/GT/GE comparison +wire inv_cmp = (eqne & snan) | ((~eqne) & anan & (~unordered_cmp_bit_i)); + + +//////////////////////////////////////////////////////////////////////// +// +// Comparison Logic +// +wire exp_gt = exp10a_i > exp10b_i; +wire exp_eq = exp10a_i == exp10b_i; +wire exp_lt = (~exp_gt) & (~exp_eq); // exp10a_i < exp10b_i; + +wire fract_gt = fract24a_i > fract24b_i; +wire fract_eq = fract24a_i == fract24b_i; +wire fract_lt = (~fract_gt) & (~fract_eq); // fract24a_i < fract24b_i; + +wire all_zero = zeroa_i & zerob_i; + +reg altb, blta, aeqb; + +always @( qnan or snan or infa_i or infb_i or signa_i or signb_i or + exp_eq or exp_gt or exp_lt or + fract_eq or fract_gt or fract_lt or all_zero) + + casez( {qnan, snan, infa_i, 
infb_i, signa_i, signb_i, + exp_eq, exp_gt, exp_lt, + fract_eq, fract_gt, fract_lt, all_zero}) + 13'b1?_??_??_???_???_?: {blta, altb, aeqb} = 3'b000; // qnan + 13'b?1_??_??_???_???_?: {blta, altb, aeqb} = 3'b000; // snan + + 13'b00_11_00_???_???_?: {blta, altb, aeqb} = 3'b001; // both op INF comparisson + 13'b00_11_01_???_???_?: {blta, altb, aeqb} = 3'b100; + 13'b00_11_10_???_???_?: {blta, altb, aeqb} = 3'b010; + 13'b00_11_11_???_???_?: {blta, altb, aeqb} = 3'b001; + + 13'b00_10_00_???_???_?: {blta, altb, aeqb} = 3'b100; // opa_i INF comparisson + 13'b00_10_01_???_???_?: {blta, altb, aeqb} = 3'b100; + 13'b00_10_10_???_???_?: {blta, altb, aeqb} = 3'b010; + 13'b00_10_11_???_???_?: {blta, altb, aeqb} = 3'b010; + + 13'b00_01_00_???_???_?: {blta, altb, aeqb} = 3'b010; // opb_i INF comparisson + 13'b00_01_01_???_???_?: {blta, altb, aeqb} = 3'b100; + 13'b00_01_10_???_???_?: {blta, altb, aeqb} = 3'b010; + 13'b00_01_11_???_???_?: {blta, altb, aeqb} = 3'b100; + + 13'b00_00_10_???_???_0: {blta, altb, aeqb} = 3'b010; //compare base on sign + 13'b00_00_01_???_???_0: {blta, altb, aeqb} = 3'b100; //compare base on sign + + 13'b00_00_??_???_???_1: {blta, altb, aeqb} = 3'b001; //compare base on sign both are zero + + 13'b00_00_00_010_???_?: {blta, altb, aeqb} = 3'b100; // cmp exp, equal sign + 13'b00_00_00_001_???_?: {blta, altb, aeqb} = 3'b010; + 13'b00_00_11_010_???_?: {blta, altb, aeqb} = 3'b010; + 13'b00_00_11_001_???_?: {blta, altb, aeqb} = 3'b100; + + 13'b00_00_00_100_010_?: {blta, altb, aeqb} = 3'b100; // compare fractions, equal sign, equal exp + 13'b00_00_00_100_001_?: {blta, altb, aeqb} = 3'b010; + 13'b00_00_11_100_010_?: {blta, altb, aeqb} = 3'b010; + 13'b00_00_11_100_001_?: {blta, altb, aeqb} = 3'b100; + + 13'b00_00_00_100_100_?: {blta, altb, aeqb} = 3'b001; + 13'b00_00_11_100_100_?: {blta, altb, aeqb} = 3'b001; + + default: {blta, altb, aeqb} = 3'b000; + endcase + + +//////////////////////////////////////////////////////////////////////// +// Comparison cmp_flag 
generation +reg generic_cmp_flag; // ordered / unordered +wire cmp_flag = (unordered_cmp_bit_i & anan) | generic_cmp_flag; +// --- +always @(altb or blta or aeqb or generic_cmp_opc_i) begin + case (generic_cmp_opc_i) // synthesis parallel_case + GENERIC_SFEQ: generic_cmp_flag = aeqb; + GENERIC_SFNE: generic_cmp_flag = ~aeqb; + GENERIC_SFGT: generic_cmp_flag = blta & ~aeqb; + GENERIC_SFGE: generic_cmp_flag = blta | aeqb; + GENERIC_SFLT: generic_cmp_flag = altb & ~aeqb; + GENERIC_SFLE: generic_cmp_flag = altb | aeqb; + default: generic_cmp_flag = 1'b0; + endcase +end // always@ * + + +//////////////////////////////////////////////////////////////////////// +// output (latching is perfommed on FPU top level) + +assign addsub_agtb_o = exp_gt | (exp_eq & fract_gt); +assign addsub_aeqb_o = exp_eq & fract_eq; + +assign cmp_flag_o = cmp_flag; +assign inv_o = inv_cmp; +assign inf_o = infa_i | infb_i; +assign ready_o = fpu_op_is_comp_i; + +endmodule // pfpu32_fcmp diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_f2i.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_f2i.v new file mode 100644 index 0000000..3d35071 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_f2i.v @@ -0,0 +1,110 @@ +///////////////////////////////////////////////////////////////////// +//// //// +//// pfpu32_f2i //// +//// 32-bit floating point to integer converter //// +//// //// +//// Author: Andrey Bacherov //// +//// avbacherov@opencores.org //// +//// //// +///////////////////////////////////////////////////////////////////// +//// //// +//// Copyright (C) 2014 Andrey Bacherov //// +//// avbacherov@opencores.org //// +//// //// +//// This source file may be used and distributed without //// +//// restriction provided that this copyright statement is not //// +//// removed from the file and that any derivative work contains //// +//// the original copyright notice and the associated disclaimer.//// +//// //// +//// THIS SOFTWARE IS PROVIDED 
``AS IS'' AND WITHOUT ANY //// +//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //// +//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //// +//// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR //// +//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //// +//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //// +//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //// +//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //// +//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //// +//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //// +//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //// +//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //// +//// POSSIBILITY OF SUCH DAMAGE. //// +//// //// +///////////////////////////////////////////////////////////////////// + +`include "mor1kx-defines.v" + +module pfpu32_f2i +( + input clk, + input rst, + input flush_i, // flush pipe + input adv_i, // advance pipe + input start_i, // start conversion + input signa_i, // input 'a' related values + input [9:0] exp10a_i, + input [23:0] fract24a_i, + input snan_i, // 'a'/'b' related + input qnan_i, + output reg f2i_rdy_o, // f2i is ready + output reg f2i_sign_o, // f2i signum + output reg [23:0] f2i_int24_o, // f2i fractional + output reg [4:0] f2i_shr_o, // f2i required shift right value + output reg [3:0] f2i_shl_o, // f2i required shift left value + output reg f2i_ovf_o, // f2i overflow flag + output reg f2i_snan_o // f2i signaling NaN output reg +); + + /* + Any stage's output is registered. 
+ Definitions: + s??o_name - "S"tage number "??", "O"utput + s??t_name - "S"tage number "??", "T"emporary (internally) + */ + + // exponent after moving binary point at the end of mantissa + // bias is also removed + wire [9:0] s1t_exp10m = exp10a_i - 10'd150; // (- 127 - 23) + + // right shift is required only when s1t_exp10m is negative (bit [9] set); otherwise the amount is forced to 0 + wire [9:0] s1t_shr_t = {10{s1t_exp10m[9]}} & (10'd150 - exp10a_i); + // limit right shift by 31 (any bit set in [9:5] saturates the amount to all ones) + wire [4:0] s1t_shr = s1t_shr_t[4:0] | {5{|s1t_shr_t[9:5]}}; + + // detect if left shift is required for mantissa (s1t_exp10m is non-negative) + // (limited by 15) + wire [3:0] s1t_shl = {4{~s1t_exp10m[9]}} & (s1t_exp10m[3:0] | {4{|s1t_exp10m[9:4]}}); + // check overflow: left shift > 8, or left shift == 8 unless 'a' is negative with fract24a_i[22:0] all zero + wire s1t_is_shl_gt8 = s1t_shl[3] & (|s1t_shl[2:0]); + wire s1t_is_shl_eq8 = s1t_shl[3] & (~(|s1t_shl[2:0])); + wire s1t_is_shl_ovf = + s1t_is_shl_gt8 | + (s1t_is_shl_eq8 & (~signa_i)) | + (s1t_is_shl_eq8 & signa_i & (|fract24a_i[22:0])); + + + // registering output (updated only while adv_i is set) + always @(posedge clk) begin + if(adv_i) begin + // input related + f2i_snan_o <= snan_i; + // computation related + f2i_sign_o <= signa_i & (!(qnan_i | snan_i)); // if 'a' is a NaN then output is max.
positive + f2i_int24_o <= fract24a_i; + f2i_shr_o <= s1t_shr; + f2i_shl_o <= s1t_shl; + f2i_ovf_o <= s1t_is_shl_ovf; + end // (reset or flush) / advance + end // posedge clock + + // ready is special case + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + f2i_rdy_o <= 1'b0; + else if(flush_i) + f2i_rdy_o <= 1'b0; + else if(adv_i) + f2i_rdy_o <= start_i; + end // posedge clock + +endmodule // pfpu32_f2i diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_i2f.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_i2f.v new file mode 100644 index 0000000..e284828 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_i2f.v @@ -0,0 +1,144 @@ +///////////////////////////////////////////////////////////////////// +//// //// +//// pfpu32_i2f //// +//// 32-bit integer to floating point converter //// +//// //// +//// Author: Andrey Bacherov //// +//// avbacherov@opencores.org //// +//// //// +///////////////////////////////////////////////////////////////////// +//// //// +//// Copyright (C) 2014 Andrey Bacherov //// +//// avbacherov@opencores.org //// +//// //// +//// This source file may be used and distributed without //// +//// restriction provided that this copyright statement is not //// +//// removed from the file and that any derivative work contains //// +//// the original copyright notice and the associated disclaimer.//// +//// //// +//// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //// +//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //// +//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //// +//// FOR A PARTICULAR PURPOSE. 
IN NO EVENT SHALL THE AUTHOR //// +//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //// +//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //// +//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //// +//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //// +//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //// +//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //// +//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //// +//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //// +//// POSSIBILITY OF SUCH DAMAGE. //// +//// //// +///////////////////////////////////////////////////////////////////// + +`include "mor1kx-defines.v" + +module pfpu32_i2f +( + input clk, + input rst, + input flush_i, // flush pipe + input adv_i, // advance pipe + input start_i, // start conversion + input [31:0] opa_i, + output reg i2f_rdy_o, // i2f is ready + output reg i2f_sign_o, // i2f signum + output reg [3:0] i2f_shr_o, + output reg [7:0] i2f_exp8shr_o, + output reg [4:0] i2f_shl_o, + output reg [7:0] i2f_exp8shl_o, + output reg [7:0] i2f_exp8sh0_o, + output reg [31:0] i2f_fract32_o +); + + /* + Any stage's output is registered. 
+ Definitions: + s??o_name - "S"tage number "??", "O"utput + s??t_name - "S"tage number "??", "T"emporary (internally) + */ + + // signum of input + wire s1t_signa = opa_i[31]; + // magnitude (two's complement for negative input) + wire [31:0] s1t_fract32 = + (opa_i ^ {32{s1t_signa}}) + {31'd0,s1t_signa}; + // normalization shifts + reg [3:0] s1t_shrx; + reg [4:0] s1t_shlx; + // shift goal: + // 23 22 0 + // | | | + // h fffffffffffffffffffffff + // right shift: 1-based position of the leading 1 within bits [31:24], 0 if those bits are all zero + always @(s1t_fract32[31:24]) begin + casez(s1t_fract32[31:24]) // synopsys full_case parallel_case + 8'b1???????: s1t_shrx = 4'd8; + 8'b01??????: s1t_shrx = 4'd7; + 8'b001?????: s1t_shrx = 4'd6; + 8'b0001????: s1t_shrx = 4'd5; + 8'b00001???: s1t_shrx = 4'd4; + 8'b000001??: s1t_shrx = 4'd3; + 8'b0000001?: s1t_shrx = 4'd2; + 8'b00000001: s1t_shrx = 4'd1; + 8'b00000000: s1t_shrx = 4'd0; + endcase + end + // left shift: distance of the leading 1 within bits [23:0] from bit [23] + always @(s1t_fract32[23:0]) begin + casez(s1t_fract32[23:0]) // synopsys full_case parallel_case + 24'b1???????????????????????: s1t_shlx = 5'd0; // hidden '1' is in its place + 24'b01??????????????????????: s1t_shlx = 5'd1; + 24'b001?????????????????????: s1t_shlx = 5'd2; + 24'b0001????????????????????: s1t_shlx = 5'd3; + 24'b00001???????????????????: s1t_shlx = 5'd4; + 24'b000001??????????????????: s1t_shlx = 5'd5; + 24'b0000001?????????????????: s1t_shlx = 5'd6; + 24'b00000001????????????????: s1t_shlx = 5'd7; + 24'b000000001???????????????: s1t_shlx = 5'd8; + 24'b0000000001??????????????: s1t_shlx = 5'd9; + 24'b00000000001?????????????: s1t_shlx = 5'd10; + 24'b000000000001????????????: s1t_shlx = 5'd11; + 24'b0000000000001???????????: s1t_shlx = 5'd12; + 24'b00000000000001??????????: s1t_shlx = 5'd13; + 24'b000000000000001?????????: s1t_shlx = 5'd14; + 24'b0000000000000001????????: s1t_shlx = 5'd15; + 24'b00000000000000001???????: s1t_shlx = 5'd16; + 24'b000000000000000001??????: s1t_shlx = 5'd17; + 24'b0000000000000000001?????: s1t_shlx = 5'd18; + 24'b00000000000000000001????: s1t_shlx =
5'd19; + 24'b000000000000000000001???: s1t_shlx = 5'd20; + 24'b0000000000000000000001??: s1t_shlx = 5'd21; + 24'b00000000000000000000001?: s1t_shlx = 5'd22; + 24'b000000000000000000000001: s1t_shlx = 5'd23; + 24'b000000000000000000000000: s1t_shlx = 5'd0; + endcase + end + + + // registering output + always @(posedge clk) begin + if(adv_i) begin + // computation related + i2f_sign_o <= s1t_signa; + i2f_shr_o <= s1t_shrx; + i2f_exp8shr_o <= 8'd150 + {4'd0,s1t_shrx}; // 150=127+23 + i2f_shl_o <= s1t_shlx; + i2f_exp8shl_o <= 8'd150 - {3'd0,s1t_shlx}; + i2f_exp8sh0_o <= {8{s1t_fract32[23]}} & 8'd150; // "1" is in [23] / zero + i2f_fract32_o <= s1t_fract32; + end // advance + end // posedge clock + + // ready is special case + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + i2f_rdy_o <= 1'b0; + else if(flush_i) + i2f_rdy_o <= 1'b0; + else if(adv_i) + i2f_rdy_o <= start_i; + end // posedge clock + +endmodule // pfpu32_i2f diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_muldiv.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_muldiv.v new file mode 100644 index 0000000..d212677 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_muldiv.v @@ -0,0 +1,805 @@ +////////////////////////////////////////////////////////////////////// +// // +// pfpu32_muldiv // +// // +// This file is part of the mor1kx project // +// https://github.com/openrisc/mor1kx // +// // +// Description // +// combined multiplier/divisor pipeline for // +// single precision floating point numbers // +// // +// Author(s): // +// Andrey Bacherov, avbacherov@opencores.org // +// // +////////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2015 // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. 
// +// // +// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY // +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED // +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS // +// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR // +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, // +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES // +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE // +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR // +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT // +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // +// POSSIBILITY OF SUCH DAMAGE. // +////////////////////////////////////////////////////////////////////// + +`include "mor1kx-defines.v" + +module pfpu32_muldiv +( + input clk, + input rst, + input flush_i, // flush pipe + input adv_i, // advance pipe + input start_i, // start + input is_div_i, // 1: division, 0: multiplication + // input 'a' related values + input signa_i, + input [9:0] exp10a_i, + input [23:0] fract24a_i, + input infa_i, + input zeroa_i, + // input 'b' related values + input signb_i, + input [9:0] exp10b_i, + input [23:0] fract24b_i, + input infb_i, + input zerob_i, + // 'a'/'b' related + input snan_i, + input qnan_i, + input anan_sign_i, + // MUL/DIV common outputs + output reg muldiv_rdy_o, // ready + output reg muldiv_sign_o, // signum + output reg [4:0] muldiv_shr_o, // do right shift in align stage + output reg [9:0] muldiv_exp10shr_o, // exponent for right shift align + output reg muldiv_shl_o, // do left shift in align stage + output reg [9:0] muldiv_exp10shl_o, // exponent for left shift align + output reg [9:0] muldiv_exp10sh0_o, // exponent for no shift in align + output reg [27:0] muldiv_fract28_o, // fractional with appended {r,s} bits + output reg muldiv_inv_o, // invalid operation
flag + output reg muldiv_inf_o, // infinity output reg + output reg muldiv_snan_o, // signaling NaN output reg + output reg muldiv_qnan_o, // quiet NaN output reg + output reg muldiv_anan_sign_o, // signum for output nan + // DIV additional outputs + output reg div_op_o, // operation is division + output reg div_sign_rmnd_o, // signum of remainder for IEEE compliant rounding + output reg div_dbz_o // division by zero flag (DIV only) +); + + /* + Any stage's output is registered. + Definitions: + s??o_name - "S"tage number "??", "O"utput + s??t_name - "S"tage number "??", "T"emporary (internally) + */ + + + /* Stage #1: pre-operation stage */ + + + // detection of some exceptions + wire s0t_inv = is_div_i ? ((zeroa_i & zerob_i) | (infa_i & infb_i)) : // div: 0/0, inf/inf -> invalid operation; snan output + ((zeroa_i & infb_i) | (zerob_i & infa_i)); // mul: 0 * inf -> invalid operation; snan output + // division by zero + wire s0t_dbz = is_div_i & (~zeroa_i) & (~infa_i) & zerob_i; + // inf input + wire s0t_inf_i = infa_i | (infb_i & (~is_div_i)); // for DIV only infA is used + + // force intermediate results to zero + wire s0t_opc_0 = zeroa_i | zerob_i | (is_div_i & (infa_i | infb_i)); + + // count leading zeros of fract24a_i (an all-zero input also yields 0) + reg [4:0] s0t_nlza; + always @(fract24a_i) begin + casez(fract24a_i) // synopsys full_case parallel_case + 24'b1???????????????????????: s0t_nlza = 0; + 24'b01??????????????????????: s0t_nlza = 1; + 24'b001?????????????????????: s0t_nlza = 2; + 24'b0001????????????????????: s0t_nlza = 3; + 24'b00001???????????????????: s0t_nlza = 4; + 24'b000001??????????????????: s0t_nlza = 5; + 24'b0000001?????????????????: s0t_nlza = 6; + 24'b00000001????????????????: s0t_nlza = 7; + 24'b000000001???????????????: s0t_nlza = 8; + 24'b0000000001??????????????: s0t_nlza = 9; + 24'b00000000001?????????????: s0t_nlza = 10; + 24'b000000000001????????????: s0t_nlza = 11; + 24'b0000000000001???????????: s0t_nlza = 12; + 24'b00000000000001??????????: s0t_nlza = 13; +
24'b000000000000001?????????: s0t_nlza = 14; + 24'b0000000000000001????????: s0t_nlza = 15; + 24'b00000000000000001???????: s0t_nlza = 16; + 24'b000000000000000001??????: s0t_nlza = 17; + 24'b0000000000000000001?????: s0t_nlza = 18; + 24'b00000000000000000001????: s0t_nlza = 19; + 24'b000000000000000000001???: s0t_nlza = 20; + 24'b0000000000000000000001??: s0t_nlza = 21; + 24'b00000000000000000000001?: s0t_nlza = 22; + 24'b000000000000000000000001: s0t_nlza = 23; + 24'b000000000000000000000000: s0t_nlza = 0; // zero result + endcase + end // nlz for 'a' + + // count leading zeros of fract24b_i (an all-zero input also yields 0) + reg [4:0] s0t_nlzb; + always @(fract24b_i) begin + casez(fract24b_i) // synopsys full_case parallel_case + 24'b1???????????????????????: s0t_nlzb = 0; + 24'b01??????????????????????: s0t_nlzb = 1; + 24'b001?????????????????????: s0t_nlzb = 2; + 24'b0001????????????????????: s0t_nlzb = 3; + 24'b00001???????????????????: s0t_nlzb = 4; + 24'b000001??????????????????: s0t_nlzb = 5; + 24'b0000001?????????????????: s0t_nlzb = 6; + 24'b00000001????????????????: s0t_nlzb = 7; + 24'b000000001???????????????: s0t_nlzb = 8; + 24'b0000000001??????????????: s0t_nlzb = 9; + 24'b00000000001?????????????: s0t_nlzb = 10; + 24'b000000000001????????????: s0t_nlzb = 11; + 24'b0000000000001???????????: s0t_nlzb = 12; + 24'b00000000000001??????????: s0t_nlzb = 13; + 24'b000000000000001?????????: s0t_nlzb = 14; + 24'b0000000000000001????????: s0t_nlzb = 15; + 24'b00000000000000001???????: s0t_nlzb = 16; + 24'b000000000000000001??????: s0t_nlzb = 17; + 24'b0000000000000000001?????: s0t_nlzb = 18; + 24'b00000000000000000001????: s0t_nlzb = 19; + 24'b000000000000000000001???: s0t_nlzb = 20; + 24'b0000000000000000000001??: s0t_nlzb = 21; + 24'b00000000000000000000001?: s0t_nlzb = 22; + 24'b000000000000000000000001: s0t_nlzb = 23; + 24'b000000000000000000000000: s0t_nlzb = 0; // zero result + endcase + end // nlz of 'b' + + + // pre-norm stage outputs + // input related + reg s0o_inv, s0o_inf_i, + s0o_snan_i,
s0o_qnan_i, s0o_anan_i_sign; + // computation related + reg s0o_is_div; + reg s0o_opc_0; + reg s0o_signc; + reg [9:0] s0o_exp10a; + reg [23:0] s0o_fract24a; + reg [4:0] s0o_shla; + reg [9:0] s0o_exp10b; + reg [23:0] s0o_fract24b; + reg [4:0] s0o_shlb; + // DIV additional outputs + reg s0o_dbz; + // registering + always @(posedge clk) begin + if(adv_i) begin + // input related + s0o_inv <= s0t_inv; + s0o_inf_i <= s0t_inf_i; + s0o_snan_i <= snan_i; + s0o_qnan_i <= qnan_i; + s0o_anan_i_sign <= anan_sign_i; + // computation related + s0o_is_div <= is_div_i; + s0o_opc_0 <= s0t_opc_0; + s0o_signc <= signa_i ^ signb_i; + s0o_exp10a <= exp10a_i; + s0o_fract24a <= fract24a_i; + s0o_shla <= s0t_nlza; + s0o_exp10b <= exp10b_i; + s0o_fract24b <= fract24b_i; + s0o_shlb <= s0t_nlzb; + // DIV additional outputs + s0o_dbz <= s0t_dbz; + end // push pipe + end + + // ready flag travels beside the data: cleared by rst/flush_i, loaded from start_i on adv_i + reg s0o_ready; + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + s0o_ready <= 0; + else if(flush_i) + s0o_ready <= 0; + else if(adv_i) + s0o_ready <= start_i; + end // posedge clock + + + // left-shift (normalize) both operands by their leading-zero counts + wire [23:0] s1t_fract24a_shl = s0o_fract24a << s0o_shla; + wire [23:0] s1t_fract24b_shl = s0o_fract24b << s0o_shlb; + + // force result to zero + wire [23:0] s1t_fract24a = s1t_fract24a_shl & {24{~s0o_opc_0}}; + wire [23:0] s1t_fract24b = s1t_fract24b_shl & {24{~s0o_opc_0}}; + + // exponent: (a - shla) - (b - shlb) + 127 for DIV, (a - shla) + (b - shlb) - 127 for MUL + wire [9:0] s1t_exp10mux = + s0o_is_div ?
(s0o_exp10a - {5'd0,s0o_shla} - s0o_exp10b + {5'd0,s0o_shlb} + 10'd127) : + (s0o_exp10a - {5'd0,s0o_shla} + s0o_exp10b - {5'd0,s0o_shlb} - 10'd127); + + // force result to zero + wire [9:0] s1t_exp10c = s1t_exp10mux & {10{~s0o_opc_0}}; + + + // Goldschmidt division iterations control + reg [10:0] itr_state; // iteration state: a '1' loaded into bit 0 and shifted left on each advance + // iteration characteristic points: + // quotient is computed + wire itr_rndQ = itr_state[10]; + // iteration in progress + wire itr_Proc = |itr_state; + // iteration control state machine + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + itr_state <= 11'd0; + else if(flush_i) + itr_state <= 11'd0; + else if(adv_i & s0o_ready & s0o_is_div) + itr_state <= 11'd1; + else if(adv_i) + itr_state <= {itr_state[9:0],1'b0}; + end // posedge clock + + // Multiplication operation flag + wire s1t_is_mul = s0o_ready & (~s0o_is_div); + + + // stage #1 outputs + // input related + reg s1o_inv, s1o_inf_i, + s1o_snan_i, s1o_qnan_i, s1o_anan_i_sign; + // computation related + reg s1o_opc_0; + reg s1o_signc; + reg [9:0] s1o_exp10c; + reg [23:0] s1o_fract24a; + reg [23:0] s1o_fract24b; + // DIV additional outputs + reg s1o_dbz; + // registering (stage is frozen while itr_state is non-zero) + always @(posedge clk) begin + if(adv_i & ~itr_Proc) begin + // input related + s1o_inv <= s0o_inv; + s1o_inf_i <= s0o_inf_i; + s1o_snan_i <= s0o_snan_i; + s1o_qnan_i <= s0o_qnan_i; + s1o_anan_i_sign <= s0o_anan_i_sign; + // computation related + s1o_opc_0 <= s0o_opc_0; + s1o_signc <= s0o_signc; + s1o_exp10c <= s1t_exp10c; + s1o_fract24a <= s1t_fract24a; + s1o_fract24b <= s1t_fract24b; + // DIV additional outputs + s1o_dbz <= s0o_dbz; + end // advance pipe + end // posedge clock + + // ready is special case + reg s1o_mul_ready; + reg s1o_div_ready; + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) begin + s1o_mul_ready <= 1'b0; + s1o_div_ready <= 1'b0; + end else if(flush_i) begin + s1o_mul_ready <= 1'b0; + s1o_div_ready <= 1'b0; + end else if(adv_i) begin + s1o_mul_ready <= s1t_is_mul; +
s1o_div_ready <= itr_rndQ; + end + end // posedge clock + + + /* Stage #2: 1st part of multiplier */ + + + // right shift value + // and appropriately corrected exponent + wire s1o_exp10c_0 = ~(|s1o_exp10c); + wire [9:0] s2t_shr_of_neg_exp = 11'h401 - {1'b0,s1o_exp10c}; // 1024-v+1 + // variants: + wire [9:0] s2t_shr_t; + wire [9:0] s2t_exp10rx; + assign {s2t_shr_t,s2t_exp10rx} = + // force zero result + s1o_opc_0 ? {10'd0,10'd0} : + // negative exponent sum + // (!) takes 1x.xx case into account automatically + s1o_exp10c[9] ? {s2t_shr_of_neg_exp,10'd1} : + // (a) zero exponent sum (denorm. result potentially) + // (!) takes 1x.xx case into account automatically + // (b) normal case + // (!) 1x.xx case is processed in next stage + {{9'd0,s1o_exp10c_0},(s1o_exp10c | {9'd0,s1o_exp10c_0})}; + // saturate the shift amount at 31: any set bit in [9:5] forces all ones + wire [4:0] s2t_shrx = s2t_shr_t[4:0] | {5{|s2t_shr_t[9:5]}}; + + + // Support Goldschmidt iteration + // initial estimation of reciprocal + wire [8:0] itr_recip9b; + arecip_lut u_arlut + ( + .b_i(s1o_fract24b[22:16]), + .r_o(itr_recip9b) + ); + // support case: b==1 + wire b_eq_1 = s1o_fract24b[23] & (~(|s1o_fract24b[22:0])); + // reciprocal with restored leading 01 + wire [10:0] itr_recip11b = b_eq_1 ?
11'b10000000000 : + {2'b01,itr_recip9b}; + + // the subsequent two stages multiplier operates with 32-bit inputs + // 25-bits: fractionals (quotient is in range 0.5 to 1) + // 1-bit : rounding bit + // 6-bits: guard (due to truncations of intermediate results) + + // intermediate results: + // updated divisor (D) is rounded up while all other intermediate values + // are just truncated in accordance with directed rounding analysed in: + // Guy Even, Peter-M.Seidel, Warren E.Ferguson + // "A parametric error analysis of Goldschmidt’s division algorithm" + wire itr_rndD = itr_state[3] | itr_state[6]; + wire itr_rndDvsr; // NOTE(review): declared but not referenced elsewhere in this module - confirm it can be dropped upstream + // align resulting quotient to support subsequent IEEE-compliant rounding + wire [25:0] itr_res_qtnt26; // rounded quotient + // Updated quotient or divisor + wire [32:0] itr_qtnt33; + // 'F' (2-D) or 'Remainder' + wire [32:0] itr_rmnd33; + + + // control for multiplier's input 'A' + // the register also contains quotient to output + wire itr_uinA = s1t_is_mul | + itr_state[0] | itr_state[3] | + itr_state[6] | itr_rndQ; + // multiplexer for multiplier's input 'A' + wire [31:0] itr_mul32a = + s1t_is_mul ? {s1t_fract24a,8'd0} : + itr_state[0] ? {itr_recip11b,21'd0} : + itr_rndQ ? {itr_res_qtnt26,6'd0} : // truncate by 2^(-n-1) + itr_rmnd33[31:0]; + // register of multiplier's input 'A' + reg [15:0] s1o_mul16_al; + reg [15:0] s1o_mul16_ah; + // registering + always @(posedge clk) begin + if(adv_i & itr_uinA) begin + s1o_mul16_al <= itr_mul32a[15: 0]; + s1o_mul16_ah <= itr_mul32a[31:16]; + end + end // posedge clock + + + // control for multiplier's input 'B' + wire itr_uinB = s1t_is_mul | + itr_state[0] | itr_state[1] | + itr_state[3] | itr_state[4] | + itr_state[6] | itr_state[7] | + itr_rndQ; + // multiplexer for multiplier's input 'B' + wire [31:0] itr_mul32b = + s1t_is_mul ? {s1t_fract24b,8'd0} : + (itr_state[0] | itr_rndQ) ? {s1o_fract24b,8'd0} : + itr_state[1] ?
{s1o_fract24a,8'd0} : + itr_qtnt33[31:0]; + // register of multiplier's input 'B' + reg [15:0] s1o_mul16_bl; + reg [15:0] s1o_mul16_bh; + always @(posedge clk) begin + if(adv_i & itr_uinB) begin + s1o_mul16_bl <= itr_mul32b[15: 0]; + s1o_mul16_bh <= itr_mul32b[31:16]; + end + end // posedge clock + + // stage #2 outputs + // input related + reg s2o_inv, s2o_inf_i, + s2o_snan_i, s2o_qnan_i, s2o_anan_i_sign; + // DIV additional outputs + reg s2o_dbz; + reg [23:0] s2o_fract24a; + // computation related + reg s2o_opc_0; + reg s2o_signc; + reg [9:0] s2o_exp10c; + reg [4:0] s2o_shrx; + reg s2o_is_shrx; + reg [9:0] s2o_exp10rx; + // multipliers + reg [31:0] s2o_fract32_albl; + reg [31:0] s2o_fract32_albh; + reg [31:0] s2o_fract32_ahbl; + reg [31:0] s2o_fract32_ahbh; + // registering + always @(posedge clk) begin + if(adv_i) begin + // input related + s2o_inv <= s1o_inv; + s2o_inf_i <= s1o_inf_i; + s2o_snan_i <= s1o_snan_i; + s2o_qnan_i <= s1o_qnan_i; + s2o_anan_i_sign <= s1o_anan_i_sign; + // DIV additional outputs + s2o_dbz <= s1o_dbz; + s2o_fract24a <= s1o_fract24a; + // computation related + s2o_opc_0 <= s1o_opc_0; + s2o_signc <= s1o_signc; + s2o_exp10c <= s1o_exp10c; + s2o_shrx <= s2t_shrx; + s2o_is_shrx <= (|s2t_shrx); + s2o_exp10rx <= s2t_exp10rx; + // multipliers: four 16x16 partial products of the 32x32 multiplication + s2o_fract32_albl <= s1o_mul16_al * s1o_mul16_bl; + s2o_fract32_albh <= s1o_mul16_al * s1o_mul16_bh; + s2o_fract32_ahbl <= s1o_mul16_ah * s1o_mul16_bl; + s2o_fract32_ahbh <= s1o_mul16_ah * s1o_mul16_bh; + end // advance pipe + end // posedge clock + + // ready is special case + reg s2o_mul_ready; + reg s2o_div_ready; + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) begin + s2o_mul_ready <= 1'b0; + s2o_div_ready <= 1'b0; + end else if(flush_i) begin + s2o_mul_ready <= 1'b0; + s2o_div_ready <= 1'b0; + end else if(adv_i) begin + s2o_mul_ready <= s1o_mul_ready; + s2o_div_ready <= s1o_div_ready; + end + end // posedge clock + + + /* Stage #3: 2nd part of multiplier */ + + + // 2nd stage of multiplier +
wire [47:0] s3t_fract48; + assign s3t_fract48 = {s2o_fract32_ahbh, 16'd0} + + {16'd0, s2o_fract32_ahbl} + + {16'd0, s2o_fract32_albh} + + {32'd0, s2o_fract32_albl[31:16]}; + + // stage #3 outputs (for division support) + + // full product + reg [32:0] s3o_mul33o; // output + reg s3o_mul33s; // sticky + // registering + always @(posedge clk) begin + if(adv_i) begin + s3o_mul33o <= s3t_fract48[47:15]; + s3o_mul33s <= (|s3t_fract48[14:0]) | (|s2o_fract32_albl[15:0]); + end + end // posedge clock + + // For pipelining of the final division stage + // input related + reg s3o_inv, s3o_inf_i, + s3o_snan_i, s3o_qnan_i, s3o_anan_i_sign; + // DIV computation related + reg s3o_dbz; + reg [23:0] s3o_fract24a; + reg s3o_opc_0; + reg s3o_signc; + reg [9:0] s3o_exp10c; + reg [4:0] s3o_shrx; + reg s3o_is_shrx; + reg [9:0] s3o_exp10rx; + // registering + always @(posedge clk) begin + if(adv_i) begin + // input related + s3o_inv <= s2o_inv; + s3o_inf_i <= s2o_inf_i; + s3o_snan_i <= s2o_snan_i; + s3o_qnan_i <= s2o_qnan_i; + s3o_anan_i_sign <= s2o_anan_i_sign; + // DIV computation related + s3o_dbz <= s2o_dbz; + s3o_fract24a <= s2o_fract24a; + s3o_opc_0 <= s2o_opc_0; + s3o_signc <= s2o_signc; + s3o_exp10c <= s2o_exp10c; + s3o_shrx <= s2o_shrx; + s3o_is_shrx <= s2o_is_shrx; + s3o_exp10rx <= s2o_exp10rx; + end // advance pipe + end // @clock + + // stage 3 ready makes sense for division only + reg s3o_div_ready; + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + s3o_div_ready <= 1'b0; + else if(flush_i) + s3o_div_ready <= 1'b0; + else if(adv_i) + s3o_div_ready <= s2o_div_ready; + end // posedge clock + + + // Feedback from multiplier's output with various rounding techniques. + // +2^(-n-2) in case of rounding 1.xxx quotient + wire itr_rndQ1xx = s3o_mul33o[31]; + // +2^(-n-2) in case of rounding 0.1xx quotient + wire itr_rndQ01x = (~s3o_mul33o[31]); + // rounding mask: + wire [32:0] itr_rndM33 = // bits [6],[5] ...
[0] + { 26'd0,(itr_rndQ & itr_rndQ1xx),(itr_rndQ & itr_rndQ01x), // round resulting quotient + 4'd0,(itr_rndD & s3o_mul33s) }; // round intermediate divisor + // rounding + assign itr_qtnt33 = s3o_mul33o + itr_rndM33; + + + // compute 2's complement or remainder (for sticky bit detection) + // binary point position is located just after bit [30] + wire [32:0] itr_AorT33 = + s3o_div_ready ? {1'b0,s3o_fract24a,8'd0} : // for remainder + {32'h80000000,1'b0}; // for two's complement + + // 'Remainder' / Two's complement + assign itr_rmnd33 = itr_AorT33 - itr_qtnt33; + + // Auxiliary flags: + // - truncated remainder isn't zero + wire s4t_rmnd33_n0 = |itr_rmnd33; + // - rounded quotient is exact + wire s4t_qtnt_exact = ~(s4t_rmnd33_n0 | s3o_mul33s); + // - signum of final remainder + wire s4t_sign_rmnd = itr_rmnd33[32] | ((~s4t_rmnd33_n0) & s3o_mul33s); + + + // Additionally store 26-bit of non-rounded (_raw_) and rounded (_res_) quotients. + // It is used for rounding in cases of denormalized result. + // Sticky bit is forced to be zero. + // The values are marked by stage #2 output + // raw + reg [25:0] s3o_raw_qtnt26; + // rounded + reg [25:0] s3o_res_qtnt26; + assign itr_res_qtnt26 = {itr_qtnt33[31:7],itr_qtnt33[6] & itr_rndQ01x}; + // latching (gated by itr_rndQ only, not by adv_i) + always @(posedge clk ) begin + if(itr_rndQ) begin + s3o_raw_qtnt26 <= s3o_mul33o[31:6]; + s3o_res_qtnt26 <= itr_res_qtnt26; + end + end + + // Possible left shift computation. + // In fact, as the dividend and divisor were normalized + // and the result is non-zero + // '1' is the maximum number of leading zeros in the quotient. + wire s4t_nlz = ~s3o_res_qtnt26[25]; + wire [9:0] s4t_exp10_m1 = s3o_exp10c - 10'd1; + // left shift flag and corrected exponent + wire s4t_shlx; + wire [9:0] s4t_exp10lx; + assign {s4t_shlx,s4t_exp10lx} = + // shift isn't needed (includes zero result) + (~s4t_nlz) ? {1'b0,s3o_exp10c} : + // normalization is possible + (s3o_exp10c > 10'd1) ?
{1'b1,s4t_exp10_m1} : + // denormalized and zero cases + {1'b0,{9'd0,~s3o_opc_0}}; + + // check if quotient is denormalized + wire s4t_denorm = s3o_is_shrx | + ((~s3o_is_shrx) & (~s4t_shlx) & s4t_nlz); + // Select quotient for subsequent align and rounding + // The rounded (_res_) quotient is used: + // - for normalized result + // - exact result + // - non-exact result that is less than the infinitely precise one + wire [25:0] s4t_qtnt26 = + ( (~s4t_denorm) | s4t_qtnt_exact | + ((~s4t_qtnt_exact) & (~s4t_sign_rmnd)) ) ? s3o_res_qtnt26 : + s3o_raw_qtnt26; + + + // output registers: DIV values are taken from stage #3, MUL values from stage #2 + always @(posedge clk) begin + if(adv_i) begin + // input related + muldiv_inv_o <= s3o_div_ready ? s3o_inv : s2o_inv; + muldiv_inf_o <= s3o_div_ready ? s3o_inf_i : s2o_inf_i; + muldiv_snan_o <= s3o_div_ready ? s3o_snan_i : s2o_snan_i; + muldiv_qnan_o <= s3o_div_ready ? s3o_qnan_i : s2o_qnan_i; + muldiv_anan_sign_o <= s3o_div_ready ? s3o_anan_i_sign : s2o_anan_i_sign; + // computation related + muldiv_sign_o <= s3o_div_ready ? s3o_signc : s2o_signc; + muldiv_shr_o <= s3o_div_ready ? s3o_shrx : s2o_shrx; + muldiv_exp10shr_o <= s3o_div_ready ? s3o_exp10rx : s2o_exp10rx; + muldiv_shl_o <= s3o_div_ready & s4t_shlx; // makes sense for DIV only + muldiv_exp10shl_o <= {10{s3o_div_ready}} & s4t_exp10lx; // makes sense for DIV only + muldiv_exp10sh0_o <= s3o_div_ready ? s3o_exp10c : s2o_exp10c; + muldiv_fract28_o <= s3o_div_ready ?
+ {1'b0,s4t_qtnt26,~s4t_qtnt_exact} : // quotient + {s3t_fract48[47:21],|s3t_fract48[20:0]}; // product + // DIV additional outputs + div_op_o <= s3o_div_ready; + div_sign_rmnd_o <= s3o_div_ready & s4t_sign_rmnd; + div_dbz_o <= s3o_div_ready & s3o_dbz; + end // advance pipe + end // posedge clock + + // ready is special case + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + muldiv_rdy_o <= 0; + else if(flush_i) + muldiv_rdy_o <= 0; + else if(adv_i) + muldiv_rdy_o <= s2o_mul_ready | s3o_div_ready; + end // posedge clock + +endmodule // pfpu32_muldiv + + +// initial reciprocal approximation: combinational table from a 7-bit index (b_i) to a 9-bit estimate (r_o) +module arecip_lut +( + input [6:0] b_i, + output reg [8:0] r_o +); + always @(b_i) begin + case(b_i) // synopsys full_case parallel_case + 7'd0 : r_o = 9'd508; + 7'd1 : r_o = 9'd500; + 7'd2 : r_o = 9'd492; + 7'd3 : r_o = 9'd485; + 7'd4 : r_o = 9'd477; + 7'd5 : r_o = 9'd470; + 7'd6 : r_o = 9'd463; + 7'd7 : r_o = 9'd455; + 7'd8 : r_o = 9'd448; + 7'd9 : r_o = 9'd441; + 7'd10 : r_o = 9'd434; + 7'd11 : r_o = 9'd428; + 7'd12 : r_o = 9'd421; + 7'd13 : r_o = 9'd414; + 7'd14 : r_o = 9'd408; + 7'd15 : r_o = 9'd401; + 7'd16 : r_o = 9'd395; + 7'd17 : r_o = 9'd389; + 7'd18 : r_o = 9'd383; + 7'd19 : r_o = 9'd377; + 7'd20 : r_o = 9'd371; + 7'd21 : r_o = 9'd365; + 7'd22 : r_o = 9'd359; + 7'd23 : r_o = 9'd353; + 7'd24 : r_o = 9'd347; + 7'd25 : r_o = 9'd342; + 7'd26 : r_o = 9'd336; + 7'd27 : r_o = 9'd331; + 7'd28 : r_o = 9'd326; + 7'd29 : r_o = 9'd320; + 7'd30 : r_o = 9'd315; + 7'd31 : r_o = 9'd310; + 7'd32 : r_o = 9'd305; + 7'd33 : r_o = 9'd300; + 7'd34 : r_o = 9'd295; + 7'd35 : r_o = 9'd290; + 7'd36 : r_o = 9'd285; + 7'd37 : r_o = 9'd280; + 7'd38 : r_o = 9'd275; + 7'd39 : r_o = 9'd271; + 7'd40 : r_o = 9'd266; + 7'd41 : r_o = 9'd261; + 7'd42 : r_o = 9'd257; + 7'd43 : r_o = 9'd252; + 7'd44 : r_o = 9'd248; + 7'd45 : r_o = 9'd243; + 7'd46 : r_o = 9'd239; + 7'd47 : r_o = 9'd235; + 7'd48 : r_o = 9'd231; + 7'd49 : r_o = 9'd226; + 7'd50 : r_o = 9'd222; + 7'd51 : r_o = 9'd218; + 7'd52 : r_o =
9'd214; + 7'd53 : r_o = 9'd210; + 7'd54 : r_o = 9'd206; + 7'd55 : r_o = 9'd202; + 7'd56 : r_o = 9'd198; + 7'd57 : r_o = 9'd195; + 7'd58 : r_o = 9'd191; + 7'd59 : r_o = 9'd187; + 7'd60 : r_o = 9'd183; + 7'd61 : r_o = 9'd180; + 7'd62 : r_o = 9'd176; + 7'd63 : r_o = 9'd172; + 7'd64 : r_o = 9'd169; + 7'd65 : r_o = 9'd165; + 7'd66 : r_o = 9'd162; + 7'd67 : r_o = 9'd158; + 7'd68 : r_o = 9'd155; + 7'd69 : r_o = 9'd152; + 7'd70 : r_o = 9'd148; + 7'd71 : r_o = 9'd145; + 7'd72 : r_o = 9'd142; + 7'd73 : r_o = 9'd138; + 7'd74 : r_o = 9'd135; + 7'd75 : r_o = 9'd132; + 7'd76 : r_o = 9'd129; + 7'd77 : r_o = 9'd126; + 7'd78 : r_o = 9'd123; + 7'd79 : r_o = 9'd120; + 7'd80 : r_o = 9'd117; + 7'd81 : r_o = 9'd114; + 7'd82 : r_o = 9'd111; + 7'd83 : r_o = 9'd108; + 7'd84 : r_o = 9'd105; + 7'd85 : r_o = 9'd102; + 7'd86 : r_o = 9'd99; + 7'd87 : r_o = 9'd96; + 7'd88 : r_o = 9'd93; + 7'd89 : r_o = 9'd91; + 7'd90 : r_o = 9'd88; + 7'd91 : r_o = 9'd85; + 7'd92 : r_o = 9'd82; + 7'd93 : r_o = 9'd80; + 7'd94 : r_o = 9'd77; + 7'd95 : r_o = 9'd74; + 7'd96 : r_o = 9'd72; + 7'd97 : r_o = 9'd69; + 7'd98 : r_o = 9'd67; + 7'd99 : r_o = 9'd64; + 7'd100 : r_o = 9'd62; + 7'd101 : r_o = 9'd59; + 7'd102 : r_o = 9'd57; + 7'd103 : r_o = 9'd54; + 7'd104 : r_o = 9'd52; + 7'd105 : r_o = 9'd49; + 7'd106 : r_o = 9'd47; + 7'd107 : r_o = 9'd45; + 7'd108 : r_o = 9'd42; + 7'd109 : r_o = 9'd40; + 7'd110 : r_o = 9'd38; + 7'd111 : r_o = 9'd35; + 7'd112 : r_o = 9'd33; + 7'd113 : r_o = 9'd31; + 7'd114 : r_o = 9'd29; + 7'd115 : r_o = 9'd26; + 7'd116 : r_o = 9'd24; + 7'd117 : r_o = 9'd22; + 7'd118 : r_o = 9'd20; + 7'd119 : r_o = 9'd18; + 7'd120 : r_o = 9'd15; + 7'd121 : r_o = 9'd13; + 7'd122 : r_o = 9'd11; + 7'd123 : r_o = 9'd9; + 7'd124 : r_o = 9'd7; + 7'd125 : r_o = 9'd5; + 7'd126 : r_o = 9'd3; + default: r_o = 9'd1; + endcase // LUT for initial approximation of reciprocal + end // always +endmodule diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_rnd.v
b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_rnd.v new file mode 100644 index 0000000..e391e49 --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_rnd.v @@ -0,0 +1,438 @@ +///////////////////////////////////////////////////////////////////// +// // +// pfpu32_rnd // +// 32-bit common rounding module for FPU // +// // +// This file is part of the mor1kx project // +// https://github.com/openrisc/mor1kx // +// // +// Author: Andrey Bacherov // +// avbacherov@opencores.org // +// // +///////////////////////////////////////////////////////////////////// +// // +// Copyright (C) 2014 Andrey Bacherov // +// avbacherov@opencores.org // +// // +// This source file may be used and distributed without // +// restriction provided that this copyright statement is not // +// removed from the file and that any derivative work contains // +// the original copyright notice and the associated disclaimer. // +// // +// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY // +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED // +// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS // +// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR // +// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, // +// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES // +// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE // +// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR // +// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF // +// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT // +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT // +// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE // +// POSSIBILITY OF SUCH DAMAGE. 
// +// // +///////////////////////////////////////////////////////////////////// + +`include "mor1kx-defines.v" + +module pfpu32_rnd +#( + parameter OPTION_FTOI_ROUNDING = "CPP" // "CPP" / "IEEE" +) +( + // clocks, resets and other controls + input clk, + input rst, + input flush_i, // flush pipe + input adv_i, // advance pipe + input [1:0] rmode_i, // rounding mode + // input from add/sub + input add_rdy_i, // add/sub is ready + input add_sign_i, // add/sub signum + input add_sub_0_i, // flag that actual substruction is performed and result is zero + input [4:0] add_shl_i, // do left shift in align stage + input [9:0] add_exp10shl_i, // exponent for left shift align + input [9:0] add_exp10sh0_i, // exponent for no shift in align + input [27:0] add_fract28_i, // fractional with appended {r,s} bits + input add_inv_i, // add/sub invalid operation flag + input add_inf_i, // add/sub infinity input + input add_snan_i, // add/sub signaling NaN input + input add_qnan_i, // add/sub quiet NaN input + input add_anan_sign_i, // add/sub signum for output nan + // input from mul + input mul_rdy_i, // mul is ready + input mul_sign_i, // mul signum + input [4:0] mul_shr_i, // do right shift in align stage + input [9:0] mul_exp10shr_i, // exponent for right shift align + input mul_shl_i, // do left shift in align stage + input [9:0] mul_exp10shl_i, // exponent for left shift align + input [9:0] mul_exp10sh0_i, // exponent for no shift in align + input [27:0] mul_fract28_i, // fractional with appended {r,s} bits + input mul_inv_i, // mul invalid operation flag + input mul_inf_i, // mul infinity input + input mul_snan_i, // mul signaling NaN input + input mul_qnan_i, // mul quiet NaN input + input mul_anan_sign_i, // mul signum for output nan + // input from div + input div_op_i, // MUL/DIV output is division + input div_sign_rmnd_i, // signum or reminder for IEEE compliant rounding + input div_dbz_i, // division by zero flag + // input from i2f + input i2f_rdy_i, // i2f is ready + 
input i2f_sign_i, // i2f signum + input [3:0] i2f_shr_i, + input [7:0] i2f_exp8shr_i, + input [4:0] i2f_shl_i, + input [7:0] i2f_exp8shl_i, + input [7:0] i2f_exp8sh0_i, + input [31:0] i2f_fract32_i, + // input from f2i + input f2i_rdy_i, // f2i is ready + input f2i_sign_i, // f2i signum + input [23:0] f2i_int24_i, // f2i fractional + input [4:0] f2i_shr_i, // f2i required shift right value + input [3:0] f2i_shl_i, // f2i required shift left value + input f2i_ovf_i, // f2i overflow flag + input f2i_snan_i, // f2i signaling NaN input + // input from cmp + input cmp_rdy_i, // cmp is ready + input cmp_res_i, // cmp result + input cmp_inv_i, // cmp invalid flag + input cmp_inf_i, // cmp infinity flag + // outputs + // arithmetic part's outputs + output reg [31:0] fpu_result_o, + output reg fpu_arith_valid_o, + // comparator's outputs + output reg fpu_cmp_flag_o, + output reg fpu_cmp_valid_o, + // common output + output reg [`OR1K_FPCSR_WIDTH-1:0] fpcsr_o +); + + localparam INF = 31'b1111111100000000000000000000000; + localparam QNAN = 31'b1111111110000000000000000000000; + localparam SNAN = 31'b1111111101111111111111111111111; + + // rounding mode isn't require pipelinization + wire rm_nearest = (rmode_i==2'b00); + wire rm_to_zero = (rmode_i==2'b01); + wire rm_to_infp = (rmode_i==2'b10); + wire rm_to_infm = (rmode_i==2'b11); + + /* + Any stage's output is registered. + Definitions: + s??o_name - "S"tage number "??", "O"utput + s??t_name - "S"tage number "??", "T"emporary (internally) + */ + + /* Stage #1: common align */ + + wire s1t_sign; + wire [34:0] s1t_fract35; + wire s1t_inv; + wire s1t_inf; + wire s1t_snan; + wire s1t_qnan; + wire s1t_anan_sign; + wire [4:0] s1t_shr; + wire [4:0] s1t_shl; + + // multiplexer for signums and flags + wire s1t_add_sign = add_sub_0_i ? 
rm_to_infm : add_sign_i; + + assign {s1t_sign,s1t_inv,s1t_inf,s1t_snan,s1t_qnan,s1t_anan_sign} = + ({6{add_rdy_i}} & {s1t_add_sign,add_inv_i,add_inf_i,add_snan_i,add_qnan_i,add_anan_sign_i}) | + ({6{mul_rdy_i}} & {mul_sign_i,mul_inv_i,mul_inf_i,mul_snan_i,mul_qnan_i,mul_anan_sign_i}) | + ({6{f2i_rdy_i}} & {f2i_sign_i,1'b0,1'b0,f2i_snan_i,1'b0,f2i_sign_i}) | + ({6{i2f_rdy_i}} & {i2f_sign_i,1'b0,1'b0,1'b0,1'b0,1'b0}); + + // multiplexer for fractionals + assign s1t_fract35 = + ({35{add_rdy_i}} & {7'd0, add_fract28_i}) | + ({35{mul_rdy_i}} & {7'd0, mul_fract28_i}) | + ({35{f2i_rdy_i}} & {8'd0, f2i_int24_i, 3'd0}) | + ({35{i2f_rdy_i}} & {i2f_fract32_i,3'd0}); + + // overflow bit for add/mul + wire s1t_addmul_carry = (add_rdy_i & add_fract28_i[27]) | + (mul_rdy_i & mul_fract28_i[27]); + + // multiplexer for shift values + wire [4:0] s1t_shr_t; + assign {s1t_shr_t, s1t_shl} = + ({10{add_rdy_i}} & {5'd0, add_shl_i}) | + ({10{mul_rdy_i}} & {mul_shr_i, {4'd0,mul_shl_i}}) | + ({10{f2i_rdy_i}} & {f2i_shr_i, {1'b0,f2i_shl_i}}) | + ({10{i2f_rdy_i}} & {{1'b0,i2f_shr_i}, i2f_shl_i}); + + assign s1t_shr = (|s1t_shr_t) ? s1t_shr_t : {4'd0,s1t_addmul_carry}; + + // align + wire [34:0] s1t_fract35sh = + (|s1t_shr) ? (s1t_fract35 >> s1t_shr) : + (s1t_fract35 << s1t_shl); + + // update sticky bit for right shift case. 
+ // maximum right shift value is : + // 27 for mul/div + // 8 for i2f + reg s1r_sticky; + always @(s1t_fract35 or s1t_shr) begin + case (s1t_shr) + 5'd0 : s1r_sticky = |s1t_fract35[ 1:0]; + 5'd1 : s1r_sticky = |s1t_fract35[ 2:0]; + 5'd2 : s1r_sticky = |s1t_fract35[ 3:0]; + 5'd3 : s1r_sticky = |s1t_fract35[ 4:0]; + 5'd4 : s1r_sticky = |s1t_fract35[ 5:0]; + 5'd5 : s1r_sticky = |s1t_fract35[ 6:0]; + 5'd6 : s1r_sticky = |s1t_fract35[ 7:0]; + 5'd7 : s1r_sticky = |s1t_fract35[ 8:0]; + 5'd8 : s1r_sticky = |s1t_fract35[ 9:0]; + 5'd9 : s1r_sticky = |s1t_fract35[10:0]; + 5'd10 : s1r_sticky = |s1t_fract35[11:0]; + 5'd11 : s1r_sticky = |s1t_fract35[12:0]; + 5'd12 : s1r_sticky = |s1t_fract35[13:0]; + 5'd13 : s1r_sticky = |s1t_fract35[14:0]; + 5'd14 : s1r_sticky = |s1t_fract35[15:0]; + 5'd15 : s1r_sticky = |s1t_fract35[16:0]; + 5'd16 : s1r_sticky = |s1t_fract35[17:0]; + 5'd17 : s1r_sticky = |s1t_fract35[18:0]; + 5'd18 : s1r_sticky = |s1t_fract35[19:0]; + 5'd19 : s1r_sticky = |s1t_fract35[20:0]; + 5'd20 : s1r_sticky = |s1t_fract35[21:0]; + 5'd21 : s1r_sticky = |s1t_fract35[22:0]; + 5'd22 : s1r_sticky = |s1t_fract35[23:0]; + 5'd23 : s1r_sticky = |s1t_fract35[24:0]; + 5'd24 : s1r_sticky = |s1t_fract35[25:0]; + 5'd25 : s1r_sticky = |s1t_fract35[26:0]; + default: s1r_sticky = |s1t_fract35[27:0]; + endcase + end // always + + // update sticky bit for left shift case. + reg s1l_sticky; + always @(s1t_fract35 or s1t_shl) begin + case (s1t_shl) + 5'd0 : s1l_sticky = |s1t_fract35[1:0]; + 5'd1 : s1l_sticky = s1t_fract35[0]; + default: s1l_sticky = 1'b0; + endcase + end // always + + wire s1t_sticky = (|s1t_shr) ? 
s1r_sticky : s1l_sticky; + + // two stage multiplexer for exponents + wire [9:0] s1t_exp10shr; + wire [9:0] s1t_exp10shl; + wire [9:0] s1t_exp10sh0; + assign {s1t_exp10shr, s1t_exp10shl, s1t_exp10sh0} = + ({30{add_rdy_i}} & {add_exp10sh0_i, add_exp10shl_i, add_exp10sh0_i}) | + ({30{mul_rdy_i}} & {mul_exp10shr_i, mul_exp10shl_i, mul_exp10sh0_i}) | + ({30{f2i_rdy_i}} & {10'd0, 10'd0, 10'd0}) | + ({30{i2f_rdy_i}} & {{2'd0,i2f_exp8shr_i},{2'd0,i2f_exp8shl_i},{2'd0,i2f_exp8sh0_i}}); + + wire [9:0] s1t_exp10 = + (|s1t_shr_t) ? s1t_exp10shr : + (~(|s1t_shl)) ? (s1t_exp10sh0 + {9'd0,s1t_addmul_carry}) : + s1t_exp10shl; + + // output of align stage + reg s1o_sign; + reg [9:0] s1o_exp10; + reg [31:0] s1o_fract32; + reg [1:0] s1o_rs; + reg s1o_inv; + reg s1o_inf; + reg s1o_snan_i; + reg s1o_qnan_i; + reg s1o_anan_sign_i; + reg s1o_div_op, s1o_div_sign_rmnd, s1o_div_dbz; + reg s1o_f2i_ovf, s1o_f2i; + // registering + always @(posedge clk) begin + if(adv_i) begin + s1o_sign <= s1t_sign; + s1o_exp10 <= s1t_exp10; + s1o_fract32 <= s1t_fract35sh[34:3]; + s1o_rs <= {s1t_fract35sh[2],s1t_sticky}; + // various flags: + s1o_inv <= s1t_inv; + s1o_inf <= s1t_inf; + s1o_snan_i <= s1t_snan; + s1o_qnan_i <= s1t_qnan; + s1o_anan_sign_i <= s1t_anan_sign; + // DIV specials + s1o_div_op <= mul_rdy_i & div_op_i; + s1o_div_sign_rmnd <= div_sign_rmnd_i; + s1o_div_dbz <= div_dbz_i; + // I2F specials + s1o_f2i_ovf <= f2i_ovf_i; + s1o_f2i <= f2i_rdy_i; + end // advance + end // posedge clock + + // ready is special case + reg s1o_ready; + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + s1o_ready <= 1'b0; + else if(flush_i) + s1o_ready <= 1'b0; + else if(adv_i) + s1o_ready <= (add_rdy_i | mul_rdy_i | f2i_rdy_i | i2f_rdy_i); + end // posedge clock + + + /* Stage #2: rounding */ + + + wire s2t_dbz = s1o_div_dbz; + + wire s2t_g = s1o_fract32[0]; + wire s2t_r = s1o_rs[1]; + wire s2t_s = s1o_rs[0]; + wire s2t_lost = s2t_r | s2t_s; + + wire s2t_rnd_up = (rm_nearest & s2t_r & s2t_s) | + (rm_nearest & 
s2t_g & s2t_r & (~s2t_s)) | + (rm_to_infp & (~s1o_sign) & s2t_lost) | + (rm_to_infm & s1o_sign & s2t_lost); + + // IEEE compliance rounding for qutient + wire s2t_div_rnd_up = + (rm_nearest & s2t_r & s2t_s & (~s1o_div_sign_rmnd)) | + ( ((rm_to_infp & (~s1o_sign)) | (rm_to_infm & s1o_sign)) & + ((s2t_r & s2t_s) | ((~s2t_r) & s2t_s & (~s1o_div_sign_rmnd))) ); + wire s2t_div_rnd_dn = (~s2t_r) & s2t_s & s1o_div_sign_rmnd & + ( (rm_to_infp & s1o_sign) | + (rm_to_infm & (~s1o_sign)) | + rm_to_zero ); + + // set resulting direction of rounding + // a) normalized quotient is rounded by quotient related rules + // b) de-normalized quotient is rounded by common rules + wire s2t_rnd_n_qtnt = s1o_div_op & s1o_fract32[23]; // normalized quotient + wire s2t_set_rnd_up = s2t_rnd_n_qtnt ? s2t_div_rnd_up : s2t_rnd_up; + wire s2t_set_rnd_dn = s2t_rnd_n_qtnt ? s2t_div_rnd_dn : 1'b0; + + // define value for rounding adder + wire [31:0] s2t_rnd_v32 = + s2t_set_rnd_up ? 32'd1 : // +1 + s2t_set_rnd_dn ? 32'hFFFFFFFF : // -1 + 32'd0; // no rounding + // rounded fractional + wire [31:0] s2t_fract32_rnd = s1o_fract32 + s2t_rnd_v32; + + + // floating point output + wire s2t_f32_shr = s2t_fract32_rnd[24]; + // update exponent and fraction + wire [9:0] s2t_f32_exp10 = s1o_exp10 + {9'd0,s2t_f32_shr}; + wire [23:0] s2t_f32_fract24 = s2t_f32_shr ? 
s2t_fract32_rnd[24:1] : + s2t_fract32_rnd[23:0]; + // denormalized or zero + wire s2t_f32_fract24_dn = ~s2t_f32_fract24[23]; + + + // integer output (f2i) + wire s2t_i32_carry_rnd; + wire s2t_i32_inv; + wire [31:0] s2t_i32_int32; + generate + /* verilator lint_on WIDTH */ + if (OPTION_FTOI_ROUNDING == "CPP") begin : ftoi_cpp_truncate + /* verilator lint_off WIDTH */ + assign s2t_i32_carry_rnd = s1o_fract32[31]; + assign s2t_i32_inv = ((~s1o_sign) & s2t_i32_carry_rnd) | s1o_f2i_ovf; + // two's complement for negative number + assign s2t_i32_int32 = (s1o_fract32 ^ {32{s1o_sign}}) + {31'd0,s1o_sign}; + end + else begin : ftoi_ieee_rounding + assign s2t_i32_carry_rnd = s2t_fract32_rnd[31]; + assign s2t_i32_inv = ((~s1o_sign) & s2t_i32_carry_rnd) | s1o_f2i_ovf; + // two's complement for negative number + assign s2t_i32_int32 = (s2t_fract32_rnd ^ {32{s1o_sign}}) + {31'd0,s1o_sign}; + end + endgenerate + // zero + wire s2t_i32_int32_00 = (~s2t_i32_inv) & (~(|s2t_i32_int32)); + // int32 output + wire [31:0] s2t_i32_opc; + assign s2t_i32_opc = + s2t_i32_inv ? (32'h7fffffff ^ {32{s1o_sign}}) : s2t_i32_int32; + + + // Generate result and flags + wire s2t_ine, s2t_ovf, s2t_inf, s2t_unf, s2t_zer; + wire [31:0] s2t_opc; + assign {s2t_opc,s2t_ine,s2t_ovf,s2t_inf,s2t_unf,s2t_zer} = + // f2i + s1o_f2i ? // ine ovf inf unf zer + {s2t_i32_opc,s2t_lost,1'b0,1'b0,1'b0,s2t_i32_int32_00} : + // qnan output + (s1o_snan_i | s1o_qnan_i) ? // ine ovf inf unf zer + {{s1o_anan_sign_i,QNAN}, 1'b0,1'b0,1'b0,1'b0,1'b0} : + // snan output + s1o_inv ? // ine ovf inf unf zer + {{s1o_sign,SNAN},1'b0,1'b0,1'b0,1'b0,1'b0} : + // overflow and infinity + ((s2t_f32_exp10 > 10'd254) | s1o_inf | s2t_dbz) ? // ine ovf inf unf zer + {{s1o_sign,INF},((s2t_lost | (~s1o_inf)) & (~s2t_dbz)),((~s1o_inf) & (~s2t_dbz)),1'b1,1'b0,1'b0} : + // denormalized or zero + (s2t_f32_fract24_dn) ? 
// ine ovf inf + {{s1o_sign,8'd0,s2t_f32_fract24[22:0]},s2t_lost,1'b0,1'b0, + // unf zer + (s2t_lost & s2t_f32_fract24_dn),~(|s2t_f32_fract24)} : + // normal result ine ovf inf unf zer + {{s1o_sign,s2t_f32_exp10[7:0],s2t_f32_fract24[22:0]},s2t_lost,1'b0,1'b0,1'b0,1'b0}; + + + // Output Register + always @(posedge clk `OR_ASYNC_RST) begin + if (rst) begin + // arithmetic results + fpu_result_o <= 32'd0; + fpu_arith_valid_o <= 1'b0; + // comparison specials + fpu_cmp_flag_o <= 1'b0; + fpu_cmp_valid_o <= 1'b0; + // exeptions + fpcsr_o <= {`OR1K_FPCSR_WIDTH{1'b0}}; + end + else if(flush_i) begin + // arithmetic results + fpu_result_o <= 32'd0; + fpu_arith_valid_o <= 1'b0; + // comparison specials + fpu_cmp_flag_o <= 1'b0; + fpu_cmp_valid_o <= 1'b0; + // exeptions + fpcsr_o <= {`OR1K_FPCSR_WIDTH{1'b0}}; + end + else if(adv_i) begin + // arithmetic results + fpu_result_o <= s2t_opc; + fpu_arith_valid_o <= s1o_ready; + // comparison specials + fpu_cmp_flag_o <= cmp_res_i; + fpu_cmp_valid_o <= cmp_rdy_i; + // exeptions + fpcsr_o[`OR1K_FPCSR_OVF] <= s2t_ovf; + fpcsr_o[`OR1K_FPCSR_UNF] <= s2t_unf; + fpcsr_o[`OR1K_FPCSR_SNF] <= s1o_inv | (s1o_snan_i & s1o_f2i); + fpcsr_o[`OR1K_FPCSR_QNF] <= s1o_qnan_i; + fpcsr_o[`OR1K_FPCSR_ZF] <= s2t_zer; + fpcsr_o[`OR1K_FPCSR_IXF] <= s2t_ine; + fpcsr_o[`OR1K_FPCSR_IVF] <= (s1o_inv | (s2t_i32_inv & s1o_f2i) | s1o_snan_i) | + (cmp_inv_i & cmp_rdy_i); + fpcsr_o[`OR1K_FPCSR_INF] <= s2t_inf | + (cmp_inf_i & cmp_rdy_i); + fpcsr_o[`OR1K_FPCSR_DZF] <= s2t_dbz; + end + end // posedge clock + +endmodule // pfpu32_rnd diff --git a/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_top.v b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_top.v new file mode 100644 index 0000000..66b60ea --- /dev/null +++ b/pythondata_cpu_mor1kx/verilog/rtl/verilog/pfpu32/pfpu32_top.v @@ -0,0 +1,450 @@ +///////////////////////////////////////////////////////////////////// +//// //// +//// pfpu32_top //// +//// 32-bit floating point top level //// +//// //// 
+//// Author: Andrey Bacherov //// +//// avbacherov@opencores.org //// +//// //// +///////////////////////////////////////////////////////////////////// +//// //// +//// Copyright (C) 2014 Andrey Bacherov //// +//// avbacherov@opencores.org //// +//// //// +//// This source file may be used and distributed without //// +//// restriction provided that this copyright statement is not //// +//// removed from the file and that any derivative work contains //// +//// the original copyright notice and the associated disclaimer.//// +//// //// +//// THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY //// +//// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED //// +//// TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS //// +//// FOR A PARTICULAR PURPOSE. IN NO EVENT SHALL THE AUTHOR //// +//// OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, //// +//// INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES //// +//// (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE //// +//// GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR //// +//// BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF //// +//// LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT //// +//// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT //// +//// OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE //// +//// POSSIBILITY OF SUCH DAMAGE. 
//// +//// //// +///////////////////////////////////////////////////////////////////// + +// fpu operations: +// ========================== +// 0000 = add, +// 0001 = subtract, +// 0010 = multiply, +// 0011 = divide, +// 0100 = i2f +// 0101 = f2i +// 0110 = unused (rem) +// 0111 = reserved +// 1xxx = comparison + +`include "mor1kx-defines.v" + +module pfpu32_top // FPU top level: unpacks rfa_i/rfb_i, starts the unit selected by op_fpu_i and feeds all unit outputs to pfpu32_rnd +#( + parameter OPTION_OPERAND_WIDTH = 32, + parameter OPTION_FTOI_ROUNDING = "CPP" // "CPP" / "IEEE" +) +( + input clk, + input rst, + input flush_i, + input padv_decode_i, + input padv_execute_i, + input [`OR1K_FPUOP_WIDTH-1:0] op_fpu_i, + input [`OR1K_FPCSR_RM_SIZE-1:0] round_mode_i, + input [OPTION_OPERAND_WIDTH-1:0] rfa_i, + input [OPTION_OPERAND_WIDTH-1:0] rfb_i, + output [OPTION_OPERAND_WIDTH-1:0] fpu_result_o, + output fpu_arith_valid_o, + output fpu_cmp_flag_o, + output fpu_cmp_valid_o, + output [`OR1K_FPCSR_WIDTH-1:0] fpcsr_o +); + +// MSB (set by decode stage) indicates FPU instruction +// op_fpu is op_fpu_i with the top (FPU op valid) bit forced to zero +wire is_op_fpu = op_fpu_i[`OR1K_FPUOP_WIDTH-1]; +wire [`OR1K_FPUOP_WIDTH-1:0] op_fpu = {1'b0,op_fpu_i[`OR1K_FPUOP_WIDTH-2:0]}; +wire [2:0] op_arith_conv = op_fpu_i[2:0]; // alias +wire a_cmp = op_fpu_i[3]; // alias for compare bit of fpu's opcode + +// advance FPU units: on padv_execute_i, or while neither an arithmetic nor a comparison result is valid +wire padv_fpu_units = padv_execute_i | + ((~fpu_arith_valid_o) & (~fpu_cmp_valid_o)); + +// start logic: new_data is cleared by rst/flush_i, set by padv_decode_i, otherwise cleared when the FPU units advance +reg new_data; +always @(posedge clk `OR_ASYNC_RST) begin + if (rst) + new_data <= 1'b0; + else if(flush_i) + new_data <= 1'b0; + else if(padv_decode_i) + new_data <= 1'b1; + else if(padv_fpu_units) + new_data <= 1'b0; +end // posedge clock + +wire new_fpu_data = new_data & is_op_fpu; + + +// analysis of input values +// split input a +wire in_signa = rfa_i[31]; +wire [7:0] in_expa = rfa_i[30:23]; +wire [22:0] in_fracta = rfa_i[22:0]; +// detect infinity a +wire in_expa_ff = &in_expa; +wire in_infa = in_expa_ff & (~(|in_fracta)); +// signaling NaN: exponent is 8hff, [22] is zero, +// rest of fract is
non-zero +// quiet NaN: exponent is 8hff, [22] is 1 +wire in_snan_a = in_expa_ff & (~in_fracta[22]) & (|in_fracta[21:0]); +wire in_qnan_a = in_expa_ff & in_fracta[22]; +// denormalized/zero of a +wire in_opa_0 = ~(|rfa_i[30:0]); +wire in_opa_dn = (~(|in_expa)) & (|in_fracta); + +// split input b +wire in_signb = rfb_i[31]; +wire [7:0] in_expb = rfb_i[30:23]; +wire [22:0] in_fractb = rfb_i[22:0]; +// detect infinity b +wire in_expb_ff = &in_expb; +wire in_infb = in_expb_ff & (~(|in_fractb)); +// detect NaNs in b +wire in_snan_b = in_expb_ff & (~in_fractb[22]) & (|in_fractb[21:0]); +wire in_qnan_b = in_expb_ff & in_fractb[22]; +// denormalized/zero of b +wire in_opb_0 = ~(|rfb_i[30:0]); +wire in_opb_dn = (~(|in_expb)) & (|in_fractb); + +// detection of some exceptions +// a nan input -> qnan output +wire in_snan = in_snan_a | in_snan_b; +wire in_qnan = in_qnan_a | in_qnan_b; +// sign of output nan: from 'a' when 'a' is a NaN, otherwise from 'b' +wire in_anan_sign = (in_snan_a | in_qnan_a) ? in_signa : + in_signb; + +// restored exponents (bit 0 is forced to 1 for denormalized inputs) +wire [9:0] in_exp10a = {2'd0,in_expa[7:1],(in_expa[0] | in_opa_dn)}; +wire [9:0] in_exp10b = {2'd0,in_expb[7:1],(in_expb[0] | in_opb_dn)}; +// restored fractionals (bit 23 is 1 unless the input is denormalized or zero) +wire [23:0] in_fract24a = {((~in_opa_dn) & (~in_opa_0)),in_fracta}; +wire [23:0] in_fract24b = {((~in_opb_dn) & (~in_opb_0)),in_fractb}; + + +// comparator +// inputs & outputs +wire op_cmp = a_cmp & + new_fpu_data; +wire addsub_agtb_o, addsub_aeqb_o; +wire cmp_result, cmp_ready, + cmp_inv, cmp_inf; +// module instance +pfpu32_fcmp u_f32_cmp +( + .fpu_op_is_comp_i(op_cmp), + .generic_cmp_opc_i(op_fpu[`OR1K_FPUOP_GENERIC_CMP_SELECT]), + .unordered_cmp_bit_i(op_fpu[`OR1K_FPUOP_UNORDERED_CMP_BIT]), + // operand 'a' related inputs + .signa_i(in_signa), + .exp10a_i(in_exp10a), + .fract24a_i(in_fract24a), + .snana_i(in_snan_a), + .qnana_i(in_qnan_a), + .infa_i(in_infa), + .zeroa_i(in_opa_0), + // operand 'b' related inputs + .signb_i(in_signb), + .exp10b_i(in_exp10b), + .fract24b_i(in_fract24b), + .snanb_i(in_snan_b), +
.qnanb_i(in_qnan_b), + .infb_i(in_infb), + .zerob_i(in_opb_0), + // support addsub + .addsub_agtb_o(addsub_agtb_o), + .addsub_aeqb_o(addsub_aeqb_o), + // outputs + .cmp_flag_o(cmp_result), + .inv_o(cmp_inv), + .inf_o(cmp_inf), + .ready_o(cmp_ready) +); + + +// addition / subtraction +// inputs & outputs +wire the_sub = (op_arith_conv == 3'd1); +wire op_add = (~a_cmp) & ((op_arith_conv == 3'd0) | the_sub); +wire add_start = op_add & + new_fpu_data; +wire add_rdy_o; // add/sub is ready +wire add_sign_o; // add/sub signum +wire add_sub_0_o; // flag that actual subtraction is performed and result is zero +wire [4:0] add_shl_o; // do left shift in align stage +wire [9:0] add_exp10shl_o; // exponent for left shift align +wire [9:0] add_exp10sh0_o; // exponent for no shift in align +wire [27:0] add_fract28_o; // fractional with appended {r,s} bits +wire add_inv_o; // add/sub invalid operation flag +wire add_inf_o; // add/sub infinity output reg +wire add_snan_o; // add/sub signaling NaN output reg +wire add_qnan_o; // add/sub quiet NaN output reg +wire add_anan_sign_o; // add/sub signum for output nan +// module instance +pfpu32_addsub u_f32_addsub +( + .clk (clk), + .rst (rst), + .flush_i (flush_i), // flush pipe + .adv_i (padv_fpu_units), // advance pipe + .start_i (add_start), + .is_sub_i (the_sub), // 1: subtraction, 0: addition + // input 'a' related values + .signa_i (in_signa), + .exp10a_i (in_exp10a), + .fract24a_i (in_fract24a), + .infa_i (in_infa), + // input 'b' related values + .signb_i (in_signb), + .exp10b_i (in_exp10b), + .fract24b_i (in_fract24b), + .infb_i (in_infb), + // 'a'/'b' related + .snan_i (in_snan), + .qnan_i (in_qnan), + .anan_sign_i (in_anan_sign), + .addsub_agtb_i (addsub_agtb_o), + .addsub_aeqb_i (addsub_aeqb_o), + // outputs + .add_rdy_o (add_rdy_o), // add/sub is ready + .add_sign_o (add_sign_o), // add/sub signum + .add_sub_0_o (add_sub_0_o), // flag that actual subtraction is performed and result is zero + .add_shl_o (add_shl_o), //
do left shift in align stage + .add_exp10shl_o (add_exp10shl_o), // exponent for left shift align + .add_exp10sh0_o (add_exp10sh0_o), // exponent for no shift in align + .add_fract28_o (add_fract28_o), // fractional with appended {r,s} bits + .add_inv_o (add_inv_o), // add/sub invalid operation flag + .add_inf_o (add_inf_o), // add/sub infinity output reg + .add_snan_o (add_snan_o), // add/sub signaling NaN output reg + .add_qnan_o (add_qnan_o), // add/sub quiet NaN output reg + .add_anan_sign_o (add_anan_sign_o) // add/sub signum for output nan +); + +// MUL/DIV combined pipeline +// inputs & outputs +wire op_mul = (~a_cmp) & (op_arith_conv == 3'd2); +wire op_div = (~a_cmp) & (op_arith_conv == 3'd3); +wire mul_start = (op_mul | op_div) & + new_fpu_data; +// MUL/DIV common outputs +wire mul_rdy_o; // mul is ready +wire mul_sign_o; // mul signum +wire [4:0] mul_shr_o; // do right shift in align stage +wire [9:0] mul_exp10shr_o; // exponent for right shift align +wire mul_shl_o; // do left shift in align stage +wire [9:0] mul_exp10shl_o; // exponent for left shift align +wire [9:0] mul_exp10sh0_o; // exponent for no shift in align +wire [27:0] mul_fract28_o; // fractional with appended {r,s} bits +wire mul_inv_o; // mul invalid operation flag +wire mul_inf_o; // mul infinity output reg +wire mul_snan_o; // mul signaling NaN output reg +wire mul_qnan_o; // mul quiet NaN output reg +wire mul_anan_sign_o; // mul signum for output nan +// DIV additional outputs +wire div_op_o; // operation is division +wire div_sign_rmnd_o; // signum of remainder for IEEE compliant rounding +wire div_dbz_o; // division by zero flag +// module instance +pfpu32_muldiv u_f32_muldiv +( + .clk (clk), + .rst (rst), + .flush_i (flush_i), // flush pipe + .adv_i (padv_fpu_units), // advance pipe + .start_i (mul_start), + .is_div_i (op_div), + // input 'a' related values + .signa_i (in_signa), + .exp10a_i (in_exp10a), + .fract24a_i (in_fract24a), + .infa_i (in_infa), + .zeroa_i (in_opa_0), + //
input 'b' related values + .signb_i (in_signb), + .exp10b_i (in_exp10b), + .fract24b_i (in_fract24b), + .infb_i (in_infb), + .zerob_i (in_opb_0), + // 'a'/'b' related + .snan_i (in_snan), + .qnan_i (in_qnan), + .anan_sign_i (in_anan_sign), + // MUL/DIV common outputs + .muldiv_rdy_o (mul_rdy_o), // mul is ready + .muldiv_sign_o (mul_sign_o), // mul signum + .muldiv_shr_o (mul_shr_o), // do right shift in align stage + .muldiv_exp10shr_o (mul_exp10shr_o), // exponent for right shift align + .muldiv_shl_o (mul_shl_o), // do left shift in align stage + .muldiv_exp10shl_o (mul_exp10shl_o), // exponent for left shift align + .muldiv_exp10sh0_o (mul_exp10sh0_o), // exponent for no shift in align + .muldiv_fract28_o (mul_fract28_o), // fractional with appended {r,s} bits + .muldiv_inv_o (mul_inv_o), // mul invalid operation flag + .muldiv_inf_o (mul_inf_o), // mul infinity output reg + .muldiv_snan_o (mul_snan_o), // mul signaling NaN output reg + .muldiv_qnan_o (mul_qnan_o), // mul quiet NaN output reg + .muldiv_anan_sign_o (mul_anan_sign_o), // mul signum for output nan + // DIV additional outputs + .div_op_o(div_op_o), // operation is division + .div_sign_rmnd_o(div_sign_rmnd_o), // signum of remainder for IEEE compliant rounding + .div_dbz_o(div_dbz_o) // division by zero flag +); + +// converters (i2f and f2i) +// i2f signals +wire op_i2f_cnv = (~a_cmp) & (op_arith_conv == 3'd4); +wire i2f_start = op_i2f_cnv & + new_fpu_data; +wire i2f_rdy_o; // i2f is ready +wire i2f_sign_o; // i2f signum +wire [3:0] i2f_shr_o; // i2f required shift right value +wire [7:0] i2f_exp8shr_o; // i2f exponent for right shift align +wire [4:0] i2f_shl_o; // i2f required shift left value +wire [7:0] i2f_exp8shl_o; // i2f exponent for left shift align +wire [7:0] i2f_exp8sh0_o; // i2f exponent for no shift in align +wire [31:0] i2f_fract32_o; // i2f fractional +// i2f module instance +pfpu32_i2f u_i2f_cnv +( + .clk (clk), + .rst (rst), + .flush_i (flush_i), // flush pipe + .adv_i (padv_fpu_units), // advance pipe + .start_i (i2f_start), // start conversion + .opa_i (rfa_i), + .i2f_rdy_o (i2f_rdy_o), // i2f is ready + .i2f_sign_o (i2f_sign_o), // i2f signum + .i2f_shr_o (i2f_shr_o), + .i2f_exp8shr_o
(i2f_exp8shr_o), + .i2f_shl_o (i2f_shl_o), + .i2f_exp8shl_o (i2f_exp8shl_o), + .i2f_exp8sh0_o (i2f_exp8sh0_o), + .i2f_fract32_o (i2f_fract32_o) +); +// f2i signals +wire op_f2i_cnv = (~a_cmp) & (op_arith_conv == 3'd5); +wire f2i_start = op_f2i_cnv & + new_fpu_data; +wire f2i_rdy_o; // f2i is ready +wire f2i_sign_o; // f2i signum +wire [23:0] f2i_int24_o; // f2i fractional +wire [4:0] f2i_shr_o; // f2i required shift right value +wire [3:0] f2i_shl_o; // f2i required shift left value +wire f2i_ovf_o; // f2i overflow flag +wire f2i_snan_o; // f2i signaling NaN output reg +// f2i module instance +pfpu32_f2i u_f2i_cnv +( + .clk (clk), + .rst (rst), + .flush_i (flush_i), // flush pipe + .adv_i (padv_fpu_units), // advance pipe + .start_i (f2i_start), // start conversion + .signa_i (in_signa), // input 'a' related values + .exp10a_i (in_exp10a), + .fract24a_i (in_fract24a), + .snan_i (in_snan), // 'a'/'b' related + .qnan_i (in_qnan), + .f2i_rdy_o (f2i_rdy_o), // f2i is ready + .f2i_sign_o (f2i_sign_o), // f2i signum + .f2i_int24_o (f2i_int24_o), // f2i fractional + .f2i_shr_o (f2i_shr_o), // f2i required shift right value + .f2i_shl_o (f2i_shl_o), // f2i required shift left value + .f2i_ovf_o (f2i_ovf_o), // f2i overflow flag + .f2i_snan_o (f2i_snan_o) // f2i signaling NaN output reg +); + + +// multiplexing and rounding +pfpu32_rnd +#( + .OPTION_FTOI_ROUNDING (OPTION_FTOI_ROUNDING) // f2i rounding option passed through: "CPP" / "IEEE" +) +u_f32_rnd +( + // clocks, resets and other controls + .clk (clk), + .rst (rst), + .flush_i (flush_i), // flush pipe + .adv_i (padv_fpu_units), // advance pipe + .rmode_i (round_mode_i), // rounding mode + // from add/sub + .add_rdy_i (add_rdy_o), // add/sub is ready + .add_sign_i (add_sign_o), // add/sub signum + .add_sub_0_i (add_sub_0_o), // flag that actual subtraction is performed and result is zero + .add_shl_i (add_shl_o), // do left shift in align stage + .add_exp10shl_i (add_exp10shl_o), // exponent for left shift align + .add_exp10sh0_i (add_exp10sh0_o), //
exponent for no shift in align + .add_fract28_i (add_fract28_o), // fractional with appended {r,s} bits + .add_inv_i (add_inv_o), // add/sub invalid operation flag + .add_inf_i (add_inf_o), // add/sub infinity + .add_snan_i (add_snan_o), // add/sub signaling NaN + .add_qnan_i (add_qnan_o), // add/sub quiet NaN + .add_anan_sign_i (add_anan_sign_o), // add/sub signum for output nan + // from mul/div + .mul_rdy_i (mul_rdy_o), // mul is ready + .mul_sign_i (mul_sign_o), // mul signum + .mul_shr_i (mul_shr_o), // do right shift in align stage + .mul_exp10shr_i (mul_exp10shr_o), // exponent for right shift align + .mul_shl_i (mul_shl_o), // do left shift in align stage + .mul_exp10shl_i (mul_exp10shl_o), // exponent for left shift align + .mul_exp10sh0_i (mul_exp10sh0_o), // exponent for no shift in align + .mul_fract28_i (mul_fract28_o), // fractional with appended {r,s} bits + .mul_inv_i (mul_inv_o), // mul invalid operation flag + .mul_inf_i (mul_inf_o), // mul infinity + .mul_snan_i (mul_snan_o), // mul signaling NaN + .mul_qnan_i (mul_qnan_o), // mul quiet NaN + .mul_anan_sign_i (mul_anan_sign_o), // mul signum for output nan + .div_op_i (div_op_o), // MUL/DIV output is division + .div_sign_rmnd_i (div_sign_rmnd_o), // signum of remainder for IEEE compliant rounding + .div_dbz_i (div_dbz_o), // division by zero flag + // from i2f + .i2f_rdy_i (i2f_rdy_o), // i2f is ready + .i2f_sign_i (i2f_sign_o), // i2f signum + .i2f_shr_i (i2f_shr_o), // i2f required shift right value + .i2f_exp8shr_i (i2f_exp8shr_o), // i2f exponent for right shift align + .i2f_shl_i (i2f_shl_o), // i2f required shift left value + .i2f_exp8shl_i (i2f_exp8shl_o), // i2f exponent for left shift align + .i2f_exp8sh0_i (i2f_exp8sh0_o), // i2f exponent for no shift in align + .i2f_fract32_i (i2f_fract32_o), // i2f fractional + // from f2i + .f2i_rdy_i (f2i_rdy_o), // f2i is ready + .f2i_sign_i (f2i_sign_o), // f2i signum + .f2i_int24_i (f2i_int24_o), // f2i fractional + .f2i_shr_i (f2i_shr_o), // f2i required shift right value + .f2i_shl_i (f2i_shl_o), // f2i required shift left value + .f2i_ovf_i (f2i_ovf_o), // f2i overflow flag + .f2i_snan_i (f2i_snan_o), // f2i signaling NaN + // from cmp +
.cmp_rdy_i (cmp_ready), // cmp is ready + .cmp_res_i (cmp_result), // cmp result + .cmp_inv_i (cmp_inv), // cmp invalid flag + .cmp_inf_i (cmp_inf), // cmp infinity flag + // outputs + .fpu_result_o (fpu_result_o), + .fpu_arith_valid_o (fpu_arith_valid_o), + .fpu_cmp_flag_o (fpu_cmp_flag_o), + .fpu_cmp_valid_o (fpu_cmp_valid_o), + .fpcsr_o (fpcsr_o) // FPCSR exception flags +); + +endmodule // pfpu32_top