From 36bb83f4148201caa25acb83897735f8e661eefd Mon Sep 17 00:00:00 2001 From: Lara Herzog Date: Wed, 6 Mar 2024 11:19:59 +0100 Subject: [PATCH] Add dual-port storage, von Neumann architecture --- clash-vexriscv-sim/app/VexRiscvSimulation.hs | 19 ++--- clash-vexriscv-sim/src/Utils/Cpu.hs | 56 ++++++++------ clash-vexriscv-sim/src/Utils/ProgramLoad.hs | 40 ++++++++++ clash-vexriscv-sim/src/Utils/Storage.hs | 77 ++++++++++++++++++++ clash-vexriscv-sim/tests/tests.hs | 5 +- clash-vexriscv/Makefile | 2 +- clash-vexriscv/example-cpu/ExampleCpu.yaml | 1 + clash-vexriscv/src/ffi/impl.cpp | 2 +- 8 files changed, 164 insertions(+), 38 deletions(-) diff --git a/clash-vexriscv-sim/app/VexRiscvSimulation.hs b/clash-vexriscv-sim/app/VexRiscvSimulation.hs index 5298542..0979a7c 100644 --- a/clash-vexriscv-sim/app/VexRiscvSimulation.hs +++ b/clash-vexriscv-sim/app/VexRiscvSimulation.hs @@ -1,4 +1,4 @@ --- SPDX-FileCopyrightText: 2022 Google LLC +-- SPDX-FileCopyrightText: 2022-2024 Google LLC -- -- SPDX-License-Identifier: Apache-2.0 {-# LANGUAGE NumericUnderscores #-} @@ -16,13 +16,13 @@ import qualified Data.List as L import Control.Monad (forM_, when) import Data.Char (chr) -import Data.Maybe (catMaybes, fromMaybe) +import Data.Maybe (catMaybes) import System.Environment (getArgs) import System.IO (putChar, hFlush, stdout) import Text.Printf (printf) -import Utils.ProgramLoad (loadProgram) +import Utils.ProgramLoad (loadProgramDmem) import Utils.Cpu (cpu) import System.Exit (exitFailure) @@ -78,21 +78,18 @@ main = do (iMem, dMem) <- withClockResetEnable @System clockGen resetGen enableGen $ - loadProgram @System elfFile + loadProgramDmem @System elfFile let cpuOut@(unbundle -> (_circuit, writes, _iBus, _dBus)) = withClockResetEnable @System clockGen (resetGenN (SNat @2)) enableGen $ let (circ, writes1, iBus, dBus) = cpu (Just 7894) iMem dMem - dBus' = register Nothing dBus + dBus' = register emptyWishboneS2M dBus in bundle (circ, writes1, iBus, dBus') case debugConfig of RunCharacterDevice -> forM_ (sample_lazy @System (bundle (register @System (unpack 0) cpuOut, cpuOut))) $ - \((_out, write, dS2M0, iS2M0), (out1, _write, _dS2M, _iS2M)) -> do - let - iS2M = fromMaybe emptyWishboneS2M iS2M0 - dS2M = fromMaybe emptyWishboneS2M dS2M0 + \((_out, write, dS2M, iS2M), (out1, _write, _dS2M, _iS2M)) -> do when (err dS2M) $ do let dBusM2S = dBusWbM2S out1 @@ -125,10 +122,8 @@ main = do let total = initCycles + uninteresting + nInteresting in L.zip [0 ..] $ L.take total $ sample_lazy @System cpuOut - forM_ sampled $ \(i, (out, _, iBusS2M0, dBusS2M0)) -> do + forM_ sampled $ \(i, (out, _, iBusS2M, dBusS2M)) -> do let - iBusS2M = fromMaybe emptyWishboneS2M iBusS2M0 - dBusS2M = fromMaybe emptyWishboneS2M dBusS2M0 doPrint = i >= skipTotal -- I-bus interactions diff --git a/clash-vexriscv-sim/src/Utils/Cpu.hs b/clash-vexriscv-sim/src/Utils/Cpu.hs index 6c90b0f..1adff32 100644 --- a/clash-vexriscv-sim/src/Utils/Cpu.hs +++ b/clash-vexriscv-sim/src/Utils/Cpu.hs @@ -19,7 +19,7 @@ import VexRiscv.VecToTuple (vecToTuple) import GHC.Stack (HasCallStack) -import Utils.ProgramLoad (Memory) +import Utils.ProgramLoad (Memory, DMemory) import Utils.Interconnect (interconnectTwo) createDomain vXilinxSystem{vName="Basic50", vPeriod= hzToPeriod 50_000_000} @@ -34,21 +34,21 @@ Address space cpu :: (HasCallStack, HiddenClockResetEnable dom) => Maybe Integer -> - Memory dom -> - Memory dom -> + DMemory dom -> + DMemory dom -> ( Signal dom CpuOut, -- writes Signal dom (Maybe (BitVector 32, BitVector 32)), -- iBus responses - Signal dom (Maybe (WishboneS2M (BitVector 32))), + Signal dom (WishboneS2M (BitVector 32)), -- dBus responses - Signal dom (Maybe (WishboneS2M (BitVector 32))) + Signal dom (WishboneS2M (BitVector 32)) ) cpu jtagPort bootIMem bootDMem = ( output , writes - , mux validI (Just <$> iS2M) (pure Nothing) - , mux validD (Just <$> dS2M) (pure Nothing) + , iS2M + , dS2M ) where (output, jtagOut) = vexRiscv hasClock hasReset input jtagIn @@ -56,27 +56,39 @@ cpu jtagPort bootIMem bootDMem = jtagIn = case jtagPort of Just port -> vexrJtagBridge (fromInteger port) jtagOut Nothing -> pure JTag.defaultIn - -- (unbundle -> (jtagIn', _debugReset)) = unsafePerformIO $ jtagTcpBridge' 7894 hasReset (jtagOut <$> output) - dM2S = dBusWbM2S <$> output - validD = fmap busCycle dM2S .&&. fmap strobe dM2S + {- + 00000000 - dummy area + 20000000 - instruction memory + 40000000 - data memory + -} + + -- The I-bus is only split once, for the D-mem and everything below. + (iS2M, unbundle -> (iMemIM2S :> dMemIM2S :> Nil)) = interconnectTwo + (unBusAddr . iBusWbM2S <$> output) + ((0x0000_0000, iMemIS2M) :> (0x4000_0000, dMemIS2M) :> Nil) - iM2S = unBusAddr . iBusWbM2S <$> output - validI = fmap busCycle iM2S .&&. fmap strobe iM2S + -- Because the dummy region should never be accessed by the instruction bus + -- it's just "ignored" + (iMemIS2M, iMemDS2M) = bootIMem (mapAddr (\x -> complement 0x2000_0000 .&. x) <$> iMemIM2S) iMemDM2S - iS2M = bootIMem (mapAddr (\x -> -- trace (printf "I-addr = % 8X (% 8X)\n" (toInteger $ x - 0x2000_0000) (toInteger x)) - x - 0x2000_0000) <$> iM2S) + -- needed for 'writes' below + dM2S = dBusWbM2S <$> output - dummy = dummyWb + -- because of the memory map having the dummy at 0x0.., then instructions + -- and then data memory, the D-bus is split in an "upper" and "lower" region, + -- where the "upper" region is just the D-mem, the "lower" region gets split + -- again for the instruction memory and the dummy + (dS2M, unbundle -> (dLowerRegionM2S :> dUpperRegionM2S :> Nil)) = interconnectTwo + (unBusAddr <$> dM2S) + ((0x0000_0000, dLowerRegionS2M) :> (0x4000_0000, dUpperRegionS2M) :> Nil) - dummyS2M = dummy dummyM2S - bootDS2M = bootDMem bootDM2S + (dLowerRegionS2M, unbundle -> (dDummyM2S :> iMemDM2S :> Nil)) = interconnectTwo + dLowerRegionM2S + ((0x0000_0000, dDummyS2M) :> (0x2000_0000, iMemDS2M) :> Nil) - (dS2M, vecToTuple . unbundle -> (dummyM2S, bootDM2S)) = interconnectTwo - ((\x -> - -- trace (printf "DBUS %08X" (toInteger (addr x))) - x) <$> (unBusAddr <$> dM2S)) - ((0x0000_0000, dummyS2M) :> (0x4000_0000, bootDS2M) :> Nil) + (dUpperRegionS2M, dMemIS2M) = bootDMem dUpperRegionM2S dMemIM2S + dDummyS2M = dummyWb dDummyM2S input = ( \iBus dBus -> diff --git a/clash-vexriscv-sim/src/Utils/ProgramLoad.hs b/clash-vexriscv-sim/src/Utils/ProgramLoad.hs index 5c74cc3..a5ff81e 100644 --- a/clash-vexriscv-sim/src/Utils/ProgramLoad.hs +++ b/clash-vexriscv-sim/src/Utils/ProgramLoad.hs @@ -18,6 +18,46 @@ import Control.Exception (assert) import Utils.ReadElf import Utils.Storage +type DMemory dom = + Signal dom (WishboneM2S 32 4 (BitVector 32)) -> + Signal dom (WishboneM2S 32 4 (BitVector 32)) -> + (Signal dom (WishboneS2M (BitVector 32)), Signal dom (WishboneS2M (BitVector 32))) + +loadProgramDmem :: (HiddenClockResetEnable dom) => FilePath -> IO (DMemory dom, DMemory dom) +loadProgramDmem path = do + elfBytes <- BS.readFile path + let (entry, iMem, dMem) = readElfFromMemory elfBytes + + assert (entry == 0x2000_0000) (pure ()) + + let + endianSwap dat = + L.concatMap (\[a, b, c, d] -> [d, c, b, a]) $ + chunkFill 4 0 dat + + -- endian swap instructions + iMemContents = endianSwap $ + content iMem <> [0, 0, 0, 0, 0, 0, 0, 0] + dMemContents = endianSwap $ + content dMem <> [0, 0, 0, 0, 0, 0, 0, 0] + + + let instrMem = dualPortStorage iMemContents + dataMem = dualPortStorage dMemContents + + pure (instrMem, dataMem) + where + content :: BinaryData -> [BitVector 8] + content bin = L.map snd $ I.toAscList bin + + chunkFill :: Int -> a -> [a] -> [[a]] + chunkFill _ _ [] = [] + chunkFill n fill xs = + let (first0, rest) = L.splitAt n xs + first1 = first0 <> L.replicate (n - L.length first0) fill + in first1 : chunkFill n fill rest + + type Memory dom = ( Signal dom (WishboneM2S 32 4 (BitVector 32)) -> diff --git a/clash-vexriscv-sim/src/Utils/Storage.hs b/clash-vexriscv-sim/src/Utils/Storage.hs index b053779..900b871 100644 --- a/clash-vexriscv-sim/src/Utils/Storage.hs +++ b/clash-vexriscv-sim/src/Utils/Storage.hs @@ -7,9 +7,11 @@ module Utils.Storage ( storage + , dualPortStorage ) where import Clash.Prelude +import Clash.Signal.Internal (Signal((:-))) import Data.Either (isLeft) import Protocols.Wishbone @@ -40,6 +42,81 @@ instance NFDataX MappedMemory where -- 'BitVector'. rnfX x = seq x () +dualPortStorage :: + forall dom. + ( KnownDomain dom, + HiddenClockResetEnable dom + ) => + [BitVector 8] -> + -- ^ contents + Signal dom (WishboneM2S 32 4 (BitVector 32)) -> + -- ^ in A + Signal dom (WishboneM2S 32 4 (BitVector 32)) -> + -- ^ in B + ( Signal dom (WishboneS2M (BitVector 32)) + -- ^ out A + , Signal dom (WishboneS2M (BitVector 32)) + -- ^ out B + ) +dualPortStorage contents portA portB = (aReply, bReply) + where + actualResult = storage contents inSignal + + (port, inSignal, aReply, bReply) = go A portA portB actualResult + + go currentPort (a :- inA) (b :- inB) ~(res :- actualResult) + -- neither active, just say A is current, do nothing + | not aActive && not bActive = + ( A :- restPorts + , a :- restInSignal + , res :- aReplies + , emptyWishboneS2M :- bReplies + ) + -- A current, A active -> do A + | currentPort == A && aActive = + ( A :- restPorts + , a :- restInSignal + , res :- aReplies + , emptyWishboneS2M :- bReplies + ) + -- current A, A not active but B is, do B and switch to B + | currentPort == A && not aActive && bActive = + ( B :- restPorts + , b :- restInSignal + , emptyWishboneS2M :- aReplies + , res :- bReplies + ) + -- current B, B active -> do B + | currentPort == B && bActive = + ( B :- restPorts + , b :- restInSignal + , emptyWishboneS2M :- aReplies + , res :- bReplies + ) + -- current B, B not active, but A is, do A and switch to A + | currentPort == B && not bActive && aActive = + ( A :- restPorts + , a :- restInSignal + , res :- aReplies + , emptyWishboneS2M :- bReplies + ) + where + aActive = strobe a && busCycle a + bActive = strobe b && busCycle b + + nextPort = case (currentPort, aActive, bActive) of + (_, False, False) -> A + (A, False, True) -> B + (A, True, _) -> A + (B, _, True) -> B + (B, True, False) -> A + + ~(restPorts, restInSignal, aReplies, bReplies) = go nextPort inA inB actualResult + +data AorB = A | B deriving (Generic, NFDataX, Eq) + + + storage :: forall dom. ( KnownDomain dom, diff --git a/clash-vexriscv-sim/tests/tests.hs b/clash-vexriscv-sim/tests/tests.hs index 23283e4..8166768 100644 --- a/clash-vexriscv-sim/tests/tests.hs +++ b/clash-vexriscv-sim/tests/tests.hs @@ -21,7 +21,7 @@ import System.IO.Temp (withSystemTempFile) import Test.Tasty import Test.Tasty.HUnit (Assertion, testCase, (@?=)) -import Utils.ProgramLoad (loadProgram) +import Utils.ProgramLoad (loadProgramDmem) import Utils.Cpu (cpu) @@ -35,7 +35,8 @@ runProgramExpect :: Assertion runProgramExpect act n expected = withSystemTempFile "ELF" $ \fp _ -> do act fp - (iMem, dMem) <- withClockResetEnable @System clockGen (resetGenN (SNat @2)) enableGen $ loadProgram fp + (iMem, dMem) <- withClockResetEnable @System clockGen (resetGenN (SNat @2)) enableGen $ + loadProgramDmem fp let _all@(unbundle -> (_circuit, writes, _iBus, _dBus)) = withClockResetEnable @System clockGen (resetGenN (SNat @2)) enableGen $ diff --git a/clash-vexriscv/Makefile b/clash-vexriscv/Makefile index a909e6b..8f1498c 100644 --- a/clash-vexriscv/Makefile +++ b/clash-vexriscv/Makefile @@ -7,7 +7,7 @@ OUT_DIR = build_out_dir VERILATOR_DIR = $(OUT_DIR)/verilator_output FFI_DIR = src/ffi -VERILATOR_FLAGS = -CFLAGS '-O3 -fPIC' -Wno-fatal +1364-2001ext+v --trace +VERILATOR_FLAGS = -CFLAGS '-O3 -fPIC' -Wno-fatal +1364-2001ext+v --trace --threads 1 VERILATOR_CFLAGS = $(shell pkg-config --cflags verilator) FFI_CPPFLAGS = $(VERILATOR_CFLAGS) -fPIC -O3 -I$(VERILATOR_DIR) diff --git a/clash-vexriscv/example-cpu/ExampleCpu.yaml b/clash-vexriscv/example-cpu/ExampleCpu.yaml index 3dd011b..3bc26be 100644 --- a/clash-vexriscv/example-cpu/ExampleCpu.yaml +++ b/clash-vexriscv/example-cpu/ExampleCpu.yaml @@ -1,4 +1,5 @@ # SPDX-FileCopyrightText: 2024 Google LLC # # SPDX-License-Identifier: CC0-1.0 + debug: !!vexriscv.DebugReport {hardwareBreakpointCount: 5} diff --git a/clash-vexriscv/src/ffi/impl.cpp b/clash-vexriscv/src/ffi/impl.cpp index e305f3c..26f2f3e 100644 --- a/clash-vexriscv/src/ffi/impl.cpp +++ b/clash-vexriscv/src/ffi/impl.cpp @@ -297,7 +297,7 @@ void vexr_jtag_bridge_step(vexr_jtag_bridge_data *d, const JTAG_OUTPUT *output, } } } - d->timer = 27; // 3; value used by VexRiscv regression test + d->timer = 3; } void vexr_jtag_bridge_shutdown(vexr_jtag_bridge_data *bridge_data)