From 8401717964abe249b3b7b0b5e432f778c6f45b6f Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Wed, 16 Feb 2022 12:33:11 +0000 Subject: [PATCH 01/47] fix ECP5DDRPHY cs declaration --- gram/phy/ecp5ddrphy.py | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 752d021..17098b1 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -139,7 +139,9 @@ def __init__(self, pads, sys_clk_freq=100e6): addressbits = len(self.pads.a.o0) bankbits = len(self.pads.ba.o0) - nranks = 1 if not hasattr(self.pads, "cs") else len(self.pads.cs.o0) + nranks = 1 + if hasattr(self.pads, "cs") and hasattr(self.pads.cs, "o0"): + nranks = len(self.pads.cs.o0) databits = len(self.pads.dq.io) self.dfi = Interface(addressbits, bankbits, nranks, 4*databits, 4) @@ -147,7 +149,6 @@ def __init__(self, pads, sys_clk_freq=100e6): tck = 1/(2*self._sys_clk_freq) nphases = 2 databits = len(self.pads.dq.io) - nranks = 1 if not hasattr(self.pads, "cs") else len(self.pads.cs.o0) cl, cwl = get_cl_cw("DDR3", tck) cl_sys_latency = get_sys_latency(nphases, cl) cwl_sys_latency = get_sys_latency(nphases, cwl) @@ -241,16 +242,20 @@ def elaborate(self, platform): if hasattr(self.pads, "cs"): controls.append("cs") for name in controls: + print ("clock", name, getattr(self.pads, name)) + pad = getattr(self.pads, name) + if not hasattr(pad, "o_clk"): + continue m.d.comb += [ - getattr(self.pads, name).o_clk.eq(ClockSignal("dramsync")), - getattr(self.pads, name).o_fclk.eq(ClockSignal("sync2x")), + pad.o_clk.eq(ClockSignal("dramsync")), + pad.o_fclk.eq(ClockSignal("sync2x")), ] - for i in range(len(getattr(self.pads, name).o0)): + for i in range(len(pad.o0)): m.d.comb += [ - getattr(self.pads, name).o0[i].eq(getattr(dfi.phases[0], name)[i]), - getattr(self.pads, name).o1[i].eq(getattr(dfi.phases[0], name)[i]), - getattr(self.pads, name).o2[i].eq(getattr(dfi.phases[1], name)[i]), - getattr(self.pads, name).o3[i].eq(getattr(dfi.phases[1], name)[i]), + pad.o0[i].eq(getattr(dfi.phases[0], name)[i]), + pad.o1[i].eq(getattr(dfi.phases[0], name)[i]), + pad.o2[i].eq(getattr(dfi.phases[1], name)[i]), + pad.o3[i].eq(getattr(dfi.phases[1], name)[i]), ] # DQ --------------------------------------------------------------------------------------- From e1f067d8be26fce56d4d3109a8b5fa174120db07 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sat, 19 Feb 2022 23:48:32 +0000 Subject: [PATCH 02/47] fix gram unit test imports --- gram/test/test_soc.py | 6 ++++++ gram/test/utils.py | 2 +- 2 files changed, 7 insertions(+), 1 deletion(-) diff --git a/gram/test/test_soc.py b/gram/test/test_soc.py index 2a96b3d..ceb8f66 100644 --- a/gram/test/test_soc.py +++ b/gram/test/test_soc.py @@ -1,6 +1,7 @@ # This file is Copyright (c) 2020 LambdaConcept import random +import unittest from nmigen import * from nmigen.asserts import Assert, Assume @@ -19,6 +20,7 @@ from gram.core.multiplexer import _AntiStarvation from gram.test.utils import * + class DDR3SoC(SoC, Elaboratable): def __init__(self, *, clk_freq, dramcore_addr, ddr_addr): @@ -221,3 +223,7 @@ def process(): self.assertEqual(0xFACE0000 | i, (yield from wb_read(soc.bus, (0x10000000 >> 2) + i, 0xF, 256))) runSimulation(soc, process, "test_soc_continuous_memtest.vcd") + + +if __name__ == '__main__': + unittest.main() diff --git a/gram/test/utils.py b/gram/test/utils.py index ed34d11..73fb96e 100644 --- a/gram/test/utils.py +++ b/gram/test/utils.py @@ -9,7 +9,7 @@ from contextlib import contextmanager from nmigen import * -from nmigen.sim.pysim import * +from nmigen.sim import * from nmigen.hdl.ir import Fragment from nmigen.back import rtlil from nmigen._toolchain import require_tool From ce19ad035d8a960cb0ca786b299f80da738e22e8 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 20 Feb 2022 01:04:53 +0000 Subject: [PATCH 03/47] add name to DFI Interface (helps gtkwave traces) --- gram/dfii.py | 12 +++++++++--- gram/phy/dfi.py | 8 +++++--- 2 files changed, 14 insertions(+), 6 deletions(-) diff --git a/gram/dfii.py b/gram/dfii.py index f3e9884..07a0a21 100644 --- a/gram/dfii.py +++ b/gram/dfii.py @@ -61,9 +61,15 @@ class DFIInjector(Elaboratable): def __init__(self, csr_bank, addressbits, bankbits, nranks, databits, nphases=1): self._nranks = nranks - self._inti = dfi.Interface(addressbits, bankbits, nranks, databits, nphases) - self.slave = dfi.Interface(addressbits, bankbits, nranks, databits, nphases) - self.master = dfi.Interface(addressbits, bankbits, nranks, databits, nphases) + self._inti = dfi.Interface(addressbits, bankbits, + nranks, databits, nphases, + name="inti") + self.slave = dfi.Interface(addressbits, bankbits, + nranks, databits, nphases, + name="slave") + self.master = dfi.Interface(addressbits, bankbits, + nranks, databits, nphases, + name="master") self._control = csr_bank.csr(4, "w") # sel, clk_en, odt, reset diff --git a/gram/phy/dfi.py b/gram/phy/dfi.py index 7e58b99..c2bdbbf 100644 --- a/gram/phy/dfi.py +++ b/gram/phy/dfi.py @@ -32,11 +32,13 @@ def phase_description(addressbits, bankbits, nranks, databits): class Interface: - def __init__(self, addressbits, bankbits, nranks, databits, nphases=1): + def __init__(self, addressbits, bankbits, nranks, databits, nphases=1, + name=None): self.phases = [] for p in range(nphases): - p = Record(phase_description( - addressbits, bankbits, nranks, databits)) + p = Record(phase_description(addressbits, bankbits, + nranks, databits), + name=name) self.phases += [p] p.reset.reset = 1 From 8f91ce425bc840f99ae3144267a235734d22073a Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 20 Feb 2022 01:32:09 +0000 Subject: [PATCH 04/47] add dfii submodules so they get explicit names --- gram/dfii.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/gram/dfii.py b/gram/dfii.py index 07a0a21..895e654 100644 --- a/gram/dfii.py +++ b/gram/dfii.py @@ -81,7 +81,8 @@ def __init__(self, csr_bank, addressbits, bankbits, nranks, databits, nphases=1) def elaborate(self, platform): m = Module() - m.submodules += self._phases + for n, phase in enumerate(self._phases): + m.submodules['phase_%d' % n] = phase with m.If(self._control.w_data[0]): m.d.comb += self.slave.connect(self.master) From 8a966e2e6771ca5b207db618319f96e841dd63fe Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Mon, 21 Feb 2022 18:41:09 +0000 Subject: [PATCH 05/47] add a debug verilog dump of one of the FakePHY SocTest cases to see what is going on --- gram/test/test_soc.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gram/test/test_soc.py b/gram/test/test_soc.py index ceb8f66..8187b3f 100644 --- a/gram/test/test_soc.py +++ b/gram/test/test_soc.py @@ -7,6 +7,7 @@ from nmigen.asserts import Assert, Assume from nmigen_soc import wishbone, memory from nmigen.lib.cdc import ResetSynchronizer +from nmigen.cli import verilog from lambdasoc.periph import Peripheral from lambdasoc.soc.base import SoC @@ -141,6 +142,10 @@ def test_multiple_reads(self): dramcore_addr=0x00000000, ddr_addr=0x10000000) + vl = verilog.convert(soc, ports=None) + with open("test_soc_multiple_reads.v", "w") as f: + f.write(vl) + def process(): yield from SocTestCase.init_seq(soc.bus) From 059639bb49c6984b8aa138df62157cc27a84b898 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Mon, 21 Feb 2022 18:41:52 +0000 Subject: [PATCH 06/47] add debug print statements to investigate FakePHY add some more names on dfi.Interface instances, again to see what is going on in gtkwave traces of SocTest nmigen simulation --- gram/core/controller.py | 3 ++- gram/dfii.py | 4 ++++ gram/phy/dfi.py | 3 +++ gram/phy/fakephy.py | 7 ++++--- 4 files changed, 13 insertions(+), 4 deletions(-) diff --git a/gram/core/controller.py b/gram/core/controller.py index 15aaaf7..9693893 100644 --- a/gram/core/controller.py +++ b/gram/core/controller.py @@ -64,7 +64,8 @@ def __init__(self, phy_settings, geom_settings, timing_settings, clk_freq, bankbits=geom_settings.bankbits, nranks=phy_settings.nranks, databits=phy_settings.dfi_databits, - nphases=phy_settings.nphases) + nphases=phy_settings.nphases, + name="mem_dfi") self._clk_freq = clk_freq diff --git a/gram/dfii.py b/gram/dfii.py index 895e654..323ba97 100644 --- a/gram/dfii.py +++ b/gram/dfii.py @@ -59,6 +59,7 @@ def elaborate(self, platform): class DFIInjector(Elaboratable): def __init__(self, csr_bank, addressbits, bankbits, nranks, databits, nphases=1): + print ("nranks", nranks, "nphases", nphases) self._nranks = nranks self._inti = dfi.Interface(addressbits, bankbits, @@ -84,6 +85,9 @@ def elaborate(self, platform): for n, phase in enumerate(self._phases): m.submodules['phase_%d' % n] = phase + for phase in self._inti.phases: + print ("phase", phase) + with m.If(self._control.w_data[0]): m.d.comb += self.slave.connect(self.master) with m.Else(): diff --git a/gram/phy/dfi.py b/gram/phy/dfi.py index c2bdbbf..a436fee 100644 --- a/gram/phy/dfi.py +++ b/gram/phy/dfi.py @@ -34,6 +34,9 @@ def phase_description(addressbits, bankbits, nranks, databits): class Interface: def __init__(self, addressbits, bankbits, nranks, databits, nphases=1, name=None): + print ("DFI Interface", name, "addr", addressbits, + "bankbits", bankbits, "nranks", nranks, "data", databits, + "phases", nphases) self.phases = [] for p in range(nphases): p = Record(phase_description(addressbits, bankbits, diff --git a/gram/phy/fakephy.py b/gram/phy/fakephy.py index d1c4885..3b134cd 100644 --- a/gram/phy/fakephy.py +++ b/gram/phy/fakephy.py @@ -22,8 +22,8 @@ SDRAM_VERBOSE_STD = 1 SDRAM_VERBOSE_DBG = 2 -def Display(*args): - return Signal().eq(0) +#def Display(*args): +# return Signal().eq(0) def Assert(*args): return Signal().eq(0) @@ -511,7 +511,8 @@ def __init__(self, module, settings, clk_freq=100e6, bankbits = self.bankbits, nranks = self.settings.nranks, databits = self.settings.dfi_databits, - nphases = self.settings.nphases + nphases = self.settings.nphases, + name="phy" ) def elaborate(self, platform): From 8610c6e21e45ff68c8c3e8a8747768c34daaf560 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Tue, 22 Feb 2022 12:56:59 +0000 Subject: [PATCH 07/47] remove continue/skip and add comment that all control pins have to be requested "xdr:4" --- gram/phy/ecp5ddrphy.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 17098b1..336b063 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -235,7 +235,9 @@ def elaborate(self, platform): self.pads.ba.o3[i].eq(dfi.phases[1].bank[i]), ] - # Control pins + # Control pins: all of thees have to be declared "xdr 4" when + # requesting the resource: + # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... }) controls = ["ras", "cas", "we", "clk_en", "odt"] if hasattr(self.pads, "reset"): controls.append("reset") @@ -244,8 +246,6 @@ def elaborate(self, platform): for name in controls: print ("clock", name, getattr(self.pads, name)) pad = getattr(self.pads, name) - if not hasattr(pad, "o_clk"): - continue m.d.comb += [ pad.o_clk.eq(ClockSignal("dramsync")), pad.o_fclk.eq(ClockSignal("sync2x")), From 44ff21cdbd27ca17ff0aa83ba337c57667bb950d Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 24 Feb 2022 17:52:16 +0000 Subject: [PATCH 08/47] add CSRs to FakePHY which allows at least testing of firmware as-is the burstdet and read-delay get read and written, do nothing, but it is better than having to modify the dram firmware --- gram/phy/fakephy.py | 91 +++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 87 insertions(+), 4 deletions(-) diff --git a/gram/phy/fakephy.py b/gram/phy/fakephy.py index 3b134cd..6a409c4 100644 --- a/gram/phy/fakephy.py +++ b/gram/phy/fakephy.py @@ -13,6 +13,8 @@ from gram.phy.dfi import * from gram.modules import _speedgrade_timings, _technology_timings +from lambdasoc.periph import Peripheral + from functools import reduce from operator import or_ @@ -28,6 +30,7 @@ def Assert(*args): return Signal().eq(0) + # Bank Model --------------------------------------------------------------------------------------- class BankModel(Elaboratable): @@ -106,6 +109,52 @@ def elaborate(self, platform): return m + +class _DQSBUFMSettingManager(Elaboratable): + """DQSBUFM setting manager. + + The DQSBUFM primitive requires a very basic sequence when updating + read delay or other parameters. This elaboratable generates this + sequence from CSR events. + + Parameters + ---------- + rdly_slr : CSR + CSR storing the rdly value. + + Attributes + ---------- + pause : Signal(), out + Pause signal for DQSBUFM. + readclksel : Signal(3), out + Readclksel signal for DQSBUFM. + """ + def __init__(self, rdly_csr): + self.rdly_csr = rdly_csr + + self.pause = Signal() + self.readclksel = Signal(3) + + def elaborate(self, platform): + m = Module() + + with m.FSM(): + with m.State("Idle"): + with m.If(self.rdly_csr.w_stb): + m.d.sync += self.pause.eq(1) + m.next = "RdlyUpdateRequested" + + with m.State("RdlyUpdateRequested"): + m.d.sync += self.readclksel.eq(self.rdly_csr.w_data) + m.next = "ResetPause" + + with m.State("ResetPause"): + m.d.sync += self.pause.eq(0) + m.next = "Idle" + + return m + + # DFI Phase Model ---------------------------------------------------------------------------------- class DFIPhaseModel(Elaboratable): @@ -421,7 +470,8 @@ def elaborate(self, platform): return m -class FakePHY(Elaboratable): + +class FakePHY(Peripheral, Elaboratable): def __prepare_bank_init_data(self, init, nbanks, nrows, ncols, data_width, address_mapping): mem_size = (self.settings.databits//8)*(nrows*ncols*nbanks) bank_size = mem_size // nbanks @@ -480,6 +530,7 @@ def __init__(self, module, settings, clk_freq=100e6, init = [], address_mapping = "ROW_BANK_COL", verbosity = SDRAM_VERBOSE_OFF): + super().__init__(name="fakephy") # Parameters ------------------------------------------------------------------------------- self.burst_length = { @@ -505,6 +556,19 @@ def __init__(self, module, settings, clk_freq=100e6, self.init = init + # CSR + databits = self.settings.dfi_databits + bank = self.csr_bank() + + self.burstdet = bank.csr(databits//8, "rw") + + self.rdly = [] + self.rdly += [bank.csr(3, "rw", name="rdly_p0")] + self.rdly += [bank.csr(3, "rw", name="rdly_p1")] + + self._bridge = self.bridge(data_width=32, granularity=8, alignment=2) + self.bus = self._bridge.bus + # DFI Interface ---------------------------------------------------------------------------- self.dfi = Interface( addressbits = self.addressbits, @@ -524,9 +588,28 @@ def elaborate(self, platform): ncols = 2**self.colbits data_width = self.settings.dfi_databits*self.settings.nphases - # DFI phases ------------------------------------------------------------------------------- - phases = [DFIPhaseModel(self.dfi, n) for n in range(self.settings.nphases)] - m.submodules += phases + # CSR Bridge + m.submodules.bridge = self._bridge + + # fake burstdet + databits = self.settings.dfi_databits + burstdet_reg = Signal(databits//8, reset_less=True) + m.d.comb += self.burstdet.r_data.eq(burstdet_reg) + + # Burstdet clear + with m.If(self.burstdet.w_stb): + m.d.sync += burstdet_reg.eq(0) + + + # DFI phases --------------------------------------------------- + phases = [] + for i in range(self.settings.nphases): + phase = DFIPhaseModel(self.dfi, i) + m.submodules['phase%d' % i] = phase + phases.append(phase) + + dqsbufm_manager = _DQSBUFMSettingManager(self.rdly[i]) + m.submodules["dqsbufm_manager%i" % i] = dqsbufm_manager # DFI timing checker ----------------------------------------------------------------------- if self.verbosity > SDRAM_VERBOSE_OFF: From e9a4f8b13e75ae57429dc97f929b69e550b07c8e Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 24 Feb 2022 17:55:05 +0000 Subject: [PATCH 09/47] replace the simulation Clock-Reset-Generator with one that is more general. the icarus verilog simulation now passes where previously it did not --- gram/simulation/crg.py | 180 +++++++++++++++++++++++++++++++++-------- 1 file changed, 146 insertions(+), 34 deletions(-) diff --git a/gram/simulation/crg.py b/gram/simulation/crg.py index deab928..aa44c37 100644 --- a/gram/simulation/crg.py +++ b/gram/simulation/crg.py @@ -1,34 +1,135 @@ -# This file is Copyright (c) 2020 LambdaConcept +# Copyright (c) 2020 LambdaConcept +# Copyright (c) 2021 Luke Kenneth Casson Leighton +# Copyright (c) 2018-2020 Florent Kermarrec +# Copyright (c) 2019 Michael Betz +# +# Based on code from LambaConcept, from the gram example which is BSD-2-License +# https://github.com/jeanthom/gram/tree/master/examples +# +# Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER +# under EU Grants 871528 and 957073, under the LGPLv3+ License -from nmigen import * + +from nmigen import (Elaboratable, Module, Signal, ClockDomain, Instance, + ClockSignal, ResetSignal) __ALL__ = ["ECPIX5CRG"] class PLL(Elaboratable): - def __init__(self, clkin, clksel=Signal(shape=2, reset=2), clkout1=Signal(), clkout2=Signal(), clkout3=Signal(), clkout4=Signal(), lock=Signal(), CLKI_DIV=1, CLKFB_DIV=2, CLK1_DIV=3, CLK2_DIV=24): + nclkouts_max = 3 + clki_div_range = (1, 128+1) + clkfb_div_range = (1, 128+1) + clko_div_range = (1, 128+1) + clki_freq_range = ( 8e6, 400e6) + clko_freq_range = (3.125e6, 400e6) + vco_freq_range = ( 400e6, 800e6) + + def __init__(self, clkin, + clksel=Signal(shape=2, reset=2), + reset=Signal(reset_less=True), + locked=Signal()): self.clkin = clkin - self.clkout1 = clkout1 - self.clkout2 = clkout2 - self.clkout3 = clkout3 - self.clkout4 = clkout4 + self.clkin_freq = None self.clksel = clksel - self.lock = lock - self.CLKI_DIV = CLKI_DIV - self.CLKFB_DIV = CLKFB_DIV - self.CLKOP_DIV = CLK1_DIV - self.CLKOS_DIV = CLK2_DIV - self.ports = [ + self.locked = locked + self.reset = reset + self.nclkouts = 0 + self.clkouts = {} + self.config = {} + self.params = {} + + def ports(self): + return [ self.clkin, - self.clkout1, - self.clkout2, - self.clkout3, - self.clkout4, self.clksel, self.lock, - ] + ] + list(self.clkouts.values()) + + def set_clkin_freq(self, freq): + (clki_freq_min, clki_freq_max) = self.clki_freq_range + assert freq >= clki_freq_min + assert freq <= clki_freq_max + self.clkin_freq = freq + + def create_clkout(self, cd, freq, phase=0, margin=1e-2): + (clko_freq_min, clko_freq_max) = self.clko_freq_range + assert freq >= clko_freq_min + assert freq <= clko_freq_max + assert self.nclkouts < self.nclkouts_max + self.clkouts[self.nclkouts] = (cd, freq, phase, margin) + #create_clkout_log(self.logger, cd.name, freq, margin, self.nclkouts) + print("clock domain", cd.domain, freq, margin, self.nclkouts) + self.nclkouts += 1 + + def compute_config(self): + config = {} + for clki_div in range(*self.clki_div_range): + config["clki_div"] = clki_div + for clkfb_div in range(*self.clkfb_div_range): + all_valid = True + vco_freq = self.clkin_freq/clki_div*clkfb_div*1 # clkos3_div=1 + (vco_freq_min, vco_freq_max) = self.vco_freq_range + if vco_freq >= vco_freq_min and vco_freq <= vco_freq_max: + for n, (clk, f, p, m) in sorted(self.clkouts.items()): + valid = False + for d in range(*self.clko_div_range): + clk_freq = vco_freq/d + if abs(clk_freq - f) <= f*m: + config["clko{}_freq".format(n)] = clk_freq + config["clko{}_div".format(n)] = d + config["clko{}_phase".format(n)] = p + valid = True + break + if not valid: + all_valid = False + else: + all_valid = False + if all_valid: + config["vco"] = vco_freq + config["clkfb_div"] = clkfb_div + #compute_config_log(self.logger, config) + print ("PLL config", config) + return config + raise ValueError("No PLL config found") def elaborate(self, platform): + config = self.compute_config() clkfb = Signal() + self.params.update( + # attributes + a_FREQUENCY_PIN_CLKI = str(self.clkin_freq/1e6), + a_ICP_CURRENT = "6", + a_LPF_RESISTOR = "16", + a_MFG_ENABLE_FILTEROPAMP = "1", + a_MFG_GMCREF_SEL = "2", + # parameters + p_FEEDBK_PATH = "INT_OS3", # CLKOS3 rsvd for feedback with div=1. + p_CLKOS3_ENABLE = "ENABLED", + p_CLKOS3_DIV = 1, + p_CLKFB_DIV = config["clkfb_div"], + p_CLKI_DIV = config["clki_div"], + # reset, input clock, lock-achieved output + i_RST = self.reset, + i_CLKI = self.clkin, + o_LOCK = self.locked, + ) + # for each clock-out, set additional parameters + for n, (clk, f, p, m) in sorted(self.clkouts.items()): + n_to_l = {0: "P", 1: "S", 2: "S2"} + div = config["clko{}_div".format(n)] + cphase = int(p*(div + 1)/360 + div) + self.params["p_CLKO{}_ENABLE".format(n_to_l[n])] = "ENABLED" + self.params["p_CLKO{}_DIV".format(n_to_l[n])] = div + self.params["p_CLKO{}_FPHASE".format(n_to_l[n])] = 0 + self.params["p_CLKO{}_CPHASE".format(n_to_l[n])] = cphase + self.params["o_CLKO{}".format(n_to_l[n])] = clk + + m = Module() + print ("params", self.params) + pll = Instance("EHXPLLL", **self.params) + m.submodules.pll = pll + return m + pll = Instance("EHXPLLL", p_OUTDIVIDER_MUXA='DIVA', p_OUTDIVIDER_MUXB='DIVB', @@ -64,14 +165,11 @@ def elaborate(self, platform): o_CLKOS3=self.clkout4, o_LOCK=self.lock, ) - m = Module() - m.submodules += pll - return m class ECPIX5CRG(Elaboratable): - def __init__(self): - ... + def __init__(self, sys_clk_freq=100e6): + self.sys_clk_freq = sys_clk_freq def elaborate(self, platform): m = Module() @@ -88,13 +186,20 @@ def elaborate(self, platform): gsr1 = Signal() m.submodules += [ - Instance("FD1S3AX", p_GSR="DISABLED", i_CK=ClockSignal("rawclk"), i_D=~reset, o_Q=gsr0), - Instance("FD1S3AX", p_GSR="DISABLED", i_CK=ClockSignal("rawclk"), i_D=gsr0, o_Q=gsr1), - Instance("SGSR", i_CLK=ClockSignal("rawclk"), i_GSR=gsr1), + Instance("FD1S3AX", p_GSR="DISABLED", + i_CK=ClockSignal("rawclk"), + i_D=reset, + o_Q=gsr0), + Instance("FD1S3AX", p_GSR="DISABLED", + i_CK=ClockSignal("rawclk"), + i_D=gsr0, + o_Q=gsr1), + Instance("SGSR", i_CLK=ClockSignal("rawclk"), + i_GSR=gsr1), ] # Power-on delay (655us) - podcnt = Signal(3, reset=2**3-1) + podcnt = Signal(3, reset=-1) pod_done = Signal() with m.If(podcnt != 0): m.d.rawclk += podcnt.eq(podcnt-1) @@ -102,12 +207,15 @@ def elaborate(self, platform): # Generating sync2x (200Mhz) and init (25Mhz) from clk100 cd_sync2x = ClockDomain("sync2x", local=False) - cd_sync2x_unbuf = ClockDomain("sync2x_unbuf", local=False, reset_less=True) + cd_sync2x_unbuf = ClockDomain("sync2x_unbuf", + local=False, reset_less=True) cd_init = ClockDomain("init", local=False) cd_sync = ClockDomain("sync", local=False) cd_dramsync = ClockDomain("dramsync", local=False) - m.submodules.pll = pll = PLL(ClockSignal("rawclk"), CLKI_DIV=1, CLKFB_DIV=2, CLK1_DIV=1, CLK2_DIV=4, - clkout1=ClockSignal("sync2x_unbuf"), clkout2=ClockSignal("init")) + m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~reset) + pll.set_clkin_freq(100e6) + pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq) + pll.create_clkout(ClockSignal("init"), 25e6) m.submodules += Instance("ECLKSYNCB", i_ECLKI = ClockSignal("sync2x_unbuf"), i_STOP = 0, @@ -117,18 +225,22 @@ def elaborate(self, platform): m.domains += cd_init m.domains += cd_sync m.domains += cd_dramsync - m.d.comb += ResetSignal("init").eq(~pll.lock|~pod_done) - m.d.comb += ResetSignal("sync").eq(~pll.lock|~pod_done) - m.d.comb += ResetSignal("dramsync").eq(~pll.lock|~pod_done) + reset_ok = Signal(reset_less=True) + m.d.comb += reset_ok.eq(~pll.locked|~pod_done) + m.d.comb += ResetSignal("init").eq(reset_ok) + m.d.comb += ResetSignal("sync").eq(reset_ok) + m.d.comb += ResetSignal("dramsync").eq(reset_ok) # # Generating sync (100Mhz) from sync2x - + m.submodules += Instance("CLKDIVF", p_DIV="2.0", i_ALIGNWD=0, i_CLKI=ClockSignal("sync2x"), i_RST=0, o_CDIVX=ClockSignal("sync")) + + # temporarily set dram sync clock exactly equal to main sync m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync")) return m From 391fdaafbfe94d0c81de6a29955cc3b161316f24 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 24 Feb 2022 17:55:22 +0000 Subject: [PATCH 10/47] add a BitSlip module --- gram/common.py | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/gram/common.py b/gram/common.py index 1b53722..183f9ce 100644 --- a/gram/common.py +++ b/gram/common.py @@ -60,6 +60,46 @@ def get_sys_phases(nphases, sys_latency, cas_latency): cmd_phase = (dat_phase - 1) % nphases return cmd_phase, dat_phase +# BitSlip --------------------------------------------------------------- + +class BitSlip(Elaboratable): + """BitSlip: provides a delay-buffer by N clock cycles for data of width dw + * rst will reset the delay back to zero + * slp will increment the counter. it must be held for {cycles} cycles + for the input data to appear on the output buffer + """ + def __init__(self, dw, rst=None, slp=None, cycles=1): + self.i = Signal(dw) + self.o = Signal(dw) + self.rst = Signal() if rst is None else rst + self.slp = Signal() if slp is None else slp + self.dw = dw + self.cycles = cycles + + def elaborate(self, platform): + m = Module() + comb, sync = m.d.comb, m.d.sync + vcount = self.cycles * self.dw + value = Signal(vcount.bit_length()) + + with m.If(self.rst): + sync += value.eq(0) + with m.Elif(self.slp): + sync += value.eq(value+1) + + # Shift Register using input i. + r = Signal((self.cycles+1)*self.dw, reset_less=True) + sync += r.eq(Cat(r[self.dw:], self.i)) + + # note the slightly strange arrangement: whilst the shift register + # shuffles along by {dw} bits, if dw is not 1, the output can contain + # parts of data from previous clocks. + with m.Switch(value): + for i in range(self.cycles*self.dw): + with m.Case(i): + comb += self.o.eq(r[i:self.dw+i]) + return m + # Settings ----------------------------------------------------------------------------------------- From b5553cc55a60c36c1068e0b90f0ad2065d0a495e Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 25 Feb 2022 01:21:42 +0000 Subject: [PATCH 11/47] allow DDR3 reset (rst) signal to be controlled by DFI commands, update icarus simulation to match, and rename dfi.Interface reset signal to reset_n --- gram/core/multiplexer.py | 4 ++-- gram/dfii.py | 4 ++-- gram/phy/dfi.py | 3 +-- gram/phy/ecp5ddrphy.py | 10 ++++++++-- gram/simulation/icarusecpix5platform.py | 1 + gram/simulation/simsoc.py | 3 ++- gram/simulation/simsoctb.v | 13 +++++-------- gram/test/test_dfii.py | 4 ++-- 8 files changed, 23 insertions(+), 19 deletions(-) diff --git a/gram/core/multiplexer.py b/gram/core/multiplexer.py index 0817d35..458c301 100644 --- a/gram/core/multiplexer.py +++ b/gram/core/multiplexer.py @@ -159,8 +159,8 @@ def valid_and(cmd, attr): for i, (phase, sel) in enumerate(zip(self.dfi.phases, self.sel)): nranks = len(phase.cs) rankbits = log2_int(nranks) - if hasattr(phase, "reset"): - m.d.comb += phase.reset.eq(0) + if hasattr(phase, "reset_n"): + m.d.comb += phase.reset_n.eq(1) m.d.comb += phase.clk_en.eq(Repl(1, nranks)) if hasattr(phase, "odt"): # FIXME: add dynamic drive for multi-rank (will be needed for high frequencies) diff --git a/gram/dfii.py b/gram/dfii.py index 323ba97..a1f5ad4 100644 --- a/gram/dfii.py +++ b/gram/dfii.py @@ -98,7 +98,7 @@ def elaborate(self, platform): for phase in self._inti.phases] m.d.comb += [phase.odt[i].eq(self._control.w_data[2]) for phase in self._inti.phases if hasattr(phase, "odt")] - m.d.comb += [phase.reset.eq(self._control.w_data[3]) - for phase in self._inti.phases if hasattr(phase, "reset")] + m.d.comb += [phase.reset_n.eq(self._control.w_data[3]) + for phase in self._inti.phases if hasattr(phase, "reset_n")] return m diff --git a/gram/phy/dfi.py b/gram/phy/dfi.py index a436fee..91c4799 100644 --- a/gram/phy/dfi.py +++ b/gram/phy/dfi.py @@ -18,7 +18,7 @@ def phase_description(addressbits, bankbits, nranks, databits): ("we", 1, DIR_FANOUT), ("clk_en", nranks, DIR_FANOUT), ("odt", nranks, DIR_FANOUT), - ("reset", 1, DIR_FANOUT), + ("reset_n", 1, DIR_FANOUT), ("act", 1, DIR_FANOUT), # wrdata description ("wrdata", databits, DIR_FANOUT), @@ -43,7 +43,6 @@ def __init__(self, addressbits, bankbits, nranks, databits, nphases=1, nranks, databits), name=name) self.phases += [p] - p.reset.reset = 1 def connect(self, target): if not isinstance(target, Interface): diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 336b063..672d8b2 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -239,13 +239,19 @@ def elaborate(self, platform): # requesting the resource: # ddr_pins = platform.request("ddr3", 0, xdr={"clk":4, "odt":4, ... }) controls = ["ras", "cas", "we", "clk_en", "odt"] - if hasattr(self.pads, "reset"): - controls.append("reset") + if hasattr(self.pads, "rst"): # this gets renamed later to match dfi + controls.append("rst") + if hasattr(self.pads, "reset_n"): + controls.append("reset_n") if hasattr(self.pads, "cs"): controls.append("cs") for name in controls: print ("clock", name, getattr(self.pads, name)) pad = getattr(self.pads, name) + # sigh, convention in nmigen_boards is "rst" but in + # dfi.Interface it is "reset" + if name == 'rst': + name = 'reset_n' m.d.comb += [ pad.o_clk.eq(ClockSignal("dramsync")), pad.o_fclk.eq(ClockSignal("sync2x")), diff --git a/gram/simulation/icarusecpix5platform.py b/gram/simulation/icarusecpix5platform.py index d2155b2..e3520c0 100644 --- a/gram/simulation/icarusecpix5platform.py +++ b/gram/simulation/icarusecpix5platform.py @@ -27,6 +27,7 @@ class IcarusECPIX5Platform(LatticeECP5Platform): ), Resource("ddr3", 0, + Subsignal("rst", Pins("fake", dir="o")), # for sim Subsignal("clk", Pins("H3", dir="o")), #Subsignal("clk", DiffPairs("H3", "J3", dir="o"), Attrs(IO_TYPE="SSTL135D_I")), Subsignal("clk_en", Pins("P1", dir="o")), diff --git a/gram/simulation/simsoc.py b/gram/simulation/simsoc.py index 2379715..ba3c839 100644 --- a/gram/simulation/simsoc.py +++ b/gram/simulation/simsoc.py @@ -25,7 +25,8 @@ def __init__(self, *, clk_freq, features={"cti", "bte"}) ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"}, - xdr={"clk":4, "a":4, "ba":4, "clk_en":4, "we_n":4, "odt":4, "ras":4, "cas":4, "we":4}) + xdr={"rst": 4, "clk":4, "a":4, "ba":4, "clk_en":4, "we_n":4, + "odt":4, "ras":4, "cas":4, "we":4}) self.ddrphy = DomainRenamer("dramsync")(ECP5DDRPHY(ddr_pins)) self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) diff --git a/gram/simulation/simsoctb.v b/gram/simulation/simsoctb.v index a866cc0..3ef4ad9 100644 --- a/gram/simulation/simsoctb.v +++ b/gram/simulation/simsoctb.v @@ -40,12 +40,12 @@ module simsoctb; wire [1:0] dram_dm; wire dram_odt; wire [1:0] dram_tdqs_n; - reg dram_rst = 0; + wire dram_rst; ddr3 #( .check_strict_timing(0) ) ram_chip ( - .rst_n(~dram_rst), + .rst_n(dram_rst), .ck(dram_ck), .ck_n(~dram_ck), .cke(dram_cke), @@ -78,6 +78,7 @@ module simsoctb; //defparam ram_chip. top simsoctop ( + .ddr3_0__rst__io(dram_rst), .ddr3_0__dq__io(dram_dq), .ddr3_0__dqs__p(dram_dqs), .ddr3_0__clk__io(dram_ck), @@ -105,6 +106,7 @@ module simsoctb; begin $dumpfile("simsoc.fst"); $dumpvars(0, clkin); + $dumpvars(0, dram_rst); $dumpvars(0, dram_dq); $dumpvars(0, dram_dqs); $dumpvars(0, dram_ck); @@ -132,18 +134,13 @@ module simsoctb; reg [31:0] tmp; initial begin - dram_rst = 1; #350; // Wait for RESET and POR - // Software control - dram_rst = 0; - - #10; - $display("Release RESET_N"); wishbone_write(32'h0000900c >> 2, 32'h0); // p0 address wishbone_write(32'h00009010 >> 2, 32'h0); // p0 baddress wishbone_write(32'h00009000 >> 2, 8'h0C); // DFII_CONTROL_ODT|DFII_CONTROL_RESET_N + $display("Enable CKE"); wishbone_write(32'h00009000 >> 2, 8'h0E); // DFII_CONTROL_ODT|DFII_CONTROL_RESET_N|DFI_CONTROL_CKE if (dram_cke != 1) diff --git a/gram/test/test_dfii.py b/gram/test/test_dfii.py index 7016da1..4f5cc9b 100644 --- a/gram/test/test_dfii.py +++ b/gram/test/test_dfii.py @@ -162,10 +162,10 @@ def test_reset(self): def process(): yield from wb_write(csrhost.bus, DFII_CONTROL_ADDR >> 2, (1 << 3), sel=0xF) yield - self.assertTrue((yield dut.master.phases[0].reset)) + self.assertTrue((yield dut.master.phases[0].reset_n)) yield from wb_write(csrhost.bus, DFII_CONTROL_ADDR >> 2, 0, sel=0xF) yield - self.assertFalse((yield dut.master.phases[0].reset)) + self.assertFalse((yield dut.master.phases[0].reset_n)) runSimulation(m, process, "test_dfiinjector.vcd") From aa34bd0bc5d7e81c50f23da036110cfe7740fea1 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 25 Feb 2022 18:20:49 +0000 Subject: [PATCH 12/47] set name of DFI interface to ecp5phy in ECP5DDRPHY --- gram/dfii.py | 2 +- gram/phy/ecp5ddrphy.py | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/gram/dfii.py b/gram/dfii.py index a1f5ad4..f9d4de5 100644 --- a/gram/dfii.py +++ b/gram/dfii.py @@ -59,7 +59,7 @@ def elaborate(self, platform): class DFIInjector(Elaboratable): def __init__(self, csr_bank, addressbits, bankbits, nranks, databits, nphases=1): - print ("nranks", nranks, "nphases", nphases) + print ("nranks", nranks, "nphases", nphases, "addressbits", addressbits) self._nranks = nranks self._inti = dfi.Interface(addressbits, bankbits, diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 672d8b2..f35b760 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -143,7 +143,8 @@ def __init__(self, pads, sys_clk_freq=100e6): if hasattr(self.pads, "cs") and hasattr(self.pads.cs, "o0"): nranks = len(self.pads.cs.o0) databits = len(self.pads.dq.io) - self.dfi = Interface(addressbits, bankbits, nranks, 4*databits, 4) + self.dfi = Interface(addressbits, bankbits, nranks, 4*databits, 4, + name="ecp5phy") # PHY settings ----------------------------------------------------------------------------- tck = 1/(2*self._sys_clk_freq) From 2d038760d79f47e69cb1ca0920a858f29fcd1e09 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 25 Feb 2022 18:31:02 +0000 Subject: [PATCH 13/47] restore naming convention "cs_r" on DFI Interface --- gram/core/multiplexer.py | 10 +++++----- gram/dfii.py | 5 +++-- gram/phy/dfi.py | 2 +- gram/phy/fakephy.py | 7 ++++--- 4 files changed, 13 insertions(+), 11 deletions(-) diff --git a/gram/core/multiplexer.py b/gram/core/multiplexer.py index 458c301..69d9fb4 100644 --- a/gram/core/multiplexer.py +++ b/gram/core/multiplexer.py @@ -157,7 +157,7 @@ def valid_and(cmd, attr): return cmd.valid & cmd.ready & getattr(cmd, attr) for i, (phase, sel) in enumerate(zip(self.dfi.phases, self.sel)): - nranks = len(phase.cs) + nranks = len(phase.cs_n) rankbits = log2_int(nranks) if hasattr(phase, "reset_n"): m.d.comb += phase.reset_n.eq(1) @@ -171,15 +171,15 @@ def valid_and(cmd, attr): m.d.comb += rank_decoder.i.eq((Array(cmd.ba[-rankbits:] for cmd in self.commands)[sel])) if i == 0: # Select all ranks on refresh. with m.If(sel == STEER_REFRESH): - m.d.sync += phase.cs.eq(1) + m.d.sync += phase.cs_n.eq(0) with m.Else(): - m.d.sync += phase.cs.eq(rank_decoder.o) + m.d.sync += phase.cs_n.eq(rank_decoder.o) else: - m.d.sync += phase.cs.eq(rank_decoder.o) + m.d.sync += phase.cs_n.eq(rank_decoder.o) m.d.sync += phase.bank.eq(Array(cmd.ba[:-rankbits] for cmd in self.commands)[sel]) else: m.d.sync += [ - phase.cs.eq(1), + phase.cs_n.eq(0), phase.bank.eq(Array(cmd.ba for cmd in self.commands)[sel]), ] diff --git a/gram/dfii.py b/gram/dfii.py index f9d4de5..8b5537f 100644 --- a/gram/dfii.py +++ b/gram/dfii.py @@ -36,14 +36,15 @@ def elaborate(self, platform): with m.If(self._command_issue.w_stb): m.d.comb += [ - self._phase.cs.eq(Repl(value=self._command.w_data[0], count=len(self._phase.cs))), + self._phase.cs_n.eq(Repl(value=~self._command.w_data[0], + count=len(self._phase.cs_n))), self._phase.we.eq(self._command.w_data[1]), self._phase.cas.eq(self._command.w_data[2]), self._phase.ras.eq(self._command.w_data[3]), ] with m.Else(): m.d.comb += [ - self._phase.cs.eq(Repl(value=0, count=len(self._phase.cs))), + self._phase.cs_n.eq(Repl(value=1, count=len(self._phase.cs_n))), self._phase.we.eq(0), self._phase.cas.eq(0), self._phase.ras.eq(0), diff --git a/gram/phy/dfi.py b/gram/phy/dfi.py index 91c4799..d76b764 100644 --- a/gram/phy/dfi.py +++ b/gram/phy/dfi.py @@ -13,7 +13,7 @@ def phase_description(addressbits, bankbits, nranks, databits): ("address", addressbits, DIR_FANOUT), ("bank", bankbits, DIR_FANOUT), ("cas", 1, DIR_FANOUT), - ("cs", nranks, DIR_FANOUT), + ("cs_n", nranks, DIR_FANOUT), ("ras", 1, DIR_FANOUT), ("we", 1, DIR_FANOUT), ("clk_en", nranks, DIR_FANOUT), diff --git a/gram/phy/fakephy.py b/gram/phy/fakephy.py index 6a409c4..5feb583 100644 --- a/gram/phy/fakephy.py +++ b/gram/phy/fakephy.py @@ -178,13 +178,13 @@ def __init__(self, dfi, n): def elaborate(self, platform): m = Module() - with m.If(self.phase.cs & self.phase.ras & ~self.phase.cas): + with m.If(~self.phase.cs_n & self.phase.ras & ~self.phase.cas): m.d.comb += [ self.activate.eq(~self.phase.we), self.precharge.eq(self.phase.we), ] - with m.If(self.phase.cs & ~self.phase.ras & self.phase.cas): + with m.If(~self.phase.cs_n & ~self.phase.ras & self.phase.cas): m.d.comb += [ self.write.eq(self.phase.we), self.read.eq(~self.phase.we), @@ -340,7 +340,8 @@ def elaborate(self, platform): ps = Signal().like(cnt) m.d.comb += ps.eq((cnt + np)*int(self.timings["tCK"])) state = Signal(4) - m.d.comb += state.eq(Cat(phase.we, phase.cas, phase.ras, phase.cs)) + m.d.comb += state.eq(Cat(phase.we, phase.cas, phase.ras, + phase.cs_n)) all_banks = Signal() m.d.comb += all_banks.eq( From 3b6f611b0d3a615f73b97824ff792b712a34177d Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 25 Feb 2022 18:48:34 +0000 Subject: [PATCH 14/47] get chipselect (cs_n) name right in ECP5DDRPHY has to have a minor workaround to adjust for DFI Interface being named "cs_n" but nmigen-boards convention being "cs" --- gram/phy/ecp5ddrphy.py | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index f35b760..c644edd 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -140,8 +140,8 @@ def __init__(self, pads, sys_clk_freq=100e6): addressbits = len(self.pads.a.o0) bankbits = len(self.pads.ba.o0) nranks = 1 - if hasattr(self.pads, "cs") and hasattr(self.pads.cs, "o0"): - nranks = len(self.pads.cs.o0) + if hasattr(self.pads, "cs_n") and hasattr(self.pads.cs_n, "o0"): + nranks = len(self.pads.cs_n.o0) databits = len(self.pads.dq.io) self.dfi = Interface(addressbits, bankbits, nranks, 4*databits, 4, name="ecp5phy") @@ -253,6 +253,9 @@ def elaborate(self, platform): # dfi.Interface it is "reset" if name == 'rst': name = 'reset_n' + # sigh same for cs + if name == 'cs': + name = 'cs_n' m.d.comb += [ pad.o_clk.eq(ClockSignal("dramsync")), pad.o_fclk.eq(ClockSignal("sync2x")), From 67f6ece9bb6a04b8c087527899c81daa895e07b8 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sat, 26 Feb 2022 13:54:43 +0000 Subject: [PATCH 15/47] add missing reset-HI values to cas_n, cs_n, we_n and act_n --- gram/phy/dfi.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/gram/phy/dfi.py b/gram/phy/dfi.py index d76b764..814ad1b 100644 --- a/gram/phy/dfi.py +++ b/gram/phy/dfi.py @@ -43,6 +43,11 @@ def __init__(self, addressbits, bankbits, nranks, databits, nphases=1, nranks, databits), name=name) self.phases += [p] + # set all logic-inverted x_n signal resets to on at power-up + p.cas.reset = 1 + p.cs_n.reset = -1 + p.we.reset = 1 + p.act.reset = 1 def connect(self, target): if not isinstance(target, Interface): From 65fd6521be8da42dc73d64985e3ebd9a33e3e357 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sat, 26 Feb 2022 14:09:27 +0000 Subject: [PATCH 16/47] use dict for lookup of DFI to pads names add ras.reset=1 --- gram/phy/dfi.py | 1 + gram/phy/ecp5ddrphy.py | 7 ++----- 2 files changed, 3 insertions(+), 5 deletions(-) diff --git a/gram/phy/dfi.py b/gram/phy/dfi.py index 814ad1b..8f76c6d 100644 --- a/gram/phy/dfi.py +++ b/gram/phy/dfi.py @@ -45,6 +45,7 @@ def __init__(self, addressbits, bankbits, nranks, databits, nphases=1, self.phases += [p] # set all logic-inverted x_n signal resets to on at power-up p.cas.reset = 1 + p.ras.reset = 1 p.cs_n.reset = -1 p.we.reset = 1 p.act.reset = 1 diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index c644edd..ce13c99 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -251,11 +251,8 @@ def elaborate(self, platform): pad = getattr(self.pads, name) # sigh, convention in nmigen_boards is "rst" but in # dfi.Interface it is "reset" - if name == 'rst': - name = 'reset_n' - # sigh same for cs - if name == 'cs': - name = 'cs_n' + dfi2pads = {'rst': 'reset_n', 'cs': 'cs_n'} + name = dfi2pads.get(name, name) # remap if exists m.d.comb += [ pad.o_clk.eq(ClockSignal("dramsync")), pad.o_fclk.eq(ClockSignal("sync2x")), From 01df3a4b09d11c893a1ee1f658728f9a78d70c30 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Mon, 28 Feb 2022 11:04:32 +0000 Subject: [PATCH 17/47] remove unneeded import --- gram/compat.py | 1 - 1 file changed, 1 deletion(-) diff --git a/gram/compat.py b/gram/compat.py index c053006..bf19993 100644 --- a/gram/compat.py +++ b/gram/compat.py @@ -4,7 +4,6 @@ from nmigen import * from nmigen import tracer -from nmigen.compat import Case from nmigen.back.pysim import * __ALL__ = ["delayed_enter", "Timeline", "CSRPrefixProxy"] From d20b197326aabbe769dac42793721ba1c0c4e750 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Tue, 1 Mar 2022 15:22:20 +0000 Subject: [PATCH 18/47] fix up simulation to be more like VERSA_ECP5 * use MT4164M16 instead of MT41256M16 * add a Chip-Select line (dram_cs_n) which is currently inverted * reduce the number of address lines in the simulated platform --- gram/simulation/icarusecpix5platform.py | 8 +++++--- gram/simulation/simsoc.py | 7 ++++--- gram/simulation/simsoctb.v | 11 ++++++++++- 3 files changed, 19 insertions(+), 7 deletions(-) diff --git a/gram/simulation/icarusecpix5platform.py b/gram/simulation/icarusecpix5platform.py index e3520c0..b3b5a1b 100644 --- a/gram/simulation/icarusecpix5platform.py +++ b/gram/simulation/icarusecpix5platform.py @@ -28,13 +28,15 @@ class IcarusECPIX5Platform(LatticeECP5Platform): Resource("ddr3", 0, Subsignal("rst", Pins("fake", dir="o")), # for sim - Subsignal("clk", Pins("H3", dir="o")), - #Subsignal("clk", DiffPairs("H3", "J3", dir="o"), Attrs(IO_TYPE="SSTL135D_I")), + #Subsignal("clk", Pins("H3", dir="o")), + Subsignal("clk", DiffPairs("H3", "J3", dir="o"), Attrs(IO_TYPE="SSTL135D_I")), Subsignal("clk_en", Pins("P1", dir="o")), Subsignal("we", PinsN("R3", dir="o")), + Subsignal("cs", PinsN("fake2", dir="o")), # for sim Subsignal("ras", PinsN("T3", dir="o")), Subsignal("cas", PinsN("P2", dir="o")), - Subsignal("a", Pins("T5 M3 L3 V6 K2 W6 K3 L1 H2 L2 N1 J1 M1 K1", dir="o")), + #Subsignal("a", Pins("T5 M3 L3 V6 K2 W6 K3 L1 H2 L2 N1 J1 M1 K1", dir="o")), + Subsignal("a", Pins("T5 M3 L3 V6 K2 W6 K3 L1 H2 L2 N1 J1 M1", dir="o")), Subsignal("ba", Pins("U6 N3 N4", dir="o")), Subsignal("dqs", DiffPairs("V4 V1", "U5 U2", dir="io"), Attrs(IO_TYPE="SSTL135D_I")), Subsignal("dq", Pins("T4 W4 R4 W5 R6 P6 P5 P4 R1 W3 T2 V3 U3 W1 T1 W2", dir="io")), diff --git a/gram/simulation/simsoc.py b/gram/simulation/simsoc.py index ba3c839..29aa35b 100644 --- a/gram/simulation/simsoc.py +++ b/gram/simulation/simsoc.py @@ -9,7 +9,7 @@ from gram.core import gramCore from gram.phy.ecp5ddrphy import ECP5DDRPHY -from gram.modules import MT41K256M16 +from gram.modules import (MT41K256M16, MT41K64M16) from gram.frontend.wishbone import gramWishbone from icarusecpix5platform import IcarusECPIX5Platform @@ -26,11 +26,12 @@ def __init__(self, *, clk_freq, ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"}, xdr={"rst": 4, "clk":4, "a":4, "ba":4, "clk_en":4, "we_n":4, - "odt":4, "ras":4, "cas":4, "we":4}) + "cs": 4, "odt":4, "ras":4, "cas":4, "we":4}) self.ddrphy = DomainRenamer("dramsync")(ECP5DDRPHY(ddr_pins)) self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) - ddrmodule = MT41K256M16(clk_freq, "1:2") + #ddrmodule = MT41K256M16(clk_freq, "1:2") + ddrmodule = MT41K64M16(clk_freq, "1:2") self.dramcore = DomainRenamer("dramsync")(gramCore( phy=self.ddrphy, diff --git a/gram/simulation/simsoctb.v b/gram/simulation/simsoctb.v index 3ef4ad9..54c76e4 100644 --- a/gram/simulation/simsoctb.v +++ b/gram/simulation/simsoctb.v @@ -30,6 +30,7 @@ module simsoctb; wire dram_ck; wire dram_cke; wire dram_we_n; + wire dram_cs_n; wire dram_ras_n; wire dram_cas_n; wire [15:0] dram_dq; @@ -49,7 +50,7 @@ module simsoctb; .ck(dram_ck), .ck_n(~dram_ck), .cke(dram_cke), - .cs_n(1'b0), + .cs_n(~dram_cs_n), .ras_n(dram_ras_n), .cas_n(dram_cas_n), .we_n(dram_we_n), @@ -84,6 +85,7 @@ module simsoctb; .ddr3_0__clk__io(dram_ck), .ddr3_0__clk_en__io(dram_cke), .ddr3_0__we__io(dram_we_n), + .ddr3_0__cs__io(dram_cs_n), .ddr3_0__ras__io(dram_ras_n), .ddr3_0__cas__io(dram_cas_n), .ddr3_0__a__io(dram_a), @@ -111,6 +113,7 @@ module simsoctb; $dumpvars(0, dram_dqs); $dumpvars(0, dram_ck); $dumpvars(0, dram_cke); + $dumpvars(0, dram_cs_n); $dumpvars(0, dram_we_n); $dumpvars(0, dram_ras_n); $dumpvars(0, dram_cas_n); @@ -194,6 +197,12 @@ module simsoctb; wishbone_write(32'h00009000 >> 2, 8'h01); // DFII_CONTROL_SEL #2000; + // reset burst detect + //wishbone_write(32'h00008000 >> 2, 0); // burst detect reset + + // read on burst detect + //wishbone_read(32'h00008000 >> 2, tmp); // burst detect + // Read test on provisioned data, row 0, col 0-7 wishbone_read(32'h10000000 >> 2, tmp); assert_equal_32(tmp, 32'hFACECA8C); From 7dcba5b696076481deb43295ec721f253adf2d5c Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 10 Mar 2022 12:17:36 +0000 Subject: [PATCH 19/47] code-cleanup and copyright notices --- gram/frontend/wishbone.py | 49 +++++++++++++++++++++++++-------------- 1 file changed, 31 insertions(+), 18 deletions(-) diff --git a/gram/frontend/wishbone.py b/gram/frontend/wishbone.py index 611be50..a68e8cc 100644 --- a/gram/frontend/wishbone.py +++ b/gram/frontend/wishbone.py @@ -1,18 +1,32 @@ # This file is Copyright (c) 2020 LambdaConcept # License: BSD +# Copyright (c) 2020 LambdaConcept +# Copyright (c) 2021 Luke Kenneth Casson Leighton +# +# Code from LambaConcept is Licensed BSD +# Code from Luke Kenneth Casson Leighton is Licensed LGPLv3+ +# +# Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER +# under EU Grants 871528 and 957073 from math import log2 -from nmigen import * +from nmigen import (Module, Elaboratable, Signal, Repl) from nmigen.utils import log2_int from nmigen_soc import wishbone from nmigen_soc.memory import MemoryMap from lambdasoc.periph import Peripheral - +# XXX +# WARNING - THIS CODE CANNOT COPE WITH WISHBONE 4.0 PIPELINE MODE +# THE ADDRESS MAY CHANGE AFTER EACH STB AND THIS IS AN ASSUMPTION +# FROM WISHBONE 3.0 CLASSIC. USE THE COMPATIBILITY MODE stall=cyc&~ack +# XXX class gramWishbone(Peripheral, Elaboratable): - def __init__(self, core, data_width=32, granularity=8): + def __init__(self, core, data_width=32, granularity=8, + features=frozenset()): + super().__init__(name="wishbone") self.native_port = core.crossbar.get_native_port() @@ -29,11 +43,12 @@ def __init__(self, core, data_width=32, granularity=8): def elaborate(self, platform): m = Module() + cmd = self.native_port.cmd + wdata = self.native_port.wdata + rdata = self.native_port.rdata # Write datapath - m.d.comb += [ - self.native_port.wdata.valid.eq(self.bus.cyc & self.bus.stb & self.bus.we), - ] + m.d.comb += wdata.valid.eq(self.bus.cyc & self.bus.stb & self.bus.we) ratio_bitmask = Repl(1, log2_int(self.ratio)) @@ -46,44 +61,42 @@ def elaborate(self, platform): with m.Switch(self.bus.adr & ratio_bitmask): for i in range(self.ratio): with m.Case(i): - m.d.comb += self.native_port.wdata.we.eq(Repl(sel, self.bus.granularity//8) << (self.ratio*i)) + m.d.comb += wdata.we.eq(Repl(sel, self.bus.granularity//8) << (self.ratio*i)) with m.Switch(self.bus.adr & ratio_bitmask): for i in range(self.ratio): with m.Case(i): - m.d.comb += self.native_port.wdata.data.eq(self.bus.dat_w << (self.bus.data_width*i)) + m.d.comb += wdata.data.eq(self.bus.dat_w << (self.bus.data_width*i)) # Read datapath - m.d.comb += [ - self.native_port.rdata.ready.eq(1), - ] + m.d.comb += rdata.ready.eq(1) with m.Switch(self.bus.adr & ratio_bitmask): for i in range(self.ratio): with m.Case(i): - m.d.comb += self.bus.dat_r.eq(self.native_port.rdata.data >> (self.bus.data_width*i)) + m.d.comb += self.bus.dat_r.eq(rdata.data >> (self.bus.data_width*i)) with m.FSM(): with m.State("Send-Cmd"): m.d.comb += [ - self.native_port.cmd.valid.eq(self.bus.cyc & self.bus.stb), - self.native_port.cmd.we.eq(self.bus.we), - self.native_port.cmd.addr.eq(self.bus.adr >> log2_int(self.bus.data_width//self.bus.granularity)), + cmd.valid.eq(self.bus.cyc & self.bus.stb), + cmd.we.eq(self.bus.we), + cmd.addr.eq(self.bus.adr >> log2_int(self.bus.data_width//self.bus.granularity)), ] - with m.If(self.native_port.cmd.valid & self.native_port.cmd.ready): + with m.If(cmd.valid & cmd.ready): with m.If(self.bus.we): m.next = "Wait-Write" with m.Else(): m.next = "Wait-Read" with m.State("Wait-Read"): - with m.If(self.native_port.rdata.valid): + with m.If(rdata.valid): m.d.comb += self.bus.ack.eq(1) m.next = "Send-Cmd" with m.State("Wait-Write"): - with m.If(self.native_port.wdata.ready): + with m.If(wdata.ready): m.d.comb += self.bus.ack.eq(1) m.next = "Send-Cmd" From 08f04d1b6290b588fc2a334dca27b63026e50576 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 10 Mar 2022 12:20:24 +0000 Subject: [PATCH 20/47] tidy up gramWishbone constructor, pass Wishbone features to bus --- gram/frontend/wishbone.py | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) diff --git a/gram/frontend/wishbone.py b/gram/frontend/wishbone.py index a68e8cc..0b04444 100644 --- a/gram/frontend/wishbone.py +++ b/gram/frontend/wishbone.py @@ -22,6 +22,7 @@ # WARNING - THIS CODE CANNOT COPE WITH WISHBONE 4.0 PIPELINE MODE # THE ADDRESS MAY CHANGE AFTER EACH STB AND THIS IS AN ASSUMPTION # FROM WISHBONE 3.0 CLASSIC. USE THE COMPATIBILITY MODE stall=cyc&~ack +# OR USE BURST-MODE ONLY # XXX class gramWishbone(Peripheral, Elaboratable): def __init__(self, core, data_width=32, granularity=8, @@ -32,14 +33,18 @@ def __init__(self, core, data_width=32, granularity=8, self.native_port = core.crossbar.get_native_port() self.ratio = self.native_port.data_width//data_width - - addr_width = log2_int(core.size//(self.native_port.data_width//data_width)) - self.bus = wishbone.Interface(addr_width=addr_width+log2_int(self.ratio), - data_width=data_width, granularity=granularity) - - map = MemoryMap(addr_width=addr_width+log2_int(self.ratio)+log2_int(data_width//granularity), - data_width=granularity) - self.bus.memory_map = map + addr_width = log2_int(core.size//self.ratio) + addr_width_r = addr_width + log2_int(self.ratio) + self.dsize = log2_int(data_width//granularity) + self.bus = wishbone.Interface(addr_width=addr_width_r, + data_width=data_width, + granularity=granularity, + features=features) + + mmap = MemoryMap(addr_width=addr_width_r+self.dsize, + data_width=granularity) + + self.bus.memory_map = mmap def elaborate(self, platform): m = Module() From 180026c72f0e1d3ef365b2214288d4a543a238dd Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 10 Mar 2022 12:33:15 +0000 Subject: [PATCH 21/47] tidyup on gramWishbone class, add comments --- gram/core/multiplexer.py | 4 +- gram/frontend/wishbone.py | 54 ++++++++++++++---------- gram/phy/ecp5ddrphy.py | 55 ++++++++++++++----------- gram/simulation/icarusecpix5platform.py | 2 +- gram/simulation/simsoctb.v | 6 ++- 5 files changed, 70 insertions(+), 51 deletions(-) diff --git a/gram/core/multiplexer.py b/gram/core/multiplexer.py index 69d9fb4..9e9b40d 100644 --- a/gram/core/multiplexer.py +++ b/gram/core/multiplexer.py @@ -173,9 +173,9 @@ def valid_and(cmd, attr): with m.If(sel == STEER_REFRESH): m.d.sync += phase.cs_n.eq(0) with m.Else(): - m.d.sync += phase.cs_n.eq(rank_decoder.o) + m.d.sync += phase.cs_n.eq(~rank_decoder.o) else: - m.d.sync += phase.cs_n.eq(rank_decoder.o) + m.d.sync += phase.cs_n.eq(~rank_decoder.o) m.d.sync += phase.bank.eq(Array(cmd.ba[:-rankbits] for cmd in self.commands)[sel]) else: m.d.sync += [ diff --git a/gram/frontend/wishbone.py b/gram/frontend/wishbone.py index 0b04444..984eb71 100644 --- a/gram/frontend/wishbone.py +++ b/gram/frontend/wishbone.py @@ -48,61 +48,71 @@ def __init__(self, core, data_width=32, granularity=8, def elaborate(self, platform): m = Module() + comb = m.d.comb cmd = self.native_port.cmd wdata = self.native_port.wdata rdata = self.native_port.rdata + bus = self.bus # Write datapath - m.d.comb += wdata.valid.eq(self.bus.cyc & self.bus.stb & self.bus.we) + comb += wdata.valid.eq(bus.cyc & bus.stb & bus.we) ratio_bitmask = Repl(1, log2_int(self.ratio)) - sel = Signal.like(self.bus.sel) - with m.If(self.bus.sel == 0): - m.d.comb += sel.eq(Repl(1, sel.width)) + # XXX? sel is zero being compensated-for as all 1s does not seem right + sel = Signal.like(bus.sel) + with m.If(bus.sel == 0): + comb += sel.eq(-1) # all 1s with m.Else(): - m.d.comb += sel.eq(self.bus.sel) + comb += sel.eq(bus.sel) - with m.Switch(self.bus.adr & ratio_bitmask): + with m.Switch(bus.adr & ratio_bitmask): # XXX adr changes (WB4-pipe) for i in range(self.ratio): with m.Case(i): - m.d.comb += wdata.we.eq(Repl(sel, self.bus.granularity//8) << (self.ratio*i)) - - with m.Switch(self.bus.adr & ratio_bitmask): - for i in range(self.ratio): - with m.Case(i): - m.d.comb += wdata.data.eq(self.bus.dat_w << (self.bus.data_width*i)) + # write-enable + we = Repl(sel, bus.granularity//8) << (self.ratio*i) + comb += wdata.we.eq(we) + # write-data + data = bus.dat_w << (bus.data_width*i) + comb += wdata.data.eq(data) # Read datapath - m.d.comb += rdata.ready.eq(1) + comb += rdata.ready.eq(1) - with m.Switch(self.bus.adr & ratio_bitmask): + with m.Switch(bus.adr & ratio_bitmask): # XXX adr changes (WB4-pipe) for i in range(self.ratio): with m.Case(i): - m.d.comb += self.bus.dat_r.eq(rdata.data >> (self.bus.data_width*i)) + data = rdata.data >> (bus.data_width*i) + comb += bus.dat_r.eq(data) + # Command FSM with m.FSM(): + # raise a command when WB has a request with m.State("Send-Cmd"): - m.d.comb += [ - cmd.valid.eq(self.bus.cyc & self.bus.stb), - cmd.we.eq(self.bus.we), - cmd.addr.eq(self.bus.adr >> log2_int(self.bus.data_width//self.bus.granularity)), + # XXX this logic is only WB 3.0 classic compatible! + comb += [ + cmd.valid.eq(bus.cyc & bus.stb), + cmd.we.eq(bus.we), + cmd.addr.eq(bus.adr >> self.dsize), ] + # when cmd is accepted, move to either read or write FSM with m.If(cmd.valid & cmd.ready): - with m.If(self.bus.we): + with m.If(bus.we): m.next = "Wait-Write" with m.Else(): m.next = "Wait-Read" + # read-wait: when read valid, ack the WB bus, return idle with m.State("Wait-Read"): with m.If(rdata.valid): - m.d.comb += self.bus.ack.eq(1) + comb += bus.ack.eq(1) m.next = "Send-Cmd" + # write-wait: when write valid, ack the WB bus, return idle with m.State("Wait-Write"): with m.If(wdata.ready): - m.d.comb += self.bus.ack.eq(1) + comb += bus.ack.eq(1) m.next = "Send-Cmd" return m diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index ce13c99..a31dfb3 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -174,6 +174,7 @@ def __init__(self, pads, sys_clk_freq=100e6): def elaborate(self, platform): m = Module() + comb, sync = m.d.comb, m.d.sync m.submodules.bridge = self._bridge @@ -405,12 +406,12 @@ def elaborate(self, platform): ] for j in range(8*i, 8*(i+1)): - dq_o = Signal() - dq_i = Signal() - dq_oe_n = Signal() - dq_i_delayed = Signal() - dq_i_data = Signal(4) - dq_o_data = Signal(8) + dq_o = Signal(name="dq_o_%d" % j) + dq_i = Signal(name="dq_i_%d" % j) + dq_oe_n = Signal(name="dq_oe_n_%d" % j) + dq_i_delayed = Signal(name="dq_i_delayed_%d" % j) + dq_i_data = Signal(4, name="dq_i_data_%d" % j) + dq_o_data = Signal(8, name="dq_o_data_%d" % j) dq_o_data_d = Signal(8, reset_less=True) dq_o_data_muxed = Signal(4, reset_less=True) m.d.comb += dq_o_data.eq(Cat( @@ -478,20 +479,28 @@ def elaborate(self, platform): o_O=dq_i, io_B=self.pads.dq.io[j]) ] - with m.If(~datavalid_prev & datavalid): - m.d.sync += [ - dfi.phases[0].rddata[0*databits+j].eq(dq_i_data[0]), - dfi.phases[0].rddata[1*databits+j].eq(dq_i_data[1]), - dfi.phases[0].rddata[2*databits+j].eq(dq_i_data[2]), - dfi.phases[0].rddata[3*databits+j].eq(dq_i_data[3]), - ] - with m.Elif(datavalid): - m.d.sync += [ - dfi.phases[1].rddata[0*databits+j].eq(dq_i_data[0]), - dfi.phases[1].rddata[1*databits+j].eq(dq_i_data[1]), - dfi.phases[1].rddata[2*databits+j].eq(dq_i_data[2]), - dfi.phases[1].rddata[3*databits+j].eq(dq_i_data[3]), - ] + # shift-register delay on the incoming read data + dq_i_bs = BitSlip(4, Const(0), Const(0), cycles=1) + m.submodules['dq_i_bitslip_%d' % j] = dq_i_bs + dq_i_bs_o = Signal(4, name="dq_i_bs_o_%d" % j) + dq_i_bs_o_d = Signal(4, name="dq_i_bs_o_d_%d" % j) + comb += dq_i_bs.i.eq(dq_i_data) + comb += dq_i_bs_o.eq(dq_i_bs.o) + sync += dq_i_bs_o_d.eq(dq_i_bs_o) # delay by 1 clock + #with m.If(~datavalid_prev & datavalid): + comb += [ + dfi.phases[0].rddata[0*databits+j].eq(dq_i_bs_o_d[0]), + dfi.phases[0].rddata[1*databits+j].eq(dq_i_bs_o_d[1]), + dfi.phases[0].rddata[2*databits+j].eq(dq_i_bs_o_d[2]), + dfi.phases[0].rddata[3*databits+j].eq(dq_i_bs_o_d[3]), + ] + #with m.Elif(datavalid): + comb += [ + dfi.phases[1].rddata[0*databits+j].eq(dq_i_bs_o[0]), + dfi.phases[1].rddata[1*databits+j].eq(dq_i_bs_o[1]), + dfi.phases[1].rddata[2*databits+j].eq(dq_i_bs_o[2]), + dfi.phases[1].rddata[3*databits+j].eq(dq_i_bs_o[3]), + ] # Read Control Path ------------------------------------------------------------------------ # Creates a shift register of read commands coming from the DFI interface. This shift register @@ -507,12 +516,10 @@ def elaborate(self, platform): rddata_en_last = Signal.like(rddata_en) m.d.comb += rddata_en.eq(Cat(dfi.phases[self.settings.rdphase].rddata_en, rddata_en_last)) m.d.sync += rddata_en_last.eq(rddata_en) + for phase in dfi.phases: + m.d.sync += phase.rddata_valid.eq(rddata_en[-1]) m.d.comb += dqs_re.eq(rddata_en[cl_sys_latency + 1] | rddata_en[cl_sys_latency + 2]) - rddata_valid = Signal() - m.d.sync += rddata_valid.eq(datavalid_prev & ~datavalid) - for phase in dfi.phases: - m.d.comb += phase.rddata_valid.eq(rddata_valid) # Write Control Path ----------------------------------------------------------------------- # Creates a shift register of write commands coming from the DFI interface. This shift register diff --git a/gram/simulation/icarusecpix5platform.py b/gram/simulation/icarusecpix5platform.py index b3b5a1b..7b4b601 100644 --- a/gram/simulation/icarusecpix5platform.py +++ b/gram/simulation/icarusecpix5platform.py @@ -27,7 +27,7 @@ class IcarusECPIX5Platform(LatticeECP5Platform): ), Resource("ddr3", 0, - Subsignal("rst", Pins("fake", dir="o")), # for sim + Subsignal("rst", PinsN("fake", dir="o")), # for sim #Subsignal("clk", Pins("H3", dir="o")), Subsignal("clk", DiffPairs("H3", "J3", dir="o"), Attrs(IO_TYPE="SSTL135D_I")), Subsignal("clk_en", Pins("P1", dir="o")), diff --git a/gram/simulation/simsoctb.v b/gram/simulation/simsoctb.v index 54c76e4..5afa17f 100644 --- a/gram/simulation/simsoctb.v +++ b/gram/simulation/simsoctb.v @@ -43,10 +43,12 @@ module simsoctb; wire [1:0] dram_tdqs_n; wire dram_rst; + // anything here with "_n" has to be inverted. nmigen platforms + // sort that out by inverting (with PinsN) ddr3 #( .check_strict_timing(0) ) ram_chip ( - .rst_n(dram_rst), + .rst_n(~dram_rst), .ck(dram_ck), .ck_n(~dram_ck), .cke(dram_cke), @@ -82,7 +84,7 @@ module simsoctb; .ddr3_0__rst__io(dram_rst), .ddr3_0__dq__io(dram_dq), .ddr3_0__dqs__p(dram_dqs), - .ddr3_0__clk__io(dram_ck), + .ddr3_0__clk__p(dram_ck), .ddr3_0__clk_en__io(dram_cke), .ddr3_0__we__io(dram_we_n), .ddr3_0__cs__io(dram_cs_n), From 41c51d7d185189bac92043aaf1bf1cc2125b48d7 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 11 Mar 2022 13:00:16 +0000 Subject: [PATCH 22/47] annoyingly reverting reset_n naming back to reset --- gram/core/multiplexer.py | 4 ++-- gram/dfii.py | 11 ++++++++--- gram/phy/dfi.py | 3 ++- gram/phy/ecp5ddrphy.py | 4 +--- gram/simulation/simsoctb.v | 2 +- gram/test/test_dfii.py | 4 ++-- 6 files changed, 16 insertions(+), 12 deletions(-) diff --git a/gram/core/multiplexer.py b/gram/core/multiplexer.py index 9e9b40d..87ea8a9 100644 --- a/gram/core/multiplexer.py +++ b/gram/core/multiplexer.py @@ -159,8 +159,8 @@ def valid_and(cmd, attr): for i, (phase, sel) in enumerate(zip(self.dfi.phases, self.sel)): nranks = len(phase.cs_n) rankbits = log2_int(nranks) - if hasattr(phase, "reset_n"): - m.d.comb += phase.reset_n.eq(1) + if hasattr(phase, "reset"): + m.d.comb += phase.reset.eq(0) m.d.comb += phase.clk_en.eq(Repl(1, nranks)) if hasattr(phase, "odt"): # FIXME: add dynamic drive for multi-rank (will be needed for high frequencies) diff --git a/gram/dfii.py b/gram/dfii.py index 8b5537f..bf978d2 100644 --- a/gram/dfii.py +++ b/gram/dfii.py @@ -74,11 +74,16 @@ def __init__(self, csr_bank, addressbits, bankbits, nranks, databits, nphases=1) name="master") self._control = csr_bank.csr(4, "w") # sel, clk_en, odt, reset + self._control.w_data.reset = 0b1000 # reset HI + self._phases = [] for n, phase in enumerate(self._inti.phases): self._phases += [PhaseInjector(CSRPrefixProxy(csr_bank, - "p{}".format(n)), phase)] + "p{}".format(n)), + phase)] + if hasattr(phase, "reset"): + phase.reset.reset = 1 def elaborate(self, platform): m = Module() @@ -99,7 +104,7 @@ def elaborate(self, platform): for phase in self._inti.phases] m.d.comb += [phase.odt[i].eq(self._control.w_data[2]) for phase in self._inti.phases if hasattr(phase, "odt")] - m.d.comb += [phase.reset_n.eq(self._control.w_data[3]) - for phase in self._inti.phases if hasattr(phase, "reset_n")] + m.d.comb += [phase.reset.eq(~self._control.w_data[3]) + for phase in self._inti.phases if hasattr(phase, "reset")] return m diff --git a/gram/phy/dfi.py b/gram/phy/dfi.py index 8f76c6d..f5a5eb9 100644 --- a/gram/phy/dfi.py +++ b/gram/phy/dfi.py @@ -18,7 +18,7 @@ def phase_description(addressbits, bankbits, nranks, databits): ("we", 1, DIR_FANOUT), ("clk_en", nranks, DIR_FANOUT), ("odt", nranks, DIR_FANOUT), - ("reset_n", 1, DIR_FANOUT), + ("reset", 1, DIR_FANOUT), ("act", 1, DIR_FANOUT), # wrdata description ("wrdata", databits, DIR_FANOUT), @@ -46,6 +46,7 @@ def __init__(self, addressbits, bankbits, nranks, databits, nphases=1, # set all logic-inverted x_n signal resets to on at power-up p.cas.reset = 1 p.ras.reset = 1 + p.reset.reset = 1 p.cs_n.reset = -1 p.we.reset = 1 p.act.reset = 1 diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index a31dfb3..5a9c8f6 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -243,8 +243,6 @@ def elaborate(self, platform): controls = ["ras", "cas", "we", "clk_en", "odt"] if hasattr(self.pads, "rst"): # this gets renamed later to match dfi controls.append("rst") - if hasattr(self.pads, "reset_n"): - controls.append("reset_n") if hasattr(self.pads, "cs"): controls.append("cs") for name in controls: @@ -252,7 +250,7 @@ def elaborate(self, platform): pad = getattr(self.pads, name) # sigh, convention in nmigen_boards is "rst" but in # dfi.Interface it is "reset" - dfi2pads = {'rst': 'reset_n', 'cs': 'cs_n'} + dfi2pads = {'rst': 'reset', 'cs': 'cs_n'} name = dfi2pads.get(name, name) # remap if exists m.d.comb += [ pad.o_clk.eq(ClockSignal("dramsync")), diff --git a/gram/simulation/simsoctb.v b/gram/simulation/simsoctb.v index 5afa17f..7818b4e 100644 --- a/gram/simulation/simsoctb.v +++ b/gram/simulation/simsoctb.v @@ -48,7 +48,7 @@ module simsoctb; ddr3 #( .check_strict_timing(0) ) ram_chip ( - .rst_n(~dram_rst), + .rst_n(dram_rst), .ck(dram_ck), .ck_n(~dram_ck), .cke(dram_cke), diff --git a/gram/test/test_dfii.py b/gram/test/test_dfii.py index 4f5cc9b..7016da1 100644 --- a/gram/test/test_dfii.py +++ b/gram/test/test_dfii.py @@ -162,10 +162,10 @@ def test_reset(self): def process(): yield from wb_write(csrhost.bus, DFII_CONTROL_ADDR >> 2, (1 << 3), sel=0xF) yield - self.assertTrue((yield dut.master.phases[0].reset_n)) + self.assertTrue((yield dut.master.phases[0].reset)) yield from wb_write(csrhost.bus, DFII_CONTROL_ADDR >> 2, 0, sel=0xF) yield - self.assertFalse((yield dut.master.phases[0].reset_n)) + self.assertFalse((yield dut.master.phases[0].reset)) runSimulation(m, process, "test_dfiinjector.vcd") From 0107a36d930d4c8dab1099969b140673198d29a4 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 11 Mar 2022 13:47:13 +0000 Subject: [PATCH 23/47] add a 2nd clock, this one deliberately the same frequency as the main one, for now --- gram/simulation/crg.py | 34 +++++++++++++++++++++++++++++++--- gram/simulation/simsoc.py | 2 +- 2 files changed, 32 insertions(+), 4 deletions(-) diff --git a/gram/simulation/crg.py b/gram/simulation/crg.py index aa44c37..8f9a78b 100644 --- a/gram/simulation/crg.py +++ b/gram/simulation/crg.py @@ -168,8 +168,9 @@ def elaborate(self, platform): class ECPIX5CRG(Elaboratable): - def __init__(self, sys_clk_freq=100e6): + def __init__(self, sys_clk_freq=100e6, dram_clk_freq=None): self.sys_clk_freq = sys_clk_freq + self.dram_clk_freq = dram_clk_freq def elaborate(self, platform): m = Module() @@ -211,7 +212,12 @@ def elaborate(self, platform): local=False, reset_less=True) cd_init = ClockDomain("init", local=False) cd_sync = ClockDomain("sync", local=False) + # generate dram (and 2xdram if requested) cd_dramsync = ClockDomain("dramsync", local=False) + if self.dram_clk_freq is not None: + cd_dramsync2x = ClockDomain("dramsync2x", local=False) + cd_dramsync2x_unbuf = ClockDomain("dramsync2x_unbuf", + local=False, reset_less=True) m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~reset) pll.set_clkin_freq(100e6) pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq) @@ -220,6 +226,18 @@ def elaborate(self, platform): i_ECLKI = ClockSignal("sync2x_unbuf"), i_STOP = 0, o_ECLKO = ClockSignal("sync2x")) + + # if dram is a separate frequency request it. set up a 2nd 2x unbuf + if self.dram_clk_freq is not None: + pll.create_clkout(ClockSignal("dramsync2x_unbuf"), + 2*self.dram_clk_freq) + m.submodules += Instance("ECLKSYNCB", + i_ECLKI = ClockSignal("dramsync2x_unbuf"), + i_STOP = 0, + o_ECLKO = ClockSignal("dramsync2x")) + m.domains += cd_dramsync2x_unbuf + m.domains += cd_dramsync2x + m.domains += cd_sync2x_unbuf m.domains += cd_sync2x m.domains += cd_init @@ -240,7 +258,17 @@ def elaborate(self, platform): i_RST=0, o_CDIVX=ClockSignal("sync")) - # temporarily set dram sync clock exactly equal to main sync - m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync")) + # Generating dramsync (100Mhz) from dramsync2x + if self.dram_clk_freq is not None: + m.submodules += Instance("CLKDIVF", + p_DIV="2.0", + i_ALIGNWD=0, + i_CLKI=ClockSignal("dramsync2x"), + i_RST=0, + o_CDIVX=ClockSignal("dramsync")) + + # if no separate dram set dram sync clock exactly equal to main sync + if self.dram_clk_freq is None: + m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync")) return m diff --git a/gram/simulation/simsoc.py b/gram/simulation/simsoc.py index 29aa35b..0340275 100644 --- a/gram/simulation/simsoc.py +++ b/gram/simulation/simsoc.py @@ -19,7 +19,7 @@ class DDR3SoC(SoC, Elaboratable): def __init__(self, *, clk_freq, ddrphy_addr, dramcore_addr, ddr_addr): - self.crg = ECPIX5CRG() + self.crg = ECPIX5CRG(clk_freq, dram_clk_freq=clk_freq) self._decoder = wishbone.Decoder(addr_width=30, data_width=32, granularity=8, features={"cti", "bte"}) From b1b115dfb6433d125ec14ace0ad0f02fa2a82163 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Sun, 13 Mar 2022 11:16:01 +0000 Subject: [PATCH 24/47] add 1024M_ddr3_parameters.vh for MT41K64M16 --- .../dram_model/1024Mb_ddr3_parameters.vh | 703 ++++++++++++++++++ gram/simulation/dram_model/ddr3.v | 3 +- 2 files changed, 705 insertions(+), 1 deletion(-) create mode 100644 gram/simulation/dram_model/1024Mb_ddr3_parameters.vh diff --git a/gram/simulation/dram_model/1024Mb_ddr3_parameters.vh b/gram/simulation/dram_model/1024Mb_ddr3_parameters.vh new file mode 100644 index 0000000..f44bab6 --- /dev/null +++ b/gram/simulation/dram_model/1024Mb_ddr3_parameters.vh @@ -0,0 +1,703 @@ +/**************************************************************************************** +* +* Disclaimer This software code and all associated documentation, comments or other +* of Warranty: information (collectively "Software") is provided "AS IS" without +* warranty of any kind. MICRON TECHNOLOGY, INC. ("MTI") EXPRESSLY +* DISCLAIMS ALL WARRANTIES EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED +* TO, NONINFRINGEMENT OF THIRD PARTY RIGHTS, AND ANY IMPLIED WARRANTIES +* OF MERCHANTABILITY OR FITNESS FOR ANY PARTICULAR PURPOSE. MTI DOES NOT +* WARRANT THAT THE SOFTWARE WILL MEET YOUR REQUIREMENTS, OR THAT THE +* OPERATION OF THE SOFTWARE WILL BE UNINTERRUPTED OR ERROR-FREE. +* FURTHERMORE, MTI DOES NOT MAKE ANY REPRESENTATIONS REGARDING THE USE OR +* THE RESULTS OF THE USE OF THE SOFTWARE IN TERMS OF ITS CORRECTNESS, +* ACCURACY, RELIABILITY, OR OTHERWISE. THE ENTIRE RISK ARISING OUT OF USE +* OR PERFORMANCE OF THE SOFTWARE REMAINS WITH YOU. IN NO EVENT SHALL MTI, +* ITS AFFILIATED COMPANIES OR THEIR SUPPLIERS BE LIABLE FOR ANY DIRECT, +* INDIRECT, CONSEQUENTIAL, INCIDENTAL, OR SPECIAL DAMAGES (INCLUDING, +* WITHOUT LIMITATION, DAMAGES FOR LOSS OF PROFITS, BUSINESS INTERRUPTION, +* OR LOSS OF INFORMATION) ARISING OUT OF YOUR USE OF OR INABILITY TO USE +* THE SOFTWARE, EVEN IF MTI HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH +* DAMAGES. Because some jurisdictions prohibit the exclusion or +* limitation of liability for consequential or incidental damages, the +* above limitation may not apply to you. +* +* Copyright 2003 Micron Technology, Inc. All rights reserved. +* +****************************************************************************************/ + + // Timing parameters based on 1Gb_DDR3_SDRAM.cdf - Rev. L 09/12 EN + + // SYMBOL UNITS DESCRIPTION + // ------ ----- ----------- +`ifdef sg093 // sg093 is equivalent to the JEDEC DDR3-2133 (14-14-14) speed bin + parameter TCK_MIN = 938; // tCK ps Minimum Clock Cycle Time + parameter TJIT_PER = 50; // tJIT(per) ps Period JItter + parameter TJIT_CC = 100; // tJIT(cc) ps Cycle to Cycle jitter + parameter TERR_2PER = 74; // tERR(2per) ps Accumulated Error (2-cycle) + parameter TERR_3PER = 87; // tERR(3per) ps Accumulated Error (3-cycle) + parameter TERR_4PER = 97; // tERR(4per) ps Accumulated Error (4-cycle) + parameter TERR_5PER = 105; // tERR(5per) ps Accumulated Error (5-cycle) + parameter TERR_6PER = 111; // tERR(6per) ps Accumulated Error (6-cycle) + parameter TERR_7PER = 116; // tERR(7per) ps Accumulated Error (7-cycle) + parameter TERR_8PER = 121; // tERR(8per) ps Accumulated Error (8-cycle) + parameter TERR_9PER = 125; // tERR(9per) ps Accumulated Error (9-cycle) + parameter TERR_10PER = 128; // tERR(10per)ps Accumulated Error (10-cycle) + parameter TERR_11PER = 132; // tERR(11per)ps Accumulated Error (11-cycle) + parameter TERR_12PER = 134; // tERR(12per)ps Accumulated Error (12-cycle) + parameter TDS = 5; // tDS ps DQ and DM input setup time relative to DQS + parameter TDH = 20; // tDH ps DQ and DM input hold time relative to DQS + parameter TDQSQ = 70; // tDQSQ ps DQS-DQ skew, DQS to last DQ valid, per group, per access + parameter TDQSS = 0.27; // tDQSS tCK Rising clock edge to DQS/DQS# latching transition + parameter TDSS = 0.18; // tDSS tCK DQS falling edge to CLK rising (setup time) + parameter TDSH = 0.18; // tDSH tCK DQS falling edge from CLK rising (hold time) + parameter TDQSCK = 180; // tDQSCK ps DQS output access time from CK/CK# + parameter TQSH = 0.40; // tQSH tCK DQS Output High Pulse Width + parameter TQSL = 0.40; // tQSL tCK DQS Output Low Pulse Width + parameter TDIPW = 280; // tDIPW ps DQ and DM input Pulse Width + parameter TIPW = 470; // tIPW ps Control and Address input Pulse Width + parameter TIS = 35; // tIS ps Input Setup Time + parameter TIH = 75; // tIH ps Input Hold Time + parameter TRAS_MIN = 33000; // tRAS ps Minimum Active to Precharge command time + parameter TRC = 46130; // tRC ps Active to Active/Auto Refresh command time + parameter TRCD = 13090; // tRCD ps Active to Read/Write command time + parameter TRP = 13090; // tRP ps Precharge command period + parameter TXP = 6000; // tXP ps Exit power down to a valid command + parameter TCKE = 5000; // tCKE ps CKE minimum high or low pulse width + parameter TAON = 180; // tAON ps RTT turn-on from ODTLon reference + parameter TWLS = 122; // tWLS ps Setup time for tDQS flop + parameter TWLH = 122; // tWLH ps Hold time of tDQS flop + parameter TWLO = 7500; // tWLO ps Write levelization output delay + parameter TAA_MIN = 13090; // TAA ps Internal READ command to first data + parameter CL_TIME = 13090; // CL ps Minimum CAS Latency +`elsif sg107 // sg107 is equivalent to the JEDEC DDR3-1866 (13-13-13) speed bin + parameter TCK_MIN = 1071; // tCK ps Minimum Clock Cycle Time + parameter TJIT_PER = 60; // tJIT(per) ps Period JItter + parameter TJIT_CC = 120; // tJIT(cc) ps Cycle to Cycle jitter + parameter TERR_2PER = 88; // tERR(2per) ps Accumulated Error (2-cycle) + parameter TERR_3PER = 105; // tERR(3per) ps Accumulated Error (3-cycle) + parameter TERR_4PER = 117; // tERR(4per) ps Accumulated Error (4-cycle) + parameter TERR_5PER = 126; // tERR(5per) ps Accumulated Error (5-cycle) + parameter TERR_6PER = 133; // tERR(6per) ps Accumulated Error (6-cycle) + parameter TERR_7PER = 139; // tERR(7per) ps Accumulated Error (7-cycle) + parameter TERR_8PER = 145; // tERR(8per) ps Accumulated Error (8-cycle) + parameter TERR_9PER = 150; // tERR(9per) ps Accumulated Error (9-cycle) + parameter TERR_10PER = 154; // tERR(10per)ps Accumulated Error (10-cycle) + parameter TERR_11PER = 158; // tERR(11per)ps Accumulated Error (11-cycle) + parameter TERR_12PER = 161; // tERR(12per)ps Accumulated Error (12-cycle) + parameter TDS = 10; // tDS ps DQ and DM input setup time relative to DQS + parameter TDH = 20; // tDH ps DQ and DM input hold time relative to DQS + parameter TDQSQ = 80; // tDQSQ ps DQS-DQ skew, DQS to last DQ valid, per group, per access + parameter TDQSS = 0.27; // tDQSS tCK Rising clock edge to DQS/DQS# latching transition + parameter TDSS = 0.18; // tDSS tCK DQS falling edge to CLK rising (setup time) + parameter TDSH = 0.18; // tDSH tCK DQS falling edge from CLK rising (hold time) + parameter TDQSCK = 200; // tDQSCK ps DQS output access time from CK/CK# + parameter TQSH = 0.40; // tQSH tCK DQS Output High Pulse Width + parameter TQSL = 0.40; // tQSL tCK DQS Output Low Pulse Width + parameter TDIPW = 320; // tDIPW ps DQ and DM input Pulse Width + parameter TIPW = 535; // tIPW ps Control and Address input Pulse Width + parameter TIS = 50; // tIS ps Input Setup Time + parameter TIH = 100; // tIH ps Input Hold Time + parameter TRAS_MIN = 34000; // tRAS ps Minimum Active to Precharge command time + parameter TRC = 48910; // tRC ps Active to Active/Auto Refresh command time + parameter TRCD = 13910; // tRCD ps Active to Read/Write command time + parameter TRP = 13910; // tRP ps Precharge command period + parameter TXP = 6000; // tXP ps Exit power down to a valid command + parameter TCKE = 5000; // tCKE ps CKE minimum high or low pulse width + parameter TAON = 200; // tAON ps RTT turn-on from ODTLon reference + parameter TWLS = 140; // tWLS ps Setup time for tDQS flop + parameter TWLH = 140; // tWLH ps Hold time of tDQS flop + parameter TWLO = 7500; // tWLO ps Write levelization output delay + parameter TAA_MIN = 13910; // TAA ps Internal READ command to first data + parameter CL_TIME = 13910; // CL ps Minimum CAS Latency +`elsif sg125 // sg125 is equivalent to the JEDEC DDR3-1600 (11-11-11) speed bin + parameter TCK_MIN = 1250; // tCK ps Minimum Clock Cycle Time + parameter TJIT_PER = 70; // tJIT(per) ps Period JItter + parameter TJIT_CC = 140; // tJIT(cc) ps Cycle to Cycle jitter + parameter TERR_2PER = 103; // tERR(2per) ps Accumulated Error (2-cycle) + parameter TERR_3PER = 122; // tERR(3per) ps Accumulated Error (3-cycle) + parameter TERR_4PER = 136; // tERR(4per) ps Accumulated Error (4-cycle) + parameter TERR_5PER = 147; // tERR(5per) ps Accumulated Error (5-cycle) + parameter TERR_6PER = 155; // tERR(6per) ps Accumulated Error (6-cycle) + parameter TERR_7PER = 163; // tERR(7per) ps Accumulated Error (7-cycle) + parameter TERR_8PER = 169; // tERR(8per) ps Accumulated Error (8-cycle) + parameter TERR_9PER = 175; // tERR(9per) ps Accumulated Error (9-cycle) + parameter TERR_10PER = 180; // tERR(10per)ps Accumulated Error (10-cycle) + parameter TERR_11PER = 184; // tERR(11per)ps Accumulated Error (11-cycle) + parameter TERR_12PER = 188; // tERR(12per)ps Accumulated Error (12-cycle) + parameter TDS = 10; // tDS ps DQ and DM input setup time relative to DQS + parameter TDH = 45; // tDH ps DQ and DM input hold time relative to DQS + parameter TDQSQ = 100; // tDQSQ ps DQS-DQ skew, DQS to last DQ valid, per group, per access + parameter TDQSS = 0.27; // tDQSS tCK Rising clock edge to DQS/DQS# latching transition + parameter TDSS = 0.18; // tDSS tCK DQS falling edge to CLK rising (setup time) + parameter TDSH = 0.18; // tDSH tCK DQS falling edge from CLK rising (hold time) + parameter TDQSCK = 225; // tDQSCK ps DQS output access time from CK/CK# + parameter TQSH = 0.40; // tQSH tCK DQS Output High Pulse Width + parameter TQSL = 0.40; // tQSL tCK DQS Output Low Pulse Width + parameter TDIPW = 360; // tDIPW ps DQ and DM input Pulse Width + parameter TIPW = 560; // tIPW ps Control and Address input Pulse Width + parameter TIS = 170; // tIS ps Input Setup Time + parameter TIH = 120; // tIH ps Input Hold Time + parameter TRAS_MIN = 35000; // tRAS ps Minimum Active to Precharge command time + parameter TRC = 48750; // tRC ps Active to Active/Auto Refresh command time + parameter TRCD = 13750; // tRCD ps Active to Read/Write command time + parameter TRP = 13750; // tRP ps Precharge command period + parameter TXP = 6000; // tXP ps Exit power down to a valid command + parameter TCKE = 5000; // tCKE ps CKE minimum high or low pulse width + parameter TAON = 250; // tAON ps RTT turn-on from ODTLon reference + parameter TWLS = 165; // tWLS ps Setup time for tDQS flop + parameter TWLH = 165; // tWLH ps Hold time of tDQS flop + parameter TWLO = 7500; // tWLO ps Write levelization output delay + parameter TAA_MIN = 13750; // TAA ps Internal READ command to first data + parameter CL_TIME = 13750; // CL ps Minimum CAS Latency +`elsif sg15E // sg15E is equivalent to the JEDEC DDR3-1333H (9-9-9) speed bin + parameter TCK_MIN = 1500; // tCK ps Minimum Clock Cycle Time + parameter TJIT_PER = 80; // tJIT(per) ps Period JItter + parameter TJIT_CC = 160; // tJIT(cc) ps Cycle to Cycle jitter + parameter TERR_2PER = 118; // tERR(2per) ps Accumulated Error (2-cycle) + parameter TERR_3PER = 140; // tERR(3per) ps Accumulated Error (3-cycle) + parameter TERR_4PER = 155; // tERR(4per) ps Accumulated Error (4-cycle) + parameter TERR_5PER = 168; // tERR(5per) ps Accumulated Error (5-cycle) + parameter TERR_6PER = 177; // tERR(6per) ps Accumulated Error (6-cycle) + parameter TERR_7PER = 186; // tERR(7per) ps Accumulated Error (7-cycle) + parameter TERR_8PER = 193; // tERR(8per) ps Accumulated Error (8-cycle) + parameter TERR_9PER = 200; // tERR(9per) ps Accumulated Error (9-cycle) + parameter TERR_10PER = 205; // tERR(10per)ps Accumulated Error (10-cycle) + parameter TERR_11PER = 210; // tERR(11per)ps Accumulated Error (11-cycle) + parameter TERR_12PER = 215; // tERR(12per)ps Accumulated Error (12-cycle) + parameter TDS = 30; // tDS ps DQ and DM input setup time relative to DQS + parameter TDH = 65; // tDH ps DQ and DM input hold time relative to DQS + parameter TDQSQ = 125; // tDQSQ ps DQS-DQ skew, DQS to last DQ valid, per group, per access + parameter TDQSS = 0.25; // tDQSS tCK Rising clock edge to DQS/DQS# latching transition + parameter TDSS = 0.20; // tDSS tCK DQS falling edge to CLK rising (setup time) + parameter TDSH = 0.20; // tDSH tCK DQS falling edge from CLK rising (hold time) + parameter TDQSCK = 255; // tDQSCK ps DQS output access time from CK/CK# + parameter TQSH = 0.40; // tQSH tCK DQS Output High Pulse Width + parameter TQSL = 0.40; // tQSL tCK DQS Output Low Pulse Width + parameter TDIPW = 400; // tDIPW ps DQ and DM input Pulse Width + parameter TIPW = 620; // tIPW ps Control and Address input Pulse Width + parameter TIS = 190; // tIS ps Input Setup Time + parameter TIH = 140; // tIH ps Input Hold Time + parameter TRAS_MIN = 36000; // tRAS ps Minimum Active to Precharge command time + parameter TRC = 49500; // tRC ps Active to Active/Auto Refresh command time + parameter TRCD = 13500; // tRCD ps Active to Read/Write command time + parameter TRP = 13500; // tRP ps Precharge command period + parameter TXP = 6000; // tXP ps Exit power down to a valid command + parameter TCKE = 5625; // tCKE ps CKE minimum high or low pulse width + parameter TAON = 250; // tAON ps RTT turn-on from ODTLon reference + parameter TWLS = 195; // tWLS ps Setup time for tDQS flop + parameter TWLH = 195; // tWLH ps Hold time of tDQS flop + parameter TWLO = 9000; // tWLO ps Write levelization output delay + parameter TAA_MIN = 13500; // TAA ps Internal READ command to first data + parameter CL_TIME = 13500; // CL ps Minimum CAS Latency +`elsif sg15 // sg15 is equivalent to the JEDEC DDR3-1333J (10-10-10) speed bin + parameter TCK_MIN = 1500; // tCK ps Minimum Clock Cycle Time + parameter TJIT_PER = 80; // tJIT(per) ps Period JItter + parameter TJIT_CC = 160; // tJIT(cc) ps Cycle to Cycle jitter + parameter TERR_2PER = 118; // tERR(2per) ps Accumulated Error (2-cycle) + parameter TERR_3PER = 140; // tERR(3per) ps Accumulated Error (3-cycle) + parameter TERR_4PER = 155; // tERR(4per) ps Accumulated Error (4-cycle) + parameter TERR_5PER = 168; // tERR(5per) ps Accumulated Error (5-cycle) + parameter TERR_6PER = 177; // tERR(6per) ps Accumulated Error (6-cycle) + parameter TERR_7PER = 186; // tERR(7per) ps Accumulated Error (7-cycle) + parameter TERR_8PER = 193; // tERR(8per) ps Accumulated Error (8-cycle) + parameter TERR_9PER = 200; // tERR(9per) ps Accumulated Error (9-cycle) + parameter TERR_10PER = 205; // tERR(10per)ps Accumulated Error (10-cycle) + parameter TERR_11PER = 210; // tERR(11per)ps Accumulated Error (11-cycle) + parameter TERR_12PER = 215; // tERR(12per)ps Accumulated Error (12-cycle) + parameter TDS = 30; // tDS ps DQ and DM input setup time relative to DQS + parameter TDH = 65; // tDH ps DQ and DM input hold time relative to DQS + parameter TDQSQ = 125; // tDQSQ ps DQS-DQ skew, DQS to last DQ valid, per group, per access + parameter TDQSS = 0.25; // tDQSS tCK Rising clock edge to DQS/DQS# latching transition + parameter TDSS = 0.20; // tDSS tCK DQS falling edge to CLK rising (setup time) + parameter TDSH = 0.20; // tDSH tCK DQS falling edge from CLK rising (hold time) + parameter TDQSCK = 255; // tDQSCK ps DQS output access time from CK/CK# + parameter TQSH = 0.40; // tQSH tCK DQS Output High Pulse Width + parameter TQSL = 0.40; // tQSL tCK DQS Output Low Pulse Width + parameter TDIPW = 400; // tDIPW ps DQ and DM input Pulse Width + parameter TIPW = 620; // tIPW ps Control and Address input Pulse Width + parameter TIS = 190; // tIS ps Input Setup Time + parameter TIH = 140; // tIH ps Input Hold Time + parameter TRAS_MIN = 36000; // tRAS ps Minimum Active to Precharge command time + parameter TRC = 51000; // tRC ps Active to Active/Auto Refresh command time + parameter TRCD = 15000; // tRCD ps Active to Read/Write command time + parameter TRP = 15000; // tRP ps Precharge command period + parameter TXP = 6000; // tXP ps Exit power down to a valid command + parameter TCKE = 5625; // tCKE ps CKE minimum high or low pulse width + parameter TAON = 250; // tAON ps RTT turn-on from ODTLon reference + parameter TWLS = 195; // tWLS ps Setup time for tDQS flop + parameter TWLH = 195; // tWLH ps Hold time of tDQS flop + parameter TWLO = 9000; // tWLO ps Write levelization output delay + parameter TAA_MIN = 15000; // TAA ps Internal READ command to first data + parameter CL_TIME = 15000; // CL ps Minimum CAS Latency +`elsif sg187E // sg187E is equivalent to the JEDEC DDR3-1066F (7-7-7) speed bin + parameter TCK_MIN = 1875; // tCK ps Minimum Clock Cycle Time + parameter TJIT_PER = 90; // tJIT(per) ps Period JItter + parameter TJIT_CC = 180; // tJIT(cc) ps Cycle to Cycle jitter + parameter TERR_2PER = 132; // tERR(2per) ps Accumulated Error (2-cycle) + parameter TERR_3PER = 157; // tERR(3per) ps Accumulated Error (3-cycle) + parameter TERR_4PER = 175; // tERR(4per) ps Accumulated Error (4-cycle) + parameter TERR_5PER = 188; // tERR(5per) ps Accumulated Error (5-cycle) + parameter TERR_6PER = 200; // tERR(6per) ps Accumulated Error (6-cycle) + parameter TERR_7PER = 209; // tERR(7per) ps Accumulated Error (7-cycle) + parameter TERR_8PER = 217; // tERR(8per) ps Accumulated Error (8-cycle) + parameter TERR_9PER = 224; // tERR(9per) ps Accumulated Error (9-cycle) + parameter TERR_10PER = 231; // tERR(10per)ps Accumulated Error (10-cycle) + parameter TERR_11PER = 237; // tERR(11per)ps Accumulated Error (11-cycle) + parameter TERR_12PER = 242; // tERR(12per)ps Accumulated Error (12-cycle) + parameter TDS = 75; // tDS ps DQ and DM input setup time relative to DQS + parameter TDH = 100; // tDH ps DQ and DM input hold time relative to DQS + parameter TDQSQ = 150; // tDQSQ ps DQS-DQ skew, DQS to last DQ valid, per group, per access + parameter TDQSS = 0.25; // tDQSS tCK Rising clock edge to DQS/DQS# latching transition + parameter TDSS = 0.20; // tDSS tCK DQS falling edge to CLK rising (setup time) + parameter TDSH = 0.20; // tDSH tCK DQS falling edge from CLK rising (hold time) + parameter TDQSCK = 300; // tDQSCK ps DQS output access time from CK/CK# + parameter TQSH = 0.38; // tQSH tCK DQS Output High Pulse Width + parameter TQSL = 0.38; // tQSL tCK DQS Output Low Pulse Width + parameter TDIPW = 490; // tDIPW ps DQ and DM input Pulse Width + parameter TIPW = 780; // tIPW ps Control and Address input Pulse Width + parameter TIS = 275; // tIS ps Input Setup Time + parameter TIH = 200; // tIH ps Input Hold Time + parameter TRAS_MIN = 37500; // tRAS ps Minimum Active to Precharge command time + parameter TRC = 50625; // tRC ps Active to Active/Auto Refresh command time + parameter TRCD = 13125; // tRCD ps Active to Read/Write command time + parameter TRP = 13125; // tRP ps Precharge command period + parameter TXP = 7500; // tXP ps Exit power down to a valid command + parameter TCKE = 5625; // tCKE ps CKE minimum high or low pulse width + parameter TAON = 300; // tAON ps RTT turn-on from ODTLon reference + parameter TWLS = 245; // tWLS ps Setup time for tDQS flop + parameter TWLH = 245; // tWLH ps Hold time of tDQS flop + parameter TWLO = 9000; // tWLO ps Write levelization output delay + parameter TAA_MIN = 13125; // TAA ps Internal READ command to first data + parameter CL_TIME = 13125; // CL ps Minimum CAS Latency +`elsif sg187 // sg187 is equivalent to the JEDEC DDR3-1066G (8-8-8) speed bin + parameter TCK_MIN = 1875; // tCK ps Minimum Clock Cycle Time + parameter TJIT_PER = 90; // tJIT(per) ps Period JItter + parameter TJIT_CC = 180; // tJIT(cc) ps Cycle to Cycle jitter + parameter TERR_2PER = 132; // tERR(2per) ps Accumulated Error (2-cycle) + parameter TERR_3PER = 157; // tERR(3per) ps Accumulated Error (3-cycle) + parameter TERR_4PER = 175; // tERR(4per) ps Accumulated Error (4-cycle) + parameter TERR_5PER = 188; // tERR(5per) ps Accumulated Error (5-cycle) + parameter TERR_6PER = 200; // tERR(6per) ps Accumulated Error (6-cycle) + parameter TERR_7PER = 209; // tERR(7per) ps Accumulated Error (7-cycle) + parameter TERR_8PER = 217; // tERR(8per) ps Accumulated Error (8-cycle) + parameter TERR_9PER = 224; // tERR(9per) ps Accumulated Error (9-cycle) + parameter TERR_10PER = 231; // tERR(10per)ps Accumulated Error (10-cycle) + parameter TERR_11PER = 237; // tERR(11per)ps Accumulated Error (11-cycle) + parameter TERR_12PER = 242; // tERR(12per)ps Accumulated Error (12-cycle) + parameter TDS = 75; // tDS ps DQ and DM input setup time relative to DQS + parameter TDH = 100; // tDH ps DQ and DM input hold time relative to DQS + parameter TDQSQ = 150; // tDQSQ ps DQS-DQ skew, DQS to last DQ valid, per group, per access + parameter TDQSS = 0.25; // tDQSS tCK Rising clock edge to DQS/DQS# latching transition + parameter TDSS = 0.20; // tDSS tCK DQS falling edge to CLK rising (setup time) + parameter TDSH = 0.20; // tDSH tCK DQS falling edge from CLK rising (hold time) + parameter TDQSCK = 300; // tDQSCK ps DQS output access time from CK/CK# + parameter TQSH = 0.38; // tQSH tCK DQS Output High Pulse Width + parameter TQSL = 0.38; // tQSL tCK DQS Output Low Pulse Width + parameter TDIPW = 490; // tDIPW ps DQ and DM input Pulse Width + parameter TIPW = 780; // tIPW ps Control and Address input Pulse Width + parameter TIS = 275; // tIS ps Input Setup Time + parameter TIH = 200; // tIH ps Input Hold Time + parameter TRAS_MIN = 37500; // tRAS ps Minimum Active to Precharge command time + parameter TRC = 52500; // tRC ps Active to Active/Auto Refresh command time + parameter TRCD = 15000; // tRCD ps Active to Read/Write command time + parameter TRP = 15000; // tRP ps Precharge command period + parameter TXP = 7500; // tXP ps Exit power down to a valid command + parameter TCKE = 5625; // tCKE ps CKE minimum high or low pulse width + parameter TAON = 300; // tAON ps RTT turn-on from ODTLon reference + parameter TWLS = 245; // tWLS ps Setup time for tDQS flop + parameter TWLH = 245; // tWLH ps Hold time of tDQS flop + parameter TWLO = 9000; // tWLO ps Write levelization output delay + parameter TAA_MIN = 15000; // TAA ps Internal READ command to first data + parameter CL_TIME = 15000; // CL ps Minimum CAS Latency +`elsif sg25E // sg25E is equivalent to the JEDEC DDR3-800E (5-5-5) speed bin + parameter TCK_MIN = 2500; // tCK ps Minimum Clock Cycle Time + parameter TJIT_PER = 100; // tJIT(per) ps Period JItter + parameter TJIT_CC = 200; // tJIT(cc) ps Cycle to Cycle jitter + parameter TERR_2PER = 147; // tERR(2per) ps Accumulated Error (2-cycle) + parameter TERR_3PER = 175; // tERR(3per) ps Accumulated Error (3-cycle) + parameter TERR_4PER = 194; // tERR(4per) ps Accumulated Error (4-cycle) + parameter TERR_5PER = 209; // tERR(5per) ps Accumulated Error (5-cycle) + parameter TERR_6PER = 222; // tERR(6per) ps Accumulated Error (6-cycle) + parameter TERR_7PER = 232; // tERR(7per) ps Accumulated Error (7-cycle) + parameter TERR_8PER = 241; // tERR(8per) ps Accumulated Error (8-cycle) + parameter TERR_9PER = 249; // tERR(9per) ps Accumulated Error (9-cycle) + parameter TERR_10PER = 257; // tERR(10per)ps Accumulated Error (10-cycle) + parameter TERR_11PER = 263; // tERR(11per)ps Accumulated Error (11-cycle) + parameter TERR_12PER = 269; // tERR(12per)ps Accumulated Error (12-cycle) + parameter TDS = 125; // tDS ps DQ and DM input setup time relative to DQS + parameter TDH = 150; // tDH ps DQ and DM input hold time relative to DQS + parameter TDQSQ = 200; // tDQSQ ps DQS-DQ skew, DQS to last DQ valid, per group, per access + parameter TDQSS = 0.25; // tDQSS tCK Rising clock edge to DQS/DQS# latching transition + parameter TDSS = 0.20; // tDSS tCK DQS falling edge to CLK rising (setup time) + parameter TDSH = 0.20; // tDSH tCK DQS falling edge from CLK rising (hold time) + parameter TDQSCK = 400; // tDQSCK ps DQS output access time from CK/CK# + parameter TQSH = 0.38; // tQSH tCK DQS Output High Pulse Width + parameter TQSL = 0.38; // tQSL tCK DQS Output Low Pulse Width + parameter TDIPW = 600; // tDIPW ps DQ and DM input Pulse Width + parameter TIPW = 900; // tIPW ps Control and Address input Pulse Width + parameter TIS = 350; // tIS ps Input Setup Time + parameter TIH = 275; // tIH ps Input Hold Time + parameter TRAS_MIN = 37500; // tRAS ps Minimum Active to Precharge command time + parameter TRC = 50000; // tRC ps Active to Active/Auto Refresh command time + parameter TRCD = 12500; // tRCD ps Active to Read/Write command time + parameter TRP = 12500; // tRP ps Precharge command period + parameter TXP = 7500; // tXP ps Exit power down to a valid command + parameter TCKE = 7500; // tCKE ps CKE minimum high or low pulse width + parameter TAON = 400; // tAON ps RTT turn-on from ODTLon reference + parameter TWLS = 325; // tWLS ps Setup time for tDQS flop + parameter TWLH = 325; // tWLH ps Hold time of tDQS flop + parameter TWLO = 9000; // tWLO ps Write levelization output delay + parameter TAA_MIN = 12500; // TAA ps Internal READ command to first data + parameter CL_TIME = 12500; // CL ps Minimum CAS Latency +`else + `define sg25 // sg25 is equivalent to the JEDEC DDR3-800 (6-6-6) speed bin + parameter TCK_MIN = 2500; // tCK ps Minimum Clock Cycle Time + parameter TJIT_PER = 100; // tJIT(per) ps Period JItter + parameter TJIT_CC = 200; // tJIT(cc) ps Cycle to Cycle jitter + parameter TERR_2PER = 147; // tERR(2per) ps Accumulated Error (2-cycle) + parameter TERR_3PER = 175; // tERR(3per) ps Accumulated Error (3-cycle) + parameter TERR_4PER = 194; // tERR(4per) ps Accumulated Error (4-cycle) + parameter TERR_5PER = 209; // tERR(5per) ps Accumulated Error (5-cycle) + parameter TERR_6PER = 222; // tERR(6per) ps Accumulated Error (6-cycle) + parameter TERR_7PER = 232; // tERR(7per) ps Accumulated Error (7-cycle) + parameter TERR_8PER = 241; // tERR(8per) ps Accumulated Error (8-cycle) + parameter TERR_9PER = 249; // tERR(9per) ps Accumulated Error (9-cycle) + parameter TERR_10PER = 257; // tERR(10per)ps Accumulated Error (10-cycle) + parameter TERR_11PER = 263; // tERR(11per)ps Accumulated Error (11-cycle) + parameter TERR_12PER = 269; // tERR(12per)ps Accumulated Error (12-cycle) + parameter TDS = 125; // tDS ps DQ and DM input setup time relative to DQS + parameter TDH = 150; // tDH ps DQ and DM input hold time relative to DQS + parameter TDQSQ = 200; // tDQSQ ps DQS-DQ skew, DQS to last DQ valid, per group, per access + parameter TDQSS = 0.25; // tDQSS tCK Rising clock edge to DQS/DQS# latching transition + parameter TDSS = 0.20; // tDSS tCK DQS falling edge to CLK rising (setup time) + parameter TDSH = 0.20; // tDSH tCK DQS falling edge from CLK rising (hold time) + parameter TDQSCK = 400; // tDQSCK ps DQS output access time from CK/CK# + parameter TQSH = 0.38; // tQSH tCK DQS Output High Pulse Width + parameter TQSL = 0.38; // tQSL tCK DQS Output Low Pulse Width + parameter TDIPW = 600; // tDIPW ps DQ and DM input Pulse Width + parameter TIPW = 900; // tIPW ps Control and Address input Pulse Width + parameter TIS = 350; // tIS ps Input Setup Time + parameter TIH = 275; // tIH ps Input Hold Time + parameter TRAS_MIN = 37500; // tRAS ps Minimum Active to Precharge command time + parameter TRC = 52500; // tRC ps Active to Active/Auto Refresh command time + parameter TRCD = 15000; // tRCD ps Active to Read/Write command time + parameter TRP = 15000; // tRP ps Precharge command period + parameter TXP = 7500; // tXP ps Exit power down to a valid command + parameter TCKE = 7500; // tCKE ps CKE minimum high or low pulse width + parameter TAON = 400; // tAON ps RTT turn-on from ODTLon reference + parameter TWLS = 325; // tWLS ps Setup time for tDQS flop + parameter TWLH = 325; // tWLH ps Hold time of tDQS flop + parameter TWLO = 9000; // tWLO ps Write levelization output delay + parameter TAA_MIN = 15000; // TAA ps Internal READ command to first data + parameter CL_TIME = 15000; // CL ps Minimum CAS Latency +`endif + + parameter TDQSCK_DLLDIS = TDQSCK; // tDQSCK ps for DLLDIS mode, timing not guaranteed + +`ifdef x16 + `ifdef sg093 + parameter TRRD = 6000; // tRRD ps (2KB page size) Active bank a to Active bank b command time + parameter TFAW = 35000; // tFAW ps (2KB page size) Four Bank Activate window + `elsif sg107 + parameter TRRD = 6000; // tRRD ps (2KB page size) Active bank a to Active bank b command time + parameter TFAW = 35000; // tFAW ps (2KB page size) Four Bank Activate window + `elsif sg125 + parameter TRRD = 7500; // tRRD ps (2KB page size) Active bank a to Active bank b command time + parameter TFAW = 40000; // tFAW ps (2KB page size) Four Bank Activate window + `elsif sg15E + parameter TRRD = 7500; // tRRD ps (2KB page size) Active bank a to Active bank b command time + parameter TFAW = 45000; // tFAW ps (2KB page size) Four Bank Activate window + `elsif sg15 + parameter TRRD = 7500; // tRRD ps (2KB page size) Active bank a to Active bank b command time + parameter TFAW = 45000; // tFAW ps (2KB page size) Four Bank Activate window + `elsif sg187E + parameter TRRD = 10000; // tRRD ps (2KB page size) Active bank a to Active bank b command time + parameter TFAW = 50000; // tFAW ps (2KB page size) Four Bank Activate window + `elsif sg187 + parameter TRRD = 10000; // tRRD ps (2KB page size) Active bank a to Active bank b command time + parameter TFAW = 50000; // tFAW ps (2KB page size) Four Bank Activate window + `elsif sg25E + parameter TRRD = 10000; // tRRD ps (2KB page size) Active bank a to Active bank b command time + parameter TFAW = 50000; // tFAW ps (2KB page size) Four Bank Activate window + `else // sg25 + parameter TRRD = 10000; // tRRD ps (2KB page size) Active bank a to Active bank b command time + parameter TFAW = 50000; // tFAW ps (2KB page size) Four Bank Activate window + `endif +`else // x4, x8 + `ifdef sg093 + parameter TRRD = 5000; // tRRD ps (1KB page size) Active bank a to Active bank b command time + parameter TFAW = 25000; // tFAW ps (1KB page size) Four Bank Activate window + `elsif sg107 + parameter TRRD = 5000; // tRRD ps (1KB page size) Active bank a to Active bank b command time + parameter TFAW = 25000; // tFAW ps (1KB page size) Four Bank Activate window + `elsif sg125 + parameter TRRD = 6000; // tRRD ps (1KB page size) Active bank a to Active bank b command time + parameter TFAW = 30000; // tFAW ps (1KB page size) Four Bank Activate window + `elsif sg15E + parameter TRRD = 6000; // tRRD ps (1KB page size) Active bank a to Active bank b command time + parameter TFAW = 30000; // tFAW ps (1KB page size) Four Bank Activate window + `elsif sg15 + parameter TRRD = 6000; // tRRD ps (1KB page size) Active bank a to Active bank b command time + parameter TFAW = 30000; // tFAW ps (1KB page size) Four Bank Activate window + `elsif sg187E + parameter TRRD = 7500; // tRRD ps (1KB page size) Active bank a to Active bank b command time + parameter TFAW = 37500; // tFAW ps (1KB page size) Four Bank Activate window + `elsif sg187 + parameter TRRD = 7500; // tRRD ps (1KB page size) Active bank a to Active bank b command time + parameter TFAW = 37500; // tFAW ps (1KB page size) Four Bank Activate window + `elsif sg25E + parameter TRRD = 10000; // tRRD ps (1KB page size) Active bank a to Active bank b command time + parameter TFAW = 40000; // tFAW ps (1KB page size) Four Bank Activate window + `else // sg25 + parameter TRRD = 10000; // tRRD ps (1KB page size) Active bank a to Active bank b command time + parameter TFAW = 40000; // tFAW ps (1KB page size) Four Bank Activate window + `endif +`endif + + // Timing Parameters + + // Mode Register + parameter CL_MIN = 5; // CL tCK Minimum CAS Latency + parameter CL_MAX = 14; // CL tCK Maximum CAS Latency + parameter AL_MIN = 0; // AL tCK Minimum Additive Latency + parameter AL_MAX = 2; // AL tCK Maximum Additive Latency + parameter WR_MIN = 5; // WR tCK Minimum Write Recovery + parameter WR_MAX = 16; // WR tCK Maximum Write Recovery + parameter BL_MIN = 4; // BL tCK Minimum Burst Length + parameter BL_MAX = 8; // BL tCK Minimum Burst Length + parameter CWL_MIN = 5; // CWL tCK Minimum CAS Write Latency + parameter CWL_MAX = 10; // CWL tCK Maximum CAS Write Latency + + // Clock + parameter TCK_MAX = 3300; // tCK ps Maximum Clock Cycle Time + parameter TCH_AVG_MIN = 0.47; // tCH tCK Minimum Clock High-Level Pulse Width + parameter TCL_AVG_MIN = 0.47; // tCL tCK Minimum Clock Low-Level Pulse Width + parameter TCH_AVG_MAX = 0.53; // tCH tCK Maximum Clock High-Level Pulse Width + parameter TCL_AVG_MAX = 0.53; // tCL tCK Maximum Clock Low-Level Pulse Width + parameter TCH_ABS_MIN = 0.43; // tCH tCK Minimum Clock High-Level Pulse Width + parameter TCL_ABS_MIN = 0.43; // tCL tCK Maximum Clock Low-Level Pulse Width + parameter TCKE_TCK = 3; // tCKE tCK CKE minimum high or low pulse width + parameter TAA_MAX = 20000; // TAA ps Internal READ command to first data + + // Data OUT + parameter TQH = 0.38; // tQH ps DQ output hold time from DQS, DQS# + // Data Strobe OUT + parameter TRPRE = 0.90; // tRPRE tCK DQS Read Preamble + parameter TRPST = 0.30; // tRPST tCK DQS Read Postamble + // Data Strobe IN + parameter TDQSH = 0.45; // tDQSH tCK DQS input High Pulse Width + parameter TDQSL = 0.45; // tDQSL tCK DQS input Low Pulse Width + parameter TWPRE = 0.90; // tWPRE tCK DQS Write Preamble + parameter TWPST = 0.30; // tWPST tCK DQS Write Postamble + // Command and Address + integer TZQCS; // tZQCS tCK ZQ Cal (Short) time + integer TZQINIT; // tZQinit tCK ZQ Cal (Long) time + integer TZQOPER; // tZQoper tCK ZQ Cal (Long) time + parameter TCCD = 4; // tCCD tCK Cas to Cas command delay + parameter TCCD_DG = 2; // tCCD_DG tCK Cas to Cas command delay to different group + parameter TRAS_MAX = 60e9; // tRAS ps Maximum Active to Precharge command time + parameter TWR = 15000; // tWR ps Write recovery time + parameter TMRD = 4; // tMRD tCK Load Mode Register command cycle time + parameter TMOD = 15000; // tMOD ps LOAD MODE to non-LOAD MODE command cycle time + parameter TMOD_TCK = 12; // tMOD tCK LOAD MODE to non-LOAD MODE command cycle time + parameter TRRD_TCK = 4; // tRRD tCK Active bank a to Active bank b command time + parameter TRRD_DG = 3000; // tRRD_DG ps Active bank a to Active bank b command time to different group + parameter TRRD_DG_TCK = 2; // tRRD_DG tCK Active bank a to Active bank b command time to different group + parameter TRTP = 7500; // tRTP ps Read to Precharge command delay + parameter TRTP_TCK = 4; // tRTP tCK Read to Precharge command delay + parameter TWTR = 7500; // tWTR ps Write to Read command delay + parameter TWTR_DG = 3750; // tWTR_DG ps Write to Read command delay to different group + parameter TWTR_TCK = 4; // tWTR tCK Write to Read command delay + parameter TWTR_DG_TCK = 2; // tWTR_DG tCK Write to Read command delay to different group + parameter TDLLK = 512; // tDLLK tCK DLL locking time + // Refresh - 1Gb + parameter TRFC_MIN = 110000; // tRFC ps Refresh to Refresh Command interval minimum value + parameter TRFC_MAX =70200000; // tRFC ps Refresh to Refresh Command Interval maximum value + // Power Down + parameter TXP_TCK = 3; // tXP tCK Exit power down to a valid command + parameter TXPDLL = 24000; // tXPDLL ps Exit precharge power down to READ or WRITE command (DLL-off mode) + parameter TXPDLL_TCK = 10; // tXPDLL tCK Exit precharge power down to READ or WRITE command (DLL-off mode) + parameter TACTPDEN = 1; // tACTPDEN tCK Timing of last ACT command to power down entry + parameter TPRPDEN = 1; // tPREPDEN tCK Timing of last PRE command to power down entry + parameter TREFPDEN = 1; // tARPDEN tCK Timing of last REFRESH command to power down entry + parameter TCPDED = 1; // tCPDED tCK Command pass disable/enable delay + parameter TPD_MAX =TRFC_MAX; // tPD ps Power-down entry-to-exit timing + parameter TXPR = 120000; // tXPR ps Exit Reset from CKE assertion to a valid command + parameter TXPR_TCK = 5; // tXPR tCK Exit Reset from CKE assertion to a valid command + // Self Refresh + parameter TXS = 120000; // tXS ps Exit self refesh to a non-read or write command + parameter TXS_TCK = 5; // tXS tCK Exit self refesh to a non-read or write command + parameter TXSDLL = TDLLK; // tXSRD tCK Exit self refresh to a read or write command + parameter TISXR = TIS; // tISXR ps CKE setup time during self refresh exit. + parameter TCKSRE = 10000; // tCKSRE ps Valid Clock requirement after self refresh entry (SRE) + parameter TCKSRE_TCK = 5; // tCKSRE tCK Valid Clock requirement after self refresh entry (SRE) + parameter TCKSRX = 10000; // tCKSRX ps Valid Clock requirement prior to self refresh exit (SRX) + parameter TCKSRX_TCK = 5; // tCKSRX tCK Valid Clock requirement prior to self refresh exit (SRX) + parameter TCKESR_TCK = 4; // tCKESR tCK Minimum CKE low width for Self Refresh entry to exit timing + // ODT + parameter TAOF = 0.7; // tAOF tCK RTT turn-off from ODTLoff reference + parameter TAONPD = 8500; // tAONPD ps Asynchronous RTT turn-on delay (Power-Down with DLL frozen) + parameter TAOFPD = 8500; // tAONPD ps Asynchronous RTT turn-off delay (Power-Down with DLL frozen) + parameter ODTH4 = 4; // ODTH4 tCK ODT minimum HIGH time after ODT assertion or write (BL4) + parameter ODTH8 = 6; // ODTH8 tCK ODT minimum HIGH time after write (BL8) + parameter TADC = 0.7; // tADC tCK RTT dynamic change skew + // Write Levelization + parameter TWLMRD = 40; // tWLMRD tCK First DQS pulse rising edge after tDQSS margining mode is programmed + parameter TWLDQSEN = 25; // tWLDQSEN tCK DQS/DQS delay after tDQSS margining mode is programmed + parameter TWLOE = 2000; // tWLOE ps Write levelization output error + + // Size Parameters based on Part Width + +`ifdef x4 + parameter DM_BITS = 1; // Set this parameter to control how many Data Mask bits are used + parameter ADDR_BITS = 14; // MAX Address Bits + parameter ROW_BITS = 14; // Set this parameter to control how many Address bits are used + parameter COL_BITS = 11; // Set this parameter to control how many Column bits are used + parameter DQ_BITS = 4; // Set this parameter to control how many Data bits are used **Same as part bit width** + parameter DQS_BITS = 1; // Set this parameter to control how many Dqs bits are used +`elsif x8 + parameter DM_BITS = 1; // Set this parameter to control how many Data Mask bits are used + parameter ADDR_BITS = 14; // MAX Address Bits + parameter ROW_BITS = 14; // Set this parameter to control how many Address bits are used + parameter COL_BITS = 10; // Set this parameter to control how many Column bits are used + parameter DQ_BITS = 8; // Set this parameter to control how many Data bits are used **Same as part bit width** + parameter DQS_BITS = 1; // Set this parameter to control how many Dqs bits are used +`else + `define x16 + parameter DM_BITS = 2; // Set this parameter to control how many Data Mask bits are used + parameter ADDR_BITS = 13; // MAX Address Bits + parameter ROW_BITS = 13; // Set this parameter to control how many Address bits are used + parameter COL_BITS = 10; // Set this parameter to control how many Column bits are used + parameter DQ_BITS = 16; // Set this parameter to control how many Data bits are used **Same as part bit width** + parameter DQS_BITS = 2; // Set this parameter to control how many Dqs bits are used +`endif + + // Size Parameters + parameter BA_BITS = 3; // Set this parmaeter to control how many Bank Address bits are used + parameter MEM_BITS = 10; // Set this parameter to control how many write data bursts can be stored in memory. The default is 2^10=1024. + parameter AP = 10; // the address bit that controls auto-precharge and precharge-all + parameter BC = 12; // the address bit that controls burst chop + parameter BL_BITS = 3; // the number of bits required to count to BL_MAX + parameter BO_BITS = 2; // the number of Burst Order Bits + +`ifdef QUAD_RANK + parameter CS_BITS = 4; // Number of Chip Select Bits + parameter RANKS = 4; // Number of Chip Selects +`elsif DUAL_RANK + parameter CS_BITS = 2; // Number of Chip Select Bits + parameter RANKS = 2; // Number of Chip Selects +`else + parameter CS_BITS = 1; // Number of Chip Select Bits + parameter RANKS = 1; // Number of Chip Selects +`endif + + // Simulation parameters + parameter RZQ = 240; // termination resistance + parameter PRE_DEF_PAT = 8'hAA; // value returned during mpr pre-defined pattern readout + parameter STOP_ON_ERROR = 1; // If set to 1, the model will halt on command sequence/major errors + parameter DEBUG = 1; // Turn on Debug messages + parameter BUS_DELAY = 0; // delay in nanoseconds + parameter RANDOM_OUT_DELAY = 0; // If set to 1, the model will put a random amount of delay on DQ/DQS during reads + parameter RANDOM_SEED = 31913; //seed value for random generator. + + parameter RDQSEN_PRE = 2; // DQS driving time prior to first read strobe + parameter RDQSEN_PST = 1; // DQS driving time after last read strobe + parameter RDQS_PRE = 2; // DQS low time prior to first read strobe + parameter RDQS_PST = 1; // DQS low time after last read strobe + parameter RDQEN_PRE = 0; // DQ/DM driving time prior to first read data + parameter RDQEN_PST = 0; // DQ/DM driving time after last read data + parameter WDQS_PRE = 2; // DQS half clock periods prior to first write strobe + parameter WDQS_PST = 1; // DQS half clock periods after last write strobe + +// check for legal cas latency based on the cas write latency +function valid_cl; + input [3:0] cl; + input [3:0] cwl; + + case ({cwl, cl}) +`ifdef sg093 + {4'd5 , 4'd5 }, + {4'd5 , 4'd6 }, + {4'd6 , 4'd7 }, + {4'd6 , 4'd8 }, + {4'd7 , 4'd9 }, + {4'd7 , 4'd10}, + {4'd8 , 4'd11}, + {4'd9 , 4'd13}, + {4'd10, 4'd14}: valid_cl = 1; +`elsif sg107 + {4'd5, 4'd5 }, + {4'd5, 4'd6 }, + {4'd6, 4'd7 }, + {4'd6, 4'd8 }, + {4'd7, 4'd9 }, + {4'd7, 4'd10}, + {4'd8, 4'd11}, + {4'd9, 4'd13}: valid_cl = 1; +`elsif sg125 + {4'd5, 4'd5 }, + {4'd5, 4'd6 }, + {4'd6, 4'd7 }, + {4'd6, 4'd8 }, + {4'd7, 4'd9 }, + {4'd7, 4'd10}, + {4'd8, 4'd11}: valid_cl = 1; +`elsif sg15E + {4'd5, 4'd5 }, + {4'd5, 4'd6 }, + {4'd6, 4'd7 }, + {4'd6, 4'd8 }, + {4'd7, 4'd9 }, + {4'd7, 4'd10}: valid_cl = 1; +`elsif sg15 + {4'd5, 4'd5 }, + {4'd5, 4'd6 }, + {4'd6, 4'd8 }, + {4'd7, 4'd10}: valid_cl = 1; +`elsif sg187E + {4'd5, 4'd5 }, + {4'd5, 4'd6 }, + {4'd6, 4'd7 }, + {4'd6, 4'd8 }: valid_cl = 1; +`elsif sg187 + {4'd5, 4'd5 }, + {4'd5, 4'd6 }, + {4'd6, 4'd8 }: valid_cl = 1; +`elsif sg25E + {4'd5, 4'd5 }, + {4'd5, 4'd6 }: valid_cl = 1; +`elsif sg25 + {4'd5, 4'd5 }, + {4'd5, 4'd6 }: valid_cl = 1; +`endif + default : valid_cl = 0; + endcase +endfunction + +// find the minimum valid cas write latency +function [3:0] min_cwl; + input period; + real period; + min_cwl = (period >= 2500.0) ? 5: + (period >= 1875.0) ? 6: + (period >= 1500.0) ? 7: + (period >= 1250.0) ? 8: + (period >= 1071.0) ? 9: + 10; // (period >= 938) +endfunction + +// find the minimum valid cas latency +function [3:0] min_cl; + input period; + real period; + reg [3:0] cwl; + reg [3:0] cl; + begin + cwl = min_cwl(period); + for (cl=CL_MAX; cl>=CL_MIN; cl=cl-1) begin + if (valid_cl(cl, cwl)) begin + min_cl = cl; + end + end + end +endfunction diff --git a/gram/simulation/dram_model/ddr3.v b/gram/simulation/dram_model/ddr3.v index 06566f5..40605d9 100644 --- a/gram/simulation/dram_model/ddr3.v +++ b/gram/simulation/dram_model/ddr3.v @@ -109,7 +109,8 @@ module ddr3 ( odt ); -`include "dram_model/2048Mb_ddr3_parameters.vh" +// `include "dram_model/2048Mb_ddr3_parameters.vh" +`include "dram_model/1024Mb_ddr3_parameters.vh" initial begin $display ("TCK_MIN = %d", TCK_MIN); From 3e499668b351af1e85386d6689d8a623738c27e3 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 17 Mar 2022 12:52:26 +0000 Subject: [PATCH 25/47] add alternative variant of runsimsoc.sh --- gram/simulation/runsimsoc2.sh | 14 ++++++++++++++ 1 file changed, 14 insertions(+) create mode 100755 gram/simulation/runsimsoc2.sh diff --git a/gram/simulation/runsimsoc2.sh b/gram/simulation/runsimsoc2.sh new file mode 100755 index 0000000..4f97052 --- /dev/null +++ b/gram/simulation/runsimsoc2.sh @@ -0,0 +1,14 @@ +#!/bin/bash +set -e + +LIB_DIR=./ecp5u + +python3 simsoc.py +yosys simsoc.ys +cp ${LIB_DIR}/DDRDLLA.v DDRDLLA.v +patch DDRDLLA.v < DDRDLLA.patch +iverilog -Wall -g2012 -s simsoctb -o simsoc simsoctb.v build_simsoc/top.v dram_model/ddr3.v ${LIB_DIR}/ECLKSYNCB.v ${LIB_DIR}/EHXPLLL.v ${LIB_DIR}/PUR.v ${LIB_DIR}/GSR.v \ + ${LIB_DIR}/FD1S3AX.v ${LIB_DIR}/SGSR.v ${LIB_DIR}/ODDRX2F.v ${LIB_DIR}/ODDRX2DQA.v ${LIB_DIR}/DELAYF.v ${LIB_DIR}/BB.v ${LIB_DIR}/OB.v ${LIB_DIR}/IB.v \ + ${LIB_DIR}/DQSBUFM.v ${LIB_DIR}/UDFDL5_UDP_X.v ${LIB_DIR}/TSHX2DQSA.v ${LIB_DIR}/TSHX2DQA.v ${LIB_DIR}/ODDRX2DQSB.v ${LIB_DIR}/IDDRX2DQA.v DDRDLLA.v \ + ${LIB_DIR}/CLKDIVF.v +vvp -n simsoc -fst-speed From 7f70b441fb40a05c097db1f0941fba85ca784080 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 17 Mar 2022 12:52:50 +0000 Subject: [PATCH 26/47] initialise bitslip with a specific value rather than an incrementor --- gram/common.py | 11 ++++++----- gram/phy/ecp5ddrphy.py | 3 ++- 2 files changed, 8 insertions(+), 6 deletions(-) diff --git a/gram/common.py b/gram/common.py index 183f9ce..7c5f449 100644 --- a/gram/common.py +++ b/gram/common.py @@ -71,7 +71,7 @@ class BitSlip(Elaboratable): def __init__(self, dw, rst=None, slp=None, cycles=1): self.i = Signal(dw) self.o = Signal(dw) - self.rst = Signal() if rst is None else rst + #self.rst = Signal() if rst is None else rst self.slp = Signal() if slp is None else slp self.dw = dw self.cycles = cycles @@ -82,10 +82,11 @@ def elaborate(self, platform): vcount = self.cycles * self.dw value = Signal(vcount.bit_length()) - with m.If(self.rst): - sync += value.eq(0) - with m.Elif(self.slp): - sync += value.eq(value+1) + #with m.If(self.rst): + # sync += value.eq(0) + #with m.Elif(self.slp): + # sync += value.eq(value+1) + comb += value.eq(self.slp) # Shift Register using input i. r = Signal((self.cycles+1)*self.dw, reset_less=True) diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 5a9c8f6..b31e19e 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -133,6 +133,7 @@ def __init__(self, pads, sys_clk_freq=100e6): self.rdly = [] self.rdly += [bank.csr(3, "rw", name="rdly_p0")] self.rdly += [bank.csr(3, "rw", name="rdly_p1")] + self.bitslip = bank.csr(3, "rw") # phase-delay on read self._bridge = self.bridge(data_width=32, granularity=8, alignment=2) self.bus = self._bridge.bus @@ -478,7 +479,7 @@ def elaborate(self, platform): io_B=self.pads.dq.io[j]) ] # shift-register delay on the incoming read data - dq_i_bs = BitSlip(4, Const(0), Const(0), cycles=1) + dq_i_bs = BitSlip(4, Const(0), cycles=1) m.submodules['dq_i_bitslip_%d' % j] = dq_i_bs dq_i_bs_o = Signal(4, name="dq_i_bs_o_%d" % j) dq_i_bs_o_d = Signal(4, name="dq_i_bs_o_d_%d" % j) From 6b41f65aa7a42837d14d172c57b261b0e660291a Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Thu, 7 Apr 2022 13:32:41 -0500 Subject: [PATCH 27/47] Add initial support for external DRAM init on the Raptor Versa ECP5-85 board --- LICENSE | 1 + examples/ecp5_crg.py | 257 ++++++++++++++++++++++++++++++++++ examples/headless-versa-85.py | 84 +++++++++++ examples/headless/main.c | 71 +++++++++- libgram/Makefile | 4 +- libgram/src/calibration.c | 16 +++ 6 files changed, 429 insertions(+), 4 deletions(-) create mode 100644 examples/ecp5_crg.py create mode 100644 examples/headless-versa-85.py diff --git a/LICENSE b/LICENSE index 991cbcf..c4f0af0 100644 --- a/LICENSE +++ b/LICENSE @@ -1,4 +1,5 @@ Unless otherwise noted, Gram is Copyright 2020 / LambdaConcept +ECP5 DDR3 fixes and extensions Copyright 2022 Raptor Engineering, LLC Initial development is based on MiSoC's LASMICON / Copyright 2007-2016 / M-Labs LiteDRAM / Copyright 2012-2018 / EnjoyDigital diff --git a/examples/ecp5_crg.py b/examples/ecp5_crg.py new file mode 100644 index 0000000..5c975d6 --- /dev/null +++ b/examples/ecp5_crg.py @@ -0,0 +1,257 @@ +# Copyright (c) 2020 LambdaConcept +# Copyright (c) 2021 Luke Kenneth Casson Leighton +# Copyright (c) 2018-2020 Florent Kermarrec +# Copyright (c) 2019 Michael Betz +# +# Based on code from LambaConcept, from the gram example which is BSD-2-License +# https://github.com/jeanthom/gram/tree/master/examples +# +# Modifications for the Libre-SOC Project funded by NLnet and NGI POINTER +# under EU Grants 871528 and 957073, under the LGPLv3+ License + + +from nmigen import (Elaboratable, Module, Signal, ClockDomain, Instance, + ClockSignal, ResetSignal, Const) + +__all__ = ["ECP5CRG"] + + +class PLL(Elaboratable): + nclkouts_max = 3 + clki_div_range = (1, 128+1) + clkfb_div_range = (1, 128+1) + clko_div_range = (1, 128+1) + clki_freq_range = ( 8e6, 400e6) + clko_freq_range = (3.125e6, 400e6) + vco_freq_range = ( 400e6, 800e6) + + def __init__(self, clkin, + clksel=Signal(shape=2, reset=2), + reset=Signal(reset_less=True), + locked=Signal()): + self.clkin = clkin + self.clkin_freq = None + self.clksel = clksel + self.locked = locked + self.reset = reset + self.nclkouts = 0 + self.clkouts = {} + self.config = {} + self.params = {} + + def ports(self): + return [ + self.clkin, + self.clksel, + self.lock, + ] + list(self.clkouts.values()) + + def set_clkin_freq(self, freq): + (clki_freq_min, clki_freq_max) = self.clki_freq_range + assert freq >= clki_freq_min + assert freq <= clki_freq_max + self.clkin_freq = freq + + def create_clkout(self, cd, freq, phase=0, margin=1e-2): + (clko_freq_min, clko_freq_max) = self.clko_freq_range + assert freq >= clko_freq_min + assert freq <= clko_freq_max + assert self.nclkouts < self.nclkouts_max + self.clkouts[self.nclkouts] = (cd, freq, phase, margin) + #create_clkout_log(self.logger, cd.name, freq, margin, self.nclkouts) + print("clock domain", cd.domain, freq, margin, self.nclkouts) + self.nclkouts += 1 + + def compute_config(self): + config = {} + for clki_div in range(*self.clki_div_range): + config["clki_div"] = clki_div + for clkfb_div in range(*self.clkfb_div_range): + all_valid = True + vco_freq = self.clkin_freq/clki_div*clkfb_div*1 # clkos3_div=1 + (vco_freq_min, vco_freq_max) = self.vco_freq_range + if vco_freq >= vco_freq_min and vco_freq <= vco_freq_max: + for n, (clk, f, p, m) in sorted(self.clkouts.items()): + valid = False + for d in range(*self.clko_div_range): + clk_freq = vco_freq/d + if abs(clk_freq - f) <= f*m: + config["clko{}_freq".format(n)] = clk_freq + config["clko{}_div".format(n)] = d + config["clko{}_phase".format(n)] = p + valid = True + break + if not valid: + all_valid = False + else: + all_valid = False + if all_valid: + config["vco"] = vco_freq + config["clkfb_div"] = clkfb_div + #compute_config_log(self.logger, config) + print ("PLL config", config) + return config + raise ValueError("No PLL config found") + + def elaborate(self, platform): + config = self.compute_config() + clkfb = Signal() + self.params.update( + # attributes + a_FREQUENCY_PIN_CLKI = str(self.clkin_freq/1e6), + a_ICP_CURRENT = "6", + a_LPF_RESISTOR = "16", + a_MFG_ENABLE_FILTEROPAMP = "1", + a_MFG_GMCREF_SEL = "2", + # parameters + p_FEEDBK_PATH = "INT_OS3", # CLKOS3 rsvd for feedback with div=1. + p_CLKOS3_ENABLE = "ENABLED", + p_CLKOS3_DIV = 1, + p_CLKFB_DIV = config["clkfb_div"], + p_CLKI_DIV = config["clki_div"], + # reset, input clock, lock-achieved output + i_RST = self.reset, + i_CLKI = self.clkin, + o_LOCK = self.locked, + ) + # for each clock-out, set additional parameters + for n, (clk, f, p, m) in sorted(self.clkouts.items()): + n_to_l = {0: "P", 1: "S", 2: "S2"} + div = config["clko{}_div".format(n)] + cphase = int(p*(div + 1)/360 + div) + self.params["p_CLKO{}_ENABLE".format(n_to_l[n])] = "ENABLED" + self.params["p_CLKO{}_DIV".format(n_to_l[n])] = div + self.params["p_CLKO{}_FPHASE".format(n_to_l[n])] = 0 + self.params["p_CLKO{}_CPHASE".format(n_to_l[n])] = cphase + self.params["o_CLKO{}".format(n_to_l[n])] = clk + + m = Module() + print ("params", self.params) + pll = Instance("EHXPLLL", **self.params) + m.submodules.pll = pll + return m + + pll = Instance("EHXPLLL", + p_OUTDIVIDER_MUXA='DIVA', + p_OUTDIVIDER_MUXB='DIVB', + p_CLKOP_ENABLE='ENABLED', + p_CLKOS_ENABLE='ENABLED', + p_CLKOS2_ENABLE='DISABLED', + p_CLKOS3_ENABLE='DISABLED', + p_CLKOP_DIV=self.CLKOP_DIV, + p_CLKOS_DIV=self.CLKOS_DIV, + p_CLKFB_DIV=self.CLKFB_DIV, + p_CLKI_DIV=self.CLKI_DIV, + p_FEEDBK_PATH='INT_OP', + p_CLKOP_TRIM_POL="FALLING", + p_CLKOP_TRIM_DELAY=0, + p_CLKOS_TRIM_POL="FALLING", + p_CLKOS_TRIM_DELAY=0, + i_CLKI=self.clkin, + i_RST=0, + i_STDBY=0, + i_PHASESEL0=0, + i_PHASESEL1=0, + i_PHASEDIR=0, + i_PHASESTEP=0, + i_PHASELOADREG=0, + i_PLLWAKESYNC=0, + i_ENCLKOP=1, + i_ENCLKOS=1, + i_ENCLKOS2=0, + i_ENCLKOS3=0, + o_CLKOP=self.clkout1, + o_CLKOS=self.clkout2, + o_CLKOS2=self.clkout3, + o_CLKOS3=self.clkout4, + o_LOCK=self.lock, + ) + + +class ECP5CRG(Elaboratable): + def __init__(self, sys_clk_freq=100e6, pod_bits=25): + self.sys_clk_freq = sys_clk_freq + self.pod_bits = pod_bits + + def elaborate(self, platform): + m = Module() + + # Get 100Mhz from oscillator + extclk = platform.request(platform.default_clk) + cd_rawclk = ClockDomain("rawclk", local=True, reset_less=True) + m.d.comb += cd_rawclk.clk.eq(extclk) + m.domains += cd_rawclk + + # Reset + if platform.default_rst is not None: + reset = platform.request(platform.default_rst).i + else: + reset = Const(0) # whoops + + gsr0 = Signal() + gsr1 = Signal() + + m.submodules += [ + Instance("FD1S3AX", p_GSR="DISABLED", + i_CK=ClockSignal("rawclk"), + i_D=~reset, + o_Q=gsr0), + Instance("FD1S3AX", p_GSR="DISABLED", + i_CK=ClockSignal("rawclk"), + i_D=gsr0, + o_Q=gsr1), + Instance("SGSR", i_CLK=ClockSignal("rawclk"), + i_GSR=gsr1), + ] + + # PLL + m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~reset) + + # Power-on delay (655us) + podcnt = Signal(self.pod_bits, reset=-1) + pod_done = Signal() + with m.If((podcnt != 0) & pll.locked): + m.d.rawclk += podcnt.eq(podcnt-1) + m.d.rawclk += pod_done.eq(podcnt == 0) + + # Generating sync2x (200Mhz) and init (25Mhz) from extclk + cd_sync2x = ClockDomain("sync2x", local=False) + cd_sync2x_unbuf = ClockDomain("sync2x_unbuf", + local=False, reset_less=True) + cd_init = ClockDomain("init", local=False) + cd_sync = ClockDomain("sync", local=False) + cd_dramsync = ClockDomain("dramsync", local=False) + + # create PLL clocks + pll.set_clkin_freq(platform.default_clk_frequency) + pll.create_clkout(ClockSignal("sync2x_unbuf"), 2*self.sys_clk_freq) + pll.create_clkout(ClockSignal("init"), 25e6) + m.submodules += Instance("ECLKSYNCB", + i_ECLKI = ClockSignal("sync2x_unbuf"), + i_STOP = 0, + o_ECLKO = ClockSignal("sync2x")) + m.domains += cd_sync2x_unbuf + m.domains += cd_sync2x + m.domains += cd_init + m.domains += cd_sync + m.domains += cd_dramsync + reset_ok = Signal(reset_less=True) + m.d.comb += reset_ok.eq(~pll.locked|~pod_done) + m.d.comb += ResetSignal("init").eq(reset_ok) + m.d.comb += ResetSignal("sync").eq(reset_ok) + m.d.comb += ResetSignal("dramsync").eq(reset_ok) + + # # Generating sync (100Mhz) from sync2x + + m.submodules += Instance("CLKDIVF", + p_DIV="2.0", + i_ALIGNWD=0, + i_CLKI=ClockSignal("sync2x"), + i_RST=0, + o_CDIVX=ClockSignal("sync")) + + # temporarily set dram sync clock exactly equal to main sync + m.d.comb += ClockSignal("dramsync").eq(ClockSignal("sync")) + + return m + diff --git a/examples/headless-versa-85.py b/examples/headless-versa-85.py new file mode 100644 index 0000000..3254115 --- /dev/null +++ b/examples/headless-versa-85.py @@ -0,0 +1,84 @@ +# This file is Copyright (c) 2020 LambdaConcept +# This file is Copyright (c) 2022 Raptor Engineering, LLC + +from nmigen import * +from nmigen.lib.cdc import ResetSynchronizer +from nmigen_soc import wishbone, memory + +from lambdasoc.cpu.minerva import MinervaCPU +from lambdasoc.periph.intc import GenericInterruptController +from lambdasoc.periph.serial import AsyncSerialPeripheral +from lambdasoc.periph.sram import SRAMPeripheral +from lambdasoc.periph.timer import TimerPeripheral +from lambdasoc.periph import Peripheral +from lambdasoc.soc.base import SoC + +from gram.core import gramCore +from gram.phy.ecp5ddrphy import ECP5DDRPHY +from gram.modules import MT41K64M16 +from gram.frontend.wishbone import gramWishbone + +from nmigen_boards.versa_ecp5 import VersaECP5Platform85 +from ecp5_crg import ECP5CRG +from uartbridge import UARTBridge +from crg import * + +class DDR3SoC(SoC, Elaboratable): + def __init__(self, *, + ddrphy_addr, dramcore_addr, + ddr_addr): + self._decoder = wishbone.Decoder(addr_width=30, data_width=32, granularity=8, + features={"cti", "bte"}) + + self.crg = ECP5CRG() + + self.ub = UARTBridge(divisor=868, pins=platform.request("uart", 0)) + + ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"}, + xdr={"clk":4, "a":4, "ba":4, "clk_en":4, "odt":4, "ras":4, "cas":4, "we":4, "cs":4, "reset":4}) + self.ddrphy = DomainRenamer("dramsync")(ECP5DDRPHY(ddr_pins)) + self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) + + ddrmodule = MT41K64M16(platform.default_clk_frequency, "1:2") + + self.dramcore = DomainRenamer("dramsync")(gramCore( + phy=self.ddrphy, + geom_settings=ddrmodule.geom_settings, + timing_settings=ddrmodule.timing_settings, + clk_freq=platform.default_clk_frequency)) + self._decoder.add(self.dramcore.bus, addr=dramcore_addr) + + self.drambone = DomainRenamer("dramsync")(gramWishbone(self.dramcore)) + self._decoder.add(self.drambone.bus, addr=ddr_addr) + + self.memory_map = self._decoder.bus.memory_map + + self.clk_freq = platform.default_clk_frequency + + def elaborate(self, platform): + m = Module() + + m.submodules.sysclk = self.crg + + m.submodules.ub = self.ub + + m.submodules.decoder = self._decoder + m.submodules.ddrphy = self.ddrphy + m.submodules.dramcore = self.dramcore + m.submodules.drambone = self.drambone + + m.d.comb += [ + self.ub.bus.connect(self._decoder.bus), + ] + + return m + + +if __name__ == "__main__": + platform = VersaECP5Platform85() + + soc = DDR3SoC(ddrphy_addr=0x00008000, dramcore_addr=0x00009000, + ddr_addr=0x10000000) + + soc.build(do_build=True) + platform.build(soc, do_program=True) diff --git a/examples/headless/main.c b/examples/headless/main.c index cfcd36e..bc3e98a 100644 --- a/examples/headless/main.c +++ b/examples/headless/main.c @@ -31,6 +31,8 @@ uint32_t gram_read(struct gramCtx *ctx, void *addr) { fprintf(stderr, "gram_read error (read bytes length mismatch: %d != %d)\n", received, sizeof(reply)); } + //printf("gram_read: 0x%08x: 0x%08x\n", addr, ntohl(reply)); + return ntohl(reply); } @@ -41,6 +43,8 @@ int gram_write(struct gramCtx *ctx, void *addr, uint32_t value) { *(uint32_t*)(commands+2) = htonl((uint32_t)addr >> 2); *(uint32_t*)(commands+6) = htonl(value); + //printf("gram_write: 0x%08x: 0x%08x\n", addr, value); + sent = write(*(int*)(ctx->user_data), commands, sizeof(commands)); if (sent != sizeof(commands)) { fprintf(stderr, "gram_write error (sent bytes length mismatch)\n"); @@ -100,8 +104,22 @@ int main(int argc, char *argv[]) { uint32_t pattern[kPatternSize]; const int kDumpWidth = 8; size_t i; + int res; + uint32_t tmp; int delay, miss = 0; + uint32_t ddr_base = 0x10000000; + +#if 1 + struct gramProfile profile = { + .mode_registers = { + 0x2708, 0x2054, 0x0512, 0x0000 + }, + .rdly_p0 = 2, + .rdly_p1 = 2, + }; +#endif +#if 0 struct gramProfile profile = { .mode_registers = { 0x320, 0x6, 0x200, 0x0 @@ -109,6 +127,8 @@ int main(int argc, char *argv[]) { .rdly_p0 = 2, .rdly_p1 = 2, }; +#endif + struct gramProfile profile2; if (argc < 3) { fprintf(stderr, "Usage: %s port baudrate\n", argv[0]); @@ -126,9 +146,57 @@ int main(int argc, char *argv[]) { ctx.user_data = &serial_port; printf("gram init... "); - gram_init(&ctx, &profile, (void*)0x10000000, (void*)0x00009000, (void*)0x00008000); + gram_init(&ctx, &profile, (void*)ddr_base, (void*)0x00009000, (void*)0x00008000); printf("done\n"); + printf("Rdly\np0: "); + for (size_t i = 0; i < 8; i++) { + profile2.rdly_p0 = i; + gram_load_calibration(&ctx, &profile2); + gram_reset_burstdet(&ctx); + for (size_t j = 0; j < 128; j++) { + tmp = gram_read(&ctx, ddr_base+4*j); + } + if (gram_read_burstdet(&ctx, 0)) { + printf("1"); + } else { + printf("0"); + } + fflush(stdout); + } + printf("\n"); + + printf("Rdly\np1: "); + for (size_t i = 0; i < 8; i++) { + profile2.rdly_p1 = i; + gram_load_calibration(&ctx, &profile2); + gram_reset_burstdet(&ctx); + for (size_t j = 0; j < 128; j++) { + tmp = gram_read(&ctx, ddr_base+4*j); + } + if (gram_read_burstdet(&ctx, 1)) { + printf("1"); + } else { + printf("0"); + } + fflush(stdout); + } + printf("\n"); + + printf("Auto calibrating... "); + res = gram_generate_calibration(&ctx, &profile2); + if (res != GRAM_ERR_NONE) { + printf("failed\n"); + gram_load_calibration(&ctx, &profile); + } else { + gram_load_calibration(&ctx, &profile2); + } + printf("done\n"); + + printf("Auto calibration profile:\n"); + printf("\tp0 rdly: %d\n", profile2.rdly_p0); + printf("\tp1 rdly: %d\n", profile2.rdly_p1); + gram_reset_burstdet(&ctx); srand(time(NULL)); @@ -137,7 +205,6 @@ int main(int argc, char *argv[]) { } printf("memtest... \n"); - uint32_t ddr_base = 0x10000000; printf("Writing data sequence..."); for (i = 0; i < kPatternSize; i++) { diff --git a/libgram/Makefile b/libgram/Makefile index 6093828..4cb7a19 100644 --- a/libgram/Makefile +++ b/libgram/Makefile @@ -1,6 +1,6 @@ OBJS := src/init.o src/dfii.o src/calibration.o -TRIPLE := riscv64-unknown-elf- +TRIPLE := CC := $(TRIPLE)gcc AS := $(TRIPLE)as @@ -8,7 +8,7 @@ OBJCOPY := $(TRIPLE)objcopy AR := $(TRIPLE)ar LD := $(TRIPLE)ld -CFLAGS += -fvisibility=hidden -nostdlib -Os -Iinclude -std=c99 -Wall -Werror -pedantic +CFLAGS += -fvisibility=hidden -nostdlib -O0 -g -Iinclude -std=c99 -Wall -Wno-error -pedantic -DGRAM_RW_FUNC LDFLAGS += -nostdlib ifeq ($(TRIPLE),riscv64-unknown-elf-) diff --git a/libgram/src/calibration.c b/libgram/src/calibration.c index a77c44a..cb7b359 100644 --- a/libgram/src/calibration.c +++ b/libgram/src/calibration.c @@ -57,7 +57,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { +#ifdef GRAM_RW_FUNC + gram_read(ctx, ctx->ddr_base); +#else tmp = ram[i]; +#endif } if (gram_read_burstdet(ctx, 0)) { @@ -74,7 +78,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { +#ifdef GRAM_RW_FUNC + gram_read(ctx, ctx->ddr_base); +#else tmp = ram[i]; +#endif } if (gram_read_burstdet(ctx, 1)) { @@ -92,7 +100,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { +#ifdef GRAM_RW_FUNC + gram_read(ctx, ctx->ddr_base); +#else tmp = ram[i]; +#endif } if (!gram_read_burstdet(ctx, 0)) { @@ -109,7 +121,11 @@ int gram_generate_calibration(const struct gramCtx *ctx, struct gramProfile *pro gram_reset_burstdet(ctx); for (i = 0; i < 128; i++) { +#ifdef GRAM_RW_FUNC + gram_read(ctx, ctx->ddr_base); +#else tmp = ram[i]; +#endif } if (!gram_read_burstdet(ctx, 1)) { From ca3e97fc25b68272fb19322f339c738a1b7bd73c Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Thu, 7 Apr 2022 13:38:01 -0500 Subject: [PATCH 28/47] Properly connect reset and cs signals Starting to get (corrupt) data out of the memory... --- examples/headless-versa-85.py | 8 +++++--- examples/headless/main.c | 37 +++++++++++++++++++++++++++++++--- gram/phy/dfi.py | 2 +- gram/phy/ecp5ddrphy.py | 38 ++++++++++++++++++++++++++--------- libgram/src/dfii.c | 6 +++--- 5 files changed, 72 insertions(+), 19 deletions(-) diff --git a/examples/headless-versa-85.py b/examples/headless-versa-85.py index 3254115..24cb4d0 100644 --- a/examples/headless-versa-85.py +++ b/examples/headless-versa-85.py @@ -19,7 +19,8 @@ from gram.frontend.wishbone import gramWishbone from nmigen_boards.versa_ecp5 import VersaECP5Platform85 -from ecp5_crg import ECP5CRG +#from ecp5_crg import ECP5CRG +from crg import ECPIX5CRG from uartbridge import UARTBridge from crg import * @@ -30,12 +31,13 @@ def __init__(self, *, self._decoder = wishbone.Decoder(addr_width=30, data_width=32, granularity=8, features={"cti", "bte"}) - self.crg = ECP5CRG() + self.crg = ECPIX5CRG() + #self.crg = ECP5CRG() self.ub = UARTBridge(divisor=868, pins=platform.request("uart", 0)) ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"}, - xdr={"clk":4, "a":4, "ba":4, "clk_en":4, "odt":4, "ras":4, "cas":4, "we":4, "cs":4, "reset":4}) + xdr={"clk":4, "a":4, "ba":4, "clk_en":4, "odt":4, "ras":4, "cas":4, "we":4, "cs":4, "rst":1}) self.ddrphy = DomainRenamer("dramsync")(ECP5DDRPHY(ddr_pins)) self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) diff --git a/examples/headless/main.c b/examples/headless/main.c index bc3e98a..35e6b46 100644 --- a/examples/headless/main.c +++ b/examples/headless/main.c @@ -110,16 +110,25 @@ int main(int argc, char *argv[]) { uint32_t ddr_base = 0x10000000; -#if 1 +#if 0 struct gramProfile profile = { .mode_registers = { - 0x2708, 0x2054, 0x0512, 0x0000 + 0xb30, 0x806, 0x200, 0x0 }, .rdly_p0 = 2, .rdly_p1 = 2, }; #endif #if 0 + struct gramProfile profile = { + .mode_registers = { + 0xb20, 0x806, 0x200, 0x0 + }, + .rdly_p0 = 2, + .rdly_p1 = 2, + }; +#endif +#if 1 struct gramProfile profile = { .mode_registers = { 0x320, 0x6, 0x200, 0x0 @@ -149,6 +158,7 @@ int main(int argc, char *argv[]) { gram_init(&ctx, &profile, (void*)ddr_base, (void*)0x00009000, (void*)0x00008000); printf("done\n"); +#if 0 printf("Rdly\np0: "); for (size_t i = 0; i < 8; i++) { profile2.rdly_p0 = i; @@ -198,6 +208,7 @@ int main(int argc, char *argv[]) { printf("\tp1 rdly: %d\n", profile2.rdly_p1); gram_reset_burstdet(&ctx); +#endif srand(time(NULL)); for (i = 0; i < kPatternSize; i++) { @@ -220,6 +231,26 @@ int main(int argc, char *argv[]) { printf("done\n"); } + printf("Dumping data sequence...\n"); + for (i = 0; i < kPatternSize; i++) { + if ((i % kDumpWidth) == 0) { + printf("%08x | ", ddr_base+4*i); + } + + expected_value = pattern[i]; + + for (int j = 3; j >= 0; j--) { + printf("%02x", ((uint8_t*)(&expected_value))[j]); + } + + if ((i % kDumpWidth) == kDumpWidth-1) { + printf("\n"); + } else { + printf(" "); + } + } + printf("\n"); + printf("Reading data sequence...\n"); for (i = 0; i < kPatternSize; i++) { if ((i % kDumpWidth) == 0) { @@ -237,7 +268,7 @@ int main(int argc, char *argv[]) { printf("\033[0;32m%02x\033[0m", ((uint8_t*)(&read_value))[j]); } } - + if ((i % kDumpWidth) == kDumpWidth-1) { printf("\n"); } else { diff --git a/gram/phy/dfi.py b/gram/phy/dfi.py index f5a5eb9..aead7a2 100644 --- a/gram/phy/dfi.py +++ b/gram/phy/dfi.py @@ -47,7 +47,7 @@ def __init__(self, addressbits, bankbits, nranks, databits, nphases=1, p.cas.reset = 1 p.ras.reset = 1 p.reset.reset = 1 - p.cs_n.reset = -1 + p.cs_n.reset = 1 p.we.reset = 1 p.act.reset = 1 diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index b31e19e..62b42a5 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -253,17 +253,37 @@ def elaborate(self, platform): # dfi.Interface it is "reset" dfi2pads = {'rst': 'reset', 'cs': 'cs_n'} name = dfi2pads.get(name, name) # remap if exists - m.d.comb += [ - pad.o_clk.eq(ClockSignal("dramsync")), - pad.o_fclk.eq(ClockSignal("sync2x")), - ] - for i in range(len(pad.o0)): + if name == "reset": + m.d.comb += [ + pad.o_clk.eq(ClockSignal("sync")), + ] + else: m.d.comb += [ - pad.o0[i].eq(getattr(dfi.phases[0], name)[i]), - pad.o1[i].eq(getattr(dfi.phases[0], name)[i]), - pad.o2[i].eq(getattr(dfi.phases[1], name)[i]), - pad.o3[i].eq(getattr(dfi.phases[1], name)[i]), + pad.o_clk.eq(ClockSignal("dramsync")), + pad.o_fclk.eq(ClockSignal("sync2x")), ] + if name == "reset": + for i in range(len(pad.o)): + m.d.comb += [ + pad.o[i].eq(getattr(dfi.phases[0], name)[i]), + ] + elif name == "cs_n": + # cs_n can't be directly connected to cs without being inverted first... + for i in range(len(pad.o0)): + m.d.comb += [ + pad.o0[i].eq(~getattr(dfi.phases[0], name)[i]), + pad.o1[i].eq(~getattr(dfi.phases[0], name)[i]), + pad.o2[i].eq(~getattr(dfi.phases[1], name)[i]), + pad.o3[i].eq(~getattr(dfi.phases[1], name)[i]), + ] + else: + for i in range(len(pad.o0)): + m.d.comb += [ + pad.o0[i].eq(getattr(dfi.phases[0], name)[i]), + pad.o1[i].eq(getattr(dfi.phases[0], name)[i]), + pad.o2[i].eq(getattr(dfi.phases[1], name)[i]), + pad.o3[i].eq(getattr(dfi.phases[1], name)[i]), + ] # DQ --------------------------------------------------------------------------------------- dq_oe = Signal() diff --git a/libgram/src/dfii.c b/libgram/src/dfii.c index 58519bd..17e18da 100644 --- a/libgram/src/dfii.c +++ b/libgram/src/dfii.c @@ -15,7 +15,7 @@ static void dfii_setcontrol(const struct gramCtx *ctx, uint8_t val) { void dfii_setsw(const struct gramCtx *ctx, bool software_control) { if (software_control) { - dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT); + dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET|DFII_COMMAND_CS); } else { dfii_setcontrol(ctx, DFII_CONTROL_SEL|DFII_CONTROL_RESET); } @@ -59,13 +59,13 @@ void dfii_initseq(const struct gramCtx *ctx, const struct gramProfile *profile) /* Release reset */ dfii_set_p0_address(ctx, 0x0); dfii_set_p0_baddress(ctx, 0); - dfii_setcontrol(ctx, DFII_CONTROL_ODT); + dfii_setcontrol(ctx, DFII_CONTROL_ODT|DFII_CONTROL_RESET); cdelay(50000); /* Bring CKE high */ dfii_set_p0_address(ctx, 0x0); dfii_set_p0_baddress(ctx, 0); - dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT); + dfii_setcontrol(ctx, DFII_CONTROL_CKE|DFII_CONTROL_ODT|DFII_CONTROL_RESET); cdelay(10000); /* Load Mode Register 2, CWL=5 */ From 03e79da11c95b3fa3a2e55a4c08af8521c4d2283 Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Thu, 7 Apr 2022 13:38:53 -0500 Subject: [PATCH 29/47] Partially revert GIT hash 180026c72f0e1d3ef365b2214288d4a543a238dd UART bridge now gives a valid memtest --- gram/core/multiplexer.py | 4 +-- gram/phy/ecp5ddrphy.py | 55 ++++++++++++++++++---------------------- 2 files changed, 26 insertions(+), 33 deletions(-) diff --git a/gram/core/multiplexer.py b/gram/core/multiplexer.py index 87ea8a9..fe5fc6f 100644 --- a/gram/core/multiplexer.py +++ b/gram/core/multiplexer.py @@ -173,9 +173,9 @@ def valid_and(cmd, attr): with m.If(sel == STEER_REFRESH): m.d.sync += phase.cs_n.eq(0) with m.Else(): - m.d.sync += phase.cs_n.eq(~rank_decoder.o) + m.d.sync += phase.cs_n.eq(rank_decoder.o) else: - m.d.sync += phase.cs_n.eq(~rank_decoder.o) + m.d.sync += phase.cs_n.eq(rank_decoder.o) m.d.sync += phase.bank.eq(Array(cmd.ba[:-rankbits] for cmd in self.commands)[sel]) else: m.d.sync += [ diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 62b42a5..513582e 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -175,7 +175,6 @@ def __init__(self, pads, sys_clk_freq=100e6): def elaborate(self, platform): m = Module() - comb, sync = m.d.comb, m.d.sync m.submodules.bridge = self._bridge @@ -425,12 +424,12 @@ def elaborate(self, platform): ] for j in range(8*i, 8*(i+1)): - dq_o = Signal(name="dq_o_%d" % j) - dq_i = Signal(name="dq_i_%d" % j) - dq_oe_n = Signal(name="dq_oe_n_%d" % j) - dq_i_delayed = Signal(name="dq_i_delayed_%d" % j) - dq_i_data = Signal(4, name="dq_i_data_%d" % j) - dq_o_data = Signal(8, name="dq_o_data_%d" % j) + dq_o = Signal() + dq_i = Signal() + dq_oe_n = Signal() + dq_i_delayed = Signal() + dq_i_data = Signal(4) + dq_o_data = Signal(8) dq_o_data_d = Signal(8, reset_less=True) dq_o_data_muxed = Signal(4, reset_less=True) m.d.comb += dq_o_data.eq(Cat( @@ -498,28 +497,20 @@ def elaborate(self, platform): o_O=dq_i, io_B=self.pads.dq.io[j]) ] - # shift-register delay on the incoming read data - dq_i_bs = BitSlip(4, Const(0), cycles=1) - m.submodules['dq_i_bitslip_%d' % j] = dq_i_bs - dq_i_bs_o = Signal(4, name="dq_i_bs_o_%d" % j) - dq_i_bs_o_d = Signal(4, name="dq_i_bs_o_d_%d" % j) - comb += dq_i_bs.i.eq(dq_i_data) - comb += dq_i_bs_o.eq(dq_i_bs.o) - sync += dq_i_bs_o_d.eq(dq_i_bs_o) # delay by 1 clock - #with m.If(~datavalid_prev & datavalid): - comb += [ - dfi.phases[0].rddata[0*databits+j].eq(dq_i_bs_o_d[0]), - dfi.phases[0].rddata[1*databits+j].eq(dq_i_bs_o_d[1]), - dfi.phases[0].rddata[2*databits+j].eq(dq_i_bs_o_d[2]), - dfi.phases[0].rddata[3*databits+j].eq(dq_i_bs_o_d[3]), - ] - #with m.Elif(datavalid): - comb += [ - dfi.phases[1].rddata[0*databits+j].eq(dq_i_bs_o[0]), - dfi.phases[1].rddata[1*databits+j].eq(dq_i_bs_o[1]), - dfi.phases[1].rddata[2*databits+j].eq(dq_i_bs_o[2]), - dfi.phases[1].rddata[3*databits+j].eq(dq_i_bs_o[3]), - ] + with m.If(~datavalid_prev & datavalid): + m.d.sync += [ + dfi.phases[0].rddata[0*databits+j].eq(dq_i_data[0]), + dfi.phases[0].rddata[1*databits+j].eq(dq_i_data[1]), + dfi.phases[0].rddata[2*databits+j].eq(dq_i_data[2]), + dfi.phases[0].rddata[3*databits+j].eq(dq_i_data[3]), + ] + with m.Elif(datavalid): + m.d.sync += [ + dfi.phases[1].rddata[0*databits+j].eq(dq_i_data[0]), + dfi.phases[1].rddata[1*databits+j].eq(dq_i_data[1]), + dfi.phases[1].rddata[2*databits+j].eq(dq_i_data[2]), + dfi.phases[1].rddata[3*databits+j].eq(dq_i_data[3]), + ] # Read Control Path ------------------------------------------------------------------------ # Creates a shift register of read commands coming from the DFI interface. This shift register @@ -535,10 +526,12 @@ def elaborate(self, platform): rddata_en_last = Signal.like(rddata_en) m.d.comb += rddata_en.eq(Cat(dfi.phases[self.settings.rdphase].rddata_en, rddata_en_last)) m.d.sync += rddata_en_last.eq(rddata_en) - for phase in dfi.phases: - m.d.sync += phase.rddata_valid.eq(rddata_en[-1]) m.d.comb += dqs_re.eq(rddata_en[cl_sys_latency + 1] | rddata_en[cl_sys_latency + 2]) + rddata_valid = Signal() + m.d.sync += rddata_valid.eq(datavalid_prev & ~datavalid) + for phase in dfi.phases: + m.d.comb += phase.rddata_valid.eq(rddata_valid) # Write Control Path ----------------------------------------------------------------------- # Creates a shift register of write commands coming from the DFI interface. This shift register From c95ea6eb7b6cd75de7200778bd36bbb3db19bb3c Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Thu, 7 Apr 2022 13:38:59 -0500 Subject: [PATCH 30/47] Switch CRG back over to ECP5 version Memtest pass using external UART bridge --- examples/headless-versa-85.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/headless-versa-85.py b/examples/headless-versa-85.py index 24cb4d0..d2692e5 100644 --- a/examples/headless-versa-85.py +++ b/examples/headless-versa-85.py @@ -19,8 +19,8 @@ from gram.frontend.wishbone import gramWishbone from nmigen_boards.versa_ecp5 import VersaECP5Platform85 -#from ecp5_crg import ECP5CRG -from crg import ECPIX5CRG +from ecp5_crg import ECP5CRG +#from crg import ECPIX5CRG from uartbridge import UARTBridge from crg import * @@ -31,8 +31,8 @@ def __init__(self, *, self._decoder = wishbone.Decoder(addr_width=30, data_width=32, granularity=8, features={"cti", "bte"}) - self.crg = ECPIX5CRG() - #self.crg = ECP5CRG() + #self.crg = ECPIX5CRG() + self.crg = ECP5CRG() self.ub = UARTBridge(divisor=868, pins=platform.request("uart", 0)) From f0d033795543fd6c451c709713347aa5ce983202 Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Thu, 7 Apr 2022 13:39:38 -0500 Subject: [PATCH 31/47] Working at 50MHz system clock --- examples/headless-versa-85.py | 19 ++++++++++++++----- examples/headless/main.c | 11 ++++++++++- 2 files changed, 24 insertions(+), 6 deletions(-) diff --git a/examples/headless-versa-85.py b/examples/headless-versa-85.py index d2692e5..3050f89 100644 --- a/examples/headless-versa-85.py +++ b/examples/headless-versa-85.py @@ -31,23 +31,32 @@ def __init__(self, *, self._decoder = wishbone.Decoder(addr_width=30, data_width=32, granularity=8, features={"cti", "bte"}) + #desired_sys_clk_freq = 100e6 + #desired_sys_clk_freq = 90e6 + #desired_sys_clk_freq = 75e6 + #desired_sys_clk_freq = 70e6 + #desired_sys_clk_freq = 65e6 + #desired_sys_clk_freq = 60e6 + #desired_sys_clk_freq = 55e6 + desired_sys_clk_freq = 50e6 + #self.crg = ECPIX5CRG() - self.crg = ECP5CRG() + self.crg = ECP5CRG(sys_clk_freq=desired_sys_clk_freq) - self.ub = UARTBridge(divisor=868, pins=platform.request("uart", 0)) + self.ub = UARTBridge(divisor=int(desired_sys_clk_freq/115200), pins=platform.request("uart", 0)) ddr_pins = platform.request("ddr3", 0, dir={"dq":"-", "dqs":"-"}, xdr={"clk":4, "a":4, "ba":4, "clk_en":4, "odt":4, "ras":4, "cas":4, "we":4, "cs":4, "rst":1}) self.ddrphy = DomainRenamer("dramsync")(ECP5DDRPHY(ddr_pins)) self._decoder.add(self.ddrphy.bus, addr=ddrphy_addr) - ddrmodule = MT41K64M16(platform.default_clk_frequency, "1:2") + ddrmodule = MT41K64M16(self.crg.sys_clk_freq, "1:2") self.dramcore = DomainRenamer("dramsync")(gramCore( phy=self.ddrphy, geom_settings=ddrmodule.geom_settings, timing_settings=ddrmodule.timing_settings, - clk_freq=platform.default_clk_frequency)) + clk_freq=self.crg.sys_clk_freq)) self._decoder.add(self.dramcore.bus, addr=dramcore_addr) self.drambone = DomainRenamer("dramsync")(gramWishbone(self.dramcore)) @@ -55,7 +64,7 @@ def __init__(self, *, self.memory_map = self._decoder.bus.memory_map - self.clk_freq = platform.default_clk_frequency + self.clk_freq = self.crg.sys_clk_freq def elaborate(self, platform): m = Module() diff --git a/examples/headless/main.c b/examples/headless/main.c index 35e6b46..37d22bd 100644 --- a/examples/headless/main.c +++ b/examples/headless/main.c @@ -122,13 +122,22 @@ int main(int argc, char *argv[]) { #if 0 struct gramProfile profile = { .mode_registers = { - 0xb20, 0x806, 0x200, 0x0 + 0xb30, 0x806, 0x200, 0x0 }, .rdly_p0 = 2, .rdly_p1 = 2, }; #endif #if 1 + struct gramProfile profile = { + .mode_registers = { + 0xb20, 0x806, 0x200, 0x0 + }, + .rdly_p0 = 2, + .rdly_p1 = 2, + }; +#endif +#if 0 struct gramProfile profile = { .mode_registers = { 0x320, 0x6, 0x200, 0x0 From 725400f06b2346bd523169b698ecfa85a7b20c03 Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Thu, 7 Apr 2022 15:23:13 -0500 Subject: [PATCH 32/47] Backport litedram 05ed5bf59d31029d3f91c5a348cdd539a150631b phy/ecp5ddrphy: simplify using new get_sys_phase. --- gram/common.py | 2 ++ gram/phy/ecp5ddrphy.py | 10 +++++----- 2 files changed, 7 insertions(+), 5 deletions(-) diff --git a/gram/common.py b/gram/common.py index 7c5f449..c8bc6ad 100644 --- a/gram/common.py +++ b/gram/common.py @@ -54,6 +54,8 @@ def get_cl_cw(memtype, tck): def get_sys_latency(nphases, cas_latency): return math.ceil(cas_latency/nphases) +def get_sys_phase(nphases, sys_latency, cas_latency): + return sys_latency*nphases - cas_latency def get_sys_phases(nphases, sys_latency, cas_latency): dat_phase = sys_latency*nphases - cas_latency diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 513582e..585031d 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -154,8 +154,8 @@ def __init__(self, pads, sys_clk_freq=100e6): cl, cwl = get_cl_cw("DDR3", tck) cl_sys_latency = get_sys_latency(nphases, cl) cwl_sys_latency = get_sys_latency(nphases, cwl) - rdcmdphase, rdphase = get_sys_phases(nphases, cl_sys_latency, cl) - wrcmdphase, wrphase = get_sys_phases(nphases, cwl_sys_latency, cwl) + rdphase = get_sys_phase(nphases, cl_sys_latency, cl) + wrphase = get_sys_phase(nphases, cwl_sys_latency, cwl) self.settings = PhySettings( phytype="ECP5DDRPHY", memtype="DDR3", @@ -165,11 +165,11 @@ def __init__(self, pads, sys_clk_freq=100e6): nphases=nphases, rdphase=rdphase, wrphase=wrphase, - rdcmdphase=rdcmdphase, - wrcmdphase=wrcmdphase, + rdcmdphase = (rdphase - 1)%nphases, + wrcmdphase = (wrphase - 1)%nphases, cl=cl, cwl=cwl, - read_latency=2 + cl_sys_latency + 2 + log2_int(4//nphases) + 4, + read_latency = cl_sys_latency + 10, write_latency=cwl_sys_latency ) From ffdcef6b591e73932a97278e011834c8303731cc Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Thu, 7 Apr 2022 15:23:22 -0500 Subject: [PATCH 33/47] Re-apply part of 180026c72f0e1d3ef365b2214288d4a543a238dd The rank decoder inversion was incorrectly removed in commit 03e79da11c95b3fa3a2e55a4c08af8521c4d2283 Tested to give valid memtest output over UART bridge --- gram/core/multiplexer.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/gram/core/multiplexer.py b/gram/core/multiplexer.py index fe5fc6f..87ea8a9 100644 --- a/gram/core/multiplexer.py +++ b/gram/core/multiplexer.py @@ -173,9 +173,9 @@ def valid_and(cmd, attr): with m.If(sel == STEER_REFRESH): m.d.sync += phase.cs_n.eq(0) with m.Else(): - m.d.sync += phase.cs_n.eq(rank_decoder.o) + m.d.sync += phase.cs_n.eq(~rank_decoder.o) else: - m.d.sync += phase.cs_n.eq(rank_decoder.o) + m.d.sync += phase.cs_n.eq(~rank_decoder.o) m.d.sync += phase.bank.eq(Array(cmd.ba[:-rankbits] for cmd in self.commands)[sel]) else: m.d.sync += [ From 7cb3e51d2d2c7e1d71fc9c991697e1270f60358b Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Sat, 9 Apr 2022 15:00:47 -0500 Subject: [PATCH 34/47] Wire up missing CRG / DDR3 clock control / reset signals Swap DELAYF for DELAYG on DQ lines --- examples/ecp5_crg.py | 26 +++++++++++++++----------- gram/phy/ecp5ddrphy.py | 26 ++++++++++++-------------- 2 files changed, 27 insertions(+), 25 deletions(-) diff --git a/examples/ecp5_crg.py b/examples/ecp5_crg.py index 5c975d6..82ff66d 100644 --- a/examples/ecp5_crg.py +++ b/examples/ecp5_crg.py @@ -169,10 +169,14 @@ def elaborate(self, platform): class ECP5CRG(Elaboratable): - def __init__(self, sys_clk_freq=100e6, pod_bits=25): + def __init__(self, sys_clk_freq=100e6, pod_bits=16): self.sys_clk_freq = sys_clk_freq self.pod_bits = pod_bits + # DDR clock control signals + self.ddr_clk_stop = Signal() + self.ddr_clk_reset = Signal() + def elaborate(self, platform): m = Module() @@ -204,18 +208,18 @@ def elaborate(self, platform): i_GSR=gsr1), ] - # PLL - m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~reset) - - # Power-on delay (655us) + # Power-on delay podcnt = Signal(self.pod_bits, reset=-1) pod_done = Signal() - with m.If((podcnt != 0) & pll.locked): + with m.If(podcnt != 0): m.d.rawclk += podcnt.eq(podcnt-1) m.d.rawclk += pod_done.eq(podcnt == 0) + # PLL + m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~pod_done|~reset) + # Generating sync2x (200Mhz) and init (25Mhz) from extclk - cd_sync2x = ClockDomain("sync2x", local=False) + cd_sync2x = ClockDomain("sync2x", local=False, reset_less=True) cd_sync2x_unbuf = ClockDomain("sync2x_unbuf", local=False, reset_less=True) cd_init = ClockDomain("init", local=False) @@ -228,7 +232,7 @@ def elaborate(self, platform): pll.create_clkout(ClockSignal("init"), 25e6) m.submodules += Instance("ECLKSYNCB", i_ECLKI = ClockSignal("sync2x_unbuf"), - i_STOP = 0, + i_STOP = self.ddr_clk_stop, o_ECLKO = ClockSignal("sync2x")) m.domains += cd_sync2x_unbuf m.domains += cd_sync2x @@ -238,8 +242,8 @@ def elaborate(self, platform): reset_ok = Signal(reset_less=True) m.d.comb += reset_ok.eq(~pll.locked|~pod_done) m.d.comb += ResetSignal("init").eq(reset_ok) - m.d.comb += ResetSignal("sync").eq(reset_ok) - m.d.comb += ResetSignal("dramsync").eq(reset_ok) + m.d.comb += ResetSignal("sync").eq(reset_ok|self.ddr_clk_reset) + m.d.comb += ResetSignal("dramsync").eq(reset_ok|self.ddr_clk_reset) # # Generating sync (100Mhz) from sync2x @@ -247,7 +251,7 @@ def elaborate(self, platform): p_DIV="2.0", i_ALIGNWD=0, i_CLKI=ClockSignal("sync2x"), - i_RST=0, + i_RST=ResetSignal("dramsync"), o_CDIVX=ClockSignal("sync")) # temporarily set dram sync clock exactly equal to main sync diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 585031d..32a6792 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -52,15 +52,15 @@ def elaborate(self, platform): # DDRDLLA/DDQBUFM/ECLK initialization sequence --------------------------------------------- t = 8 # in cycles tl = Timeline([ - (1*t, [freeze.eq(1)]), # Freeze DDRDLLA - (2*t, [self.stop.eq(1)]), # Stop ECLK domain + (1*t, [ freeze.eq(1)]), # Freeze DDRDLLA + (2*t, [ self.stop.eq(1)]), # Stop ECLK domain (3*t, [self.reset.eq(1)]), # Reset ECLK domain (4*t, [self.reset.eq(0)]), # Release ECLK domain reset - (5*t, [self.stop.eq(0)]), # Release ECLK domain stop - (6*t, [freeze.eq(0)]), # Release DDRDLLA freeze + (5*t, [ self.stop.eq(0)]), # Release ECLK domain stop + (6*t, [ freeze.eq(0)]), # Release DDRDLLA freeze (7*t, [self.pause.eq(1)]), # Pause DQSBUFM - (8*t, [update.eq(1)]), # Update DDRDLLA - (9*t, [update.eq(0)]), # Release DDRDMMA update + (8*t, [ update.eq(1)]), # Update DDRDLLA + (9*t, [ update.eq(0)]), # Release DDRDMMA update (10*t, [self.pause.eq(0)]), # Release DQSBUFM pause ]) m.d.comb += tl.trigger.eq(lock & ~lock_d) # Trigger timeline on lock rising edge @@ -120,6 +120,7 @@ def __init__(self, pads, sys_clk_freq=100e6): self.pads = pads self._sys_clk_freq = sys_clk_freq + self.init = ECP5DDRPHYInit() databits = len(self.pads.dq.io) if databits % 8 != 0: @@ -190,7 +191,7 @@ def elaborate(self, platform): m.d.sync += burstdet_reg.eq(0) # Init ------------------------------------------------------------------------------------- - m.submodules.init = init = ECP5DDRPHYInit() + m.submodules.init = init = self.init # Parameters ------------------------------------------------------------------------------- cl, cwl = get_cl_cw("DDR3", tck) @@ -460,13 +461,10 @@ def elaborate(self, platform): i_D2=dq_o_data_muxed[2], i_D3=dq_o_data_muxed[3], o_Q=dq_o), - Instance("DELAYF", - p_DEL_MODE="DQS_ALIGNED_X2", - i_LOADN=1, - i_MOVE=0, - i_DIRECTION=0, - i_A=dq_i, - o_Z=dq_i_delayed), + Instance("DELAYG", + p_DEL_MODE = "DQS_ALIGNED_X2", + i_A = dq_i, + o_Z = dq_i_delayed), Instance("IDDRX2DQA", i_RST=ResetSignal("dramsync"), i_ECLK=ClockSignal("sync2x"), From 11d72971fc1656daa05abfa7ff33f697eb3d629f Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Sat, 9 Apr 2022 15:01:28 -0500 Subject: [PATCH 35/47] Avoid timing violation on ECP5 PHY PAUSE signal --- examples/headless/main.c | 10 ++++++---- gram/phy/ecp5ddrphy.py | 20 +++++++++++++++++++- libgram/src/dfii.c | 6 ++++++ 3 files changed, 31 insertions(+), 5 deletions(-) diff --git a/examples/headless/main.c b/examples/headless/main.c index 37d22bd..43bfab7 100644 --- a/examples/headless/main.c +++ b/examples/headless/main.c @@ -131,10 +131,10 @@ int main(int argc, char *argv[]) { #if 1 struct gramProfile profile = { .mode_registers = { - 0xb20, 0x806, 0x200, 0x0 + 0xb30, 0x806, 0x200, 0x0 }, - .rdly_p0 = 2, - .rdly_p1 = 2, + .rdly_p0 = 5, + .rdly_p1 = 5, }; #endif #if 0 @@ -167,7 +167,7 @@ int main(int argc, char *argv[]) { gram_init(&ctx, &profile, (void*)ddr_base, (void*)0x00009000, (void*)0x00008000); printf("done\n"); -#if 0 +#if 1 printf("Rdly\np0: "); for (size_t i = 0; i < 8; i++) { profile2.rdly_p0 = i; @@ -201,7 +201,9 @@ int main(int argc, char *argv[]) { fflush(stdout); } printf("\n"); +#endif +#if 0 printf("Auto calibrating... "); res = gram_generate_calibration(&ctx, &profile2); if (res != GRAM_ERR_NONE) { diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 32a6792..007c143 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -101,10 +101,28 @@ def elaborate(self, platform): with m.State("Idle"): with m.If(self.rdly_csr.w_stb): m.d.sync += self.pause.eq(1) - m.next = "RdlyUpdateRequested" + m.next = "RdlyUpdateRequestedDelay1" + + with m.State("RdlyUpdateRequestedDelay1"): + m.next = "RdlyUpdateRequestedDelay2" + + with m.State("RdlyUpdateRequestedDelay2"): + m.next = "RdlyUpdateRequestedDelay3" + + with m.State("RdlyUpdateRequestedDelay3"): + m.next = "RdlyUpdateRequested" with m.State("RdlyUpdateRequested"): m.d.sync += self.readclksel.eq(self.rdly_csr.w_data) + m.next = "ResetPauseDelay1" + + with m.State("ResetPauseDelay1"): + m.next = "ResetPauseDelay2" + + with m.State("ResetPauseDelay2"): + m.next = "ResetPauseDelay3" + + with m.State("ResetPauseDelay3"): m.next = "ResetPause" with m.State("ResetPause"): diff --git a/libgram/src/dfii.c b/libgram/src/dfii.c index 17e18da..8049a07 100644 --- a/libgram/src/dfii.c +++ b/libgram/src/dfii.c @@ -56,6 +56,12 @@ static void dfii_set_mr(const struct gramCtx *ctx, uint8_t mr, uint16_t val) { #define MR0_DLL_RESET (1 << 8) void dfii_initseq(const struct gramCtx *ctx, const struct gramProfile *profile) { + /* Assert reset */ + dfii_set_p0_address(ctx, 0x0); + dfii_set_p0_baddress(ctx, 0); + dfii_setcontrol(ctx, 0); + cdelay(50000); + /* Release reset */ dfii_set_p0_address(ctx, 0x0); dfii_set_p0_baddress(ctx, 0); From 689c8712c2581efde31e9f359c0741a4bcf2d92b Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Sat, 9 Apr 2022 15:18:23 -0500 Subject: [PATCH 36/47] Revert "Avoid timing violation on ECP5 PHY PAUSE signal" This reverts commit 11d72971fc1656daa05abfa7ff33f697eb3d629f. Inadvertently added debug garbage in this commit. --- examples/headless/main.c | 10 ++++------ gram/phy/ecp5ddrphy.py | 20 +------------------- libgram/src/dfii.c | 6 ------ 3 files changed, 5 insertions(+), 31 deletions(-) diff --git a/examples/headless/main.c b/examples/headless/main.c index 43bfab7..37d22bd 100644 --- a/examples/headless/main.c +++ b/examples/headless/main.c @@ -131,10 +131,10 @@ int main(int argc, char *argv[]) { #if 1 struct gramProfile profile = { .mode_registers = { - 0xb30, 0x806, 0x200, 0x0 + 0xb20, 0x806, 0x200, 0x0 }, - .rdly_p0 = 5, - .rdly_p1 = 5, + .rdly_p0 = 2, + .rdly_p1 = 2, }; #endif #if 0 @@ -167,7 +167,7 @@ int main(int argc, char *argv[]) { gram_init(&ctx, &profile, (void*)ddr_base, (void*)0x00009000, (void*)0x00008000); printf("done\n"); -#if 1 +#if 0 printf("Rdly\np0: "); for (size_t i = 0; i < 8; i++) { profile2.rdly_p0 = i; @@ -201,9 +201,7 @@ int main(int argc, char *argv[]) { fflush(stdout); } printf("\n"); -#endif -#if 0 printf("Auto calibrating... "); res = gram_generate_calibration(&ctx, &profile2); if (res != GRAM_ERR_NONE) { diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 007c143..32a6792 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -101,28 +101,10 @@ def elaborate(self, platform): with m.State("Idle"): with m.If(self.rdly_csr.w_stb): m.d.sync += self.pause.eq(1) - m.next = "RdlyUpdateRequestedDelay1" - - with m.State("RdlyUpdateRequestedDelay1"): - m.next = "RdlyUpdateRequestedDelay2" - - with m.State("RdlyUpdateRequestedDelay2"): - m.next = "RdlyUpdateRequestedDelay3" - - with m.State("RdlyUpdateRequestedDelay3"): - m.next = "RdlyUpdateRequested" + m.next = "RdlyUpdateRequested" with m.State("RdlyUpdateRequested"): m.d.sync += self.readclksel.eq(self.rdly_csr.w_data) - m.next = "ResetPauseDelay1" - - with m.State("ResetPauseDelay1"): - m.next = "ResetPauseDelay2" - - with m.State("ResetPauseDelay2"): - m.next = "ResetPauseDelay3" - - with m.State("ResetPauseDelay3"): m.next = "ResetPause" with m.State("ResetPause"): diff --git a/libgram/src/dfii.c b/libgram/src/dfii.c index 8049a07..17e18da 100644 --- a/libgram/src/dfii.c +++ b/libgram/src/dfii.c @@ -56,12 +56,6 @@ static void dfii_set_mr(const struct gramCtx *ctx, uint8_t mr, uint16_t val) { #define MR0_DLL_RESET (1 << 8) void dfii_initseq(const struct gramCtx *ctx, const struct gramProfile *profile) { - /* Assert reset */ - dfii_set_p0_address(ctx, 0x0); - dfii_set_p0_baddress(ctx, 0); - dfii_setcontrol(ctx, 0); - cdelay(50000); - /* Release reset */ dfii_set_p0_address(ctx, 0x0); dfii_set_p0_baddress(ctx, 0); From 6b7e293c39dcb17f9f435e6c1c27f4ee0b1f589f Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Sat, 9 Apr 2022 15:19:22 -0500 Subject: [PATCH 37/47] Avoid timing violation on ECP5 PHY PAUSE signal --- gram/phy/ecp5ddrphy.py | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 32a6792..007c143 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -101,10 +101,28 @@ def elaborate(self, platform): with m.State("Idle"): with m.If(self.rdly_csr.w_stb): m.d.sync += self.pause.eq(1) - m.next = "RdlyUpdateRequested" + m.next = "RdlyUpdateRequestedDelay1" + + with m.State("RdlyUpdateRequestedDelay1"): + m.next = "RdlyUpdateRequestedDelay2" + + with m.State("RdlyUpdateRequestedDelay2"): + m.next = "RdlyUpdateRequestedDelay3" + + with m.State("RdlyUpdateRequestedDelay3"): + m.next = "RdlyUpdateRequested" with m.State("RdlyUpdateRequested"): m.d.sync += self.readclksel.eq(self.rdly_csr.w_data) + m.next = "ResetPauseDelay1" + + with m.State("ResetPauseDelay1"): + m.next = "ResetPauseDelay2" + + with m.State("ResetPauseDelay2"): + m.next = "ResetPauseDelay3" + + with m.State("ResetPauseDelay3"): m.next = "ResetPause" with m.State("ResetPause"): From 423fdc3682f2b3a6c30df46af92e0f7aac617797 Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Sat, 9 Apr 2022 21:31:27 -0500 Subject: [PATCH 38/47] Put sysclk2x back under system reset control --- examples/ecp5_crg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/ecp5_crg.py b/examples/ecp5_crg.py index 82ff66d..cda8a99 100644 --- a/examples/ecp5_crg.py +++ b/examples/ecp5_crg.py @@ -219,7 +219,7 @@ def elaborate(self, platform): m.submodules.pll = pll = PLL(ClockSignal("rawclk"), reset=~pod_done|~reset) # Generating sync2x (200Mhz) and init (25Mhz) from extclk - cd_sync2x = ClockDomain("sync2x", local=False, reset_less=True) + cd_sync2x = ClockDomain("sync2x", local=False) cd_sync2x_unbuf = ClockDomain("sync2x_unbuf", local=False, reset_less=True) cd_init = ClockDomain("init", local=False) From 8f6a40bb418e346a78af5a83a5bc20f0ed538d57 Mon Sep 17 00:00:00 2001 From: Raptor Engineering Development Team Date: Sun, 10 Apr 2022 03:39:52 -0500 Subject: [PATCH 39/47] Don't reset the core / peripherals on DRAM controller reset request --- examples/ecp5_crg.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/ecp5_crg.py b/examples/ecp5_crg.py index cda8a99..931168e 100644 --- a/examples/ecp5_crg.py +++ b/examples/ecp5_crg.py @@ -242,7 +242,7 @@ def elaborate(self, platform): reset_ok = Signal(reset_less=True) m.d.comb += reset_ok.eq(~pll.locked|~pod_done) m.d.comb += ResetSignal("init").eq(reset_ok) - m.d.comb += ResetSignal("sync").eq(reset_ok|self.ddr_clk_reset) + m.d.comb += ResetSignal("sync").eq(reset_ok) m.d.comb += ResetSignal("dramsync").eq(reset_ok|self.ddr_clk_reset) # # Generating sync (100Mhz) from sync2x From dedba0951797896c4f6c44238ebffc7e4472f0f2 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Thu, 14 Apr 2022 11:34:48 +0100 Subject: [PATCH 40/47] whilst IOpads and PLLs were driving from dramsync, they were *not* driving the 4x from dramsync2x, but from sync2x instead. which is completely wrong when trying to do asynchronous DRAM PHY for when synchronous is done (the default right now) this requires a matching drs = DomainRenamer({"sync": "dramsync", "sync2x": "dramsync2x"}) --- gram/phy/ecp5ddrphy.py | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index 007c143..e6e0fb1 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -40,7 +40,7 @@ def elaborate(self, platform): lock = Signal() lock_d = Signal() m.submodules += Instance("DDRDLLA", - i_CLK=ClockSignal("sync2x"), + i_CLK=ClockSignal("dramsync2x"), i_RST=ResetSignal("init"), i_UDDCNTLN=~update, i_FREEZE=freeze, @@ -224,7 +224,7 @@ def elaborate(self, platform): # Clock -------------------------------------------------------------------------------- m.d.comb += [ self.pads.clk.o_clk.eq(ClockSignal("dramsync")), - self.pads.clk.o_fclk.eq(ClockSignal("sync2x")), + self.pads.clk.o_fclk.eq(ClockSignal("dramsync2x")), ] for i in range(len(self.pads.clk.o0)): m.d.comb += [ @@ -237,9 +237,9 @@ def elaborate(self, platform): # Addresses and Commands --------------------------------------------------------------- m.d.comb += [ self.pads.a.o_clk.eq(ClockSignal("dramsync")), - self.pads.a.o_fclk.eq(ClockSignal("sync2x")), + self.pads.a.o_fclk.eq(ClockSignal("dramsync2x")), self.pads.ba.o_clk.eq(ClockSignal("dramsync")), - self.pads.ba.o_fclk.eq(ClockSignal("sync2x")), + self.pads.ba.o_fclk.eq(ClockSignal("dramsync2x")), ] for i in range(len(self.pads.a.o0)): m.d.comb += [ @@ -278,7 +278,7 @@ def elaborate(self, platform): else: m.d.comb += [ pad.o_clk.eq(ClockSignal("dramsync")), - pad.o_fclk.eq(ClockSignal("sync2x")), + pad.o_fclk.eq(ClockSignal("dramsync2x")), ] if name == "reset": for i in range(len(pad.o)): @@ -343,7 +343,7 @@ def elaborate(self, platform): # Clocks / Reset i_SCLK=ClockSignal("sync"), - i_ECLK=ClockSignal("sync2x"), + i_ECLK=ClockSignal("dramsync2x"), i_RST=ResetSignal("dramsync"), i_DDRDEL=init.delay, i_PAUSE=init.pause | dqsbufm_manager.pause, @@ -405,7 +405,7 @@ def elaborate(self, platform): m.submodules += Instance("ODDRX2DQA", i_RST=ResetSignal("dramsync"), - i_ECLK=ClockSignal("sync2x"), + i_ECLK=ClockSignal("dramsync2x"), i_SCLK=ClockSignal("dramsync"), i_DQSW270=dqsw270, i_D0=dm_o_data_muxed[0], @@ -419,7 +419,7 @@ def elaborate(self, platform): m.submodules += [ Instance("ODDRX2DQSB", i_RST=ResetSignal("dramsync"), - i_ECLK=ClockSignal("sync2x"), + i_ECLK=ClockSignal("dramsync2x"), i_SCLK=ClockSignal(), i_DQSW=dqsw, i_D0=0, @@ -429,7 +429,7 @@ def elaborate(self, platform): o_Q=dqs), Instance("TSHX2DQSA", i_RST=ResetSignal("dramsync"), - i_ECLK=ClockSignal("sync2x"), + i_ECLK=ClockSignal("dramsync2x"), i_SCLK=ClockSignal(), i_DQSW=dqsw, i_T0=~(dqs_oe | dqs_postamble), @@ -471,7 +471,7 @@ def elaborate(self, platform): m.submodules += [ Instance("ODDRX2DQA", i_RST=ResetSignal("dramsync"), - i_ECLK=ClockSignal("sync2x"), + i_ECLK=ClockSignal("dramsync2x"), i_SCLK=ClockSignal(), i_DQSW270=dqsw270, i_D0=dq_o_data_muxed[0], @@ -485,7 +485,7 @@ def elaborate(self, platform): o_Z = dq_i_delayed), Instance("IDDRX2DQA", i_RST=ResetSignal("dramsync"), - i_ECLK=ClockSignal("sync2x"), + i_ECLK=ClockSignal("dramsync2x"), i_SCLK=ClockSignal(), i_DQSR90=dqsr90, i_RDPNTR0=rdpntr[0], @@ -501,7 +501,7 @@ def elaborate(self, platform): o_Q3=dq_i_data[3]), Instance("TSHX2DQA", i_RST=ResetSignal("dramsync"), - i_ECLK=ClockSignal("sync2x"), + i_ECLK=ClockSignal("dramsync2x"), i_SCLK=ClockSignal(), i_DQSW270=dqsw270, i_T0=~dq_oe, From 1765baa4d77985ac39a9503ddf833c4b7ee644e0 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 15 Apr 2022 10:50:33 +0100 Subject: [PATCH 41/47] connect (new) reset signal on IOPads which comes from the nmigen Pin. this had to be done because otherwise the IOPads are unstable. next experiment is to hook ResetSignal(dramsync) with the firmware-driven reset, which should allow the IOpads - and DQS - to fully stabilise (oh, and also allow retries on setting them up) --- gram/phy/ecp5ddrphy.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index e6e0fb1..d7d68b4 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -224,6 +224,7 @@ def elaborate(self, platform): # Clock -------------------------------------------------------------------------------- m.d.comb += [ self.pads.clk.o_clk.eq(ClockSignal("dramsync")), + self.pads.clk.o_prst.eq(ResetSignal("dramsync")), self.pads.clk.o_fclk.eq(ClockSignal("dramsync2x")), ] for i in range(len(self.pads.clk.o0)): @@ -237,6 +238,7 @@ def elaborate(self, platform): # Addresses and Commands --------------------------------------------------------------- m.d.comb += [ self.pads.a.o_clk.eq(ClockSignal("dramsync")), + self.pads.a.o_prst.eq(ResetSignal("dramsync")), self.pads.a.o_fclk.eq(ClockSignal("dramsync2x")), self.pads.ba.o_clk.eq(ClockSignal("dramsync")), self.pads.ba.o_fclk.eq(ClockSignal("dramsync2x")), @@ -278,6 +280,7 @@ def elaborate(self, platform): else: m.d.comb += [ pad.o_clk.eq(ClockSignal("dramsync")), + pad.o_prst.eq(ResetSignal("dramsync")), pad.o_fclk.eq(ClockSignal("dramsync2x")), ] if name == "reset": From aa538b61e229aad7ecceb0569a44bfd6bf54f06c Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 15 Apr 2022 13:13:46 +0100 Subject: [PATCH 42/47] fix reset to be xdr=4x in ECP5DDRPHY --- gram/phy/ecp5ddrphy.py | 25 ++++++++++++------------- 1 file changed, 12 insertions(+), 13 deletions(-) diff --git a/gram/phy/ecp5ddrphy.py b/gram/phy/ecp5ddrphy.py index d7d68b4..e1d1147 100644 --- a/gram/phy/ecp5ddrphy.py +++ b/gram/phy/ecp5ddrphy.py @@ -235,6 +235,14 @@ def elaborate(self, platform): self.pads.clk.o3[i].eq(1), ] + # Reset signal ------------------------ + + rst = Signal(reset_less=True) + drs = ResetSignal("dramsync") + m.d.comb += rst.eq(drs) + #if hasattr(self.pads, "rst"): + + # Addresses and Commands --------------------------------------------------------------- m.d.comb += [ self.pads.a.o_clk.eq(ClockSignal("dramsync")), @@ -273,23 +281,14 @@ def elaborate(self, platform): # dfi.Interface it is "reset" dfi2pads = {'rst': 'reset', 'cs': 'cs_n'} name = dfi2pads.get(name, name) # remap if exists - if name == "reset": - m.d.comb += [ - pad.o_clk.eq(ClockSignal("sync")), - ] - else: - m.d.comb += [ + m.d.comb += [ pad.o_clk.eq(ClockSignal("dramsync")), pad.o_prst.eq(ResetSignal("dramsync")), pad.o_fclk.eq(ClockSignal("dramsync2x")), ] - if name == "reset": - for i in range(len(pad.o)): - m.d.comb += [ - pad.o[i].eq(getattr(dfi.phases[0], name)[i]), - ] - elif name == "cs_n": - # cs_n can't be directly connected to cs without being inverted first... + if name == "cs_n": + # cs_n can't be directly connected to cs without + # being inverted first... for i in range(len(pad.o0)): m.d.comb += [ pad.o0[i].eq(~getattr(dfi.phases[0], name)[i]), From c2a19a50f48686fc589da2b461c4e15303c378f5 Mon Sep 17 00:00:00 2001 From: Luke Kenneth Casson Leighton Date: Fri, 15 Apr 2022 15:58:28 +0100 Subject: [PATCH 43/47] remove pyvcd dependency, it is pulled in by nmigen anyway --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index 8f7c7eb..89c5dab 100755 --- a/setup.py +++ b/setup.py @@ -26,7 +26,7 @@ packages=find_packages(exclude=("test*", "doc*", "examples*", "contrib*", "libgram*")), include_package_data=True, python_requires="~=3.7", - install_requires=['nmigen', 'nmigen_boards', 'pyvcd'], + install_requires=['nmigen', 'nmigen_boards'], setup_requires=['setuptools', 'setuptools_scm'], entry_points={}, From dcb069317f801f131f2757bb0d1ea51739a071a8 Mon Sep 17 00:00:00 2001 From: Cesar Strauss Date: Sat, 23 Jul 2022 13:59:10 -0300 Subject: [PATCH 44/47] Merge runsimsoc2.sh into runsimsoc.sh The only difference was a different location of the ECP5 model files and the use of python3. Make it so that both locations work, and use python3 exclusively. --- gram/simulation/.gitignore | 3 +++ gram/simulation/runsimsoc.sh | 15 +++++++++++++-- gram/simulation/runsimsoc2.sh | 14 -------------- 3 files changed, 16 insertions(+), 16 deletions(-) delete mode 100755 gram/simulation/runsimsoc2.sh diff --git a/gram/simulation/.gitignore b/gram/simulation/.gitignore index 5bd86a4..2cdcaaa 100644 --- a/gram/simulation/.gitignore +++ b/gram/simulation/.gitignore @@ -13,3 +13,6 @@ build_simcrg/ # Patched files DDRDLLA.v + +# ECP5 model files +ecp5u diff --git a/gram/simulation/runsimsoc.sh b/gram/simulation/runsimsoc.sh index 1722be7..f1338e9 100755 --- a/gram/simulation/runsimsoc.sh +++ b/gram/simulation/runsimsoc.sh @@ -1,9 +1,20 @@ #!/bin/bash set -e -LIB_DIR=/usr/local/diamond/3.11_x64/ispfpga/verilog/data/ecp5u +# Check for presence of the Diamond ECP5 verilog model files +LIB_DIR=./ecp5u +if [ ! -d "$LIB_DIR" ]; then + LIB_DIR=/usr/local/diamond/3.11_x64/ispfpga/verilog/data/ecp5u + if [ ! -d "$LIB_DIR" ]; then + echo "Error: Could not find the ECP5 verilog models." >&2 + echo >&2 + echo "Please either install Diamond (in /usr/local), " >&2 + echo "or copy its ecp5u directory ($LIB_DIR) here." >&2 + exit 1 + fi +fi -python simsoc.py +python3 simsoc.py yosys simsoc.ys cp ${LIB_DIR}/DDRDLLA.v DDRDLLA.v patch DDRDLLA.v < DDRDLLA.patch diff --git a/gram/simulation/runsimsoc2.sh b/gram/simulation/runsimsoc2.sh deleted file mode 100755 index 4f97052..0000000 --- a/gram/simulation/runsimsoc2.sh +++ /dev/null @@ -1,14 +0,0 @@ -#!/bin/bash -set -e - -LIB_DIR=./ecp5u - -python3 simsoc.py -yosys simsoc.ys -cp ${LIB_DIR}/DDRDLLA.v DDRDLLA.v -patch DDRDLLA.v < DDRDLLA.patch -iverilog -Wall -g2012 -s simsoctb -o simsoc simsoctb.v build_simsoc/top.v dram_model/ddr3.v ${LIB_DIR}/ECLKSYNCB.v ${LIB_DIR}/EHXPLLL.v ${LIB_DIR}/PUR.v ${LIB_DIR}/GSR.v \ - ${LIB_DIR}/FD1S3AX.v ${LIB_DIR}/SGSR.v ${LIB_DIR}/ODDRX2F.v ${LIB_DIR}/ODDRX2DQA.v ${LIB_DIR}/DELAYF.v ${LIB_DIR}/BB.v ${LIB_DIR}/OB.v ${LIB_DIR}/IB.v \ - ${LIB_DIR}/DQSBUFM.v ${LIB_DIR}/UDFDL5_UDP_X.v ${LIB_DIR}/TSHX2DQSA.v ${LIB_DIR}/TSHX2DQA.v ${LIB_DIR}/ODDRX2DQSB.v ${LIB_DIR}/IDDRX2DQA.v DDRDLLA.v \ - ${LIB_DIR}/CLKDIVF.v -vvp -n simsoc -fst-speed From b33a894b75cf78667f9c42904a1ae9f12130473d Mon Sep 17 00:00:00 2001 From: Cesar Strauss Date: Sat, 23 Jul 2022 14:14:53 -0300 Subject: [PATCH 45/47] Use DELAYG instead of DELAYF on Icarus simulation They were swapped, at some point, but simulation was not kept in sync. --- gram/simulation/runsimsoc.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/gram/simulation/runsimsoc.sh b/gram/simulation/runsimsoc.sh index f1338e9..e847c79 100755 --- a/gram/simulation/runsimsoc.sh +++ b/gram/simulation/runsimsoc.sh @@ -19,7 +19,7 @@ yosys simsoc.ys cp ${LIB_DIR}/DDRDLLA.v DDRDLLA.v patch DDRDLLA.v < DDRDLLA.patch iverilog -Wall -g2012 -s simsoctb -o simsoc simsoctb.v build_simsoc/top.v dram_model/ddr3.v ${LIB_DIR}/ECLKSYNCB.v ${LIB_DIR}/EHXPLLL.v ${LIB_DIR}/PUR.v ${LIB_DIR}/GSR.v \ - ${LIB_DIR}/FD1S3AX.v ${LIB_DIR}/SGSR.v ${LIB_DIR}/ODDRX2F.v ${LIB_DIR}/ODDRX2DQA.v ${LIB_DIR}/DELAYF.v ${LIB_DIR}/BB.v ${LIB_DIR}/OB.v ${LIB_DIR}/IB.v \ + ${LIB_DIR}/FD1S3AX.v ${LIB_DIR}/SGSR.v ${LIB_DIR}/ODDRX2F.v ${LIB_DIR}/ODDRX2DQA.v ${LIB_DIR}/DELAYG.v ${LIB_DIR}/BB.v ${LIB_DIR}/OB.v ${LIB_DIR}/IB.v \ ${LIB_DIR}/DQSBUFM.v ${LIB_DIR}/UDFDL5_UDP_X.v ${LIB_DIR}/TSHX2DQSA.v ${LIB_DIR}/TSHX2DQA.v ${LIB_DIR}/ODDRX2DQSB.v ${LIB_DIR}/IDDRX2DQA.v DDRDLLA.v \ ${LIB_DIR}/CLKDIVF.v vvp -n simsoc -fst-speed From 2818912ef6c549eb90814981ae5e4e9a634a9473 Mon Sep 17 00:00:00 2001 From: Cesar Strauss Date: Sat, 23 Jul 2022 14:29:45 -0300 Subject: [PATCH 46/47] Do not invert DDR3 CS pin on Icarus testbench It seems that IcarusECPIX5Platform does handle PinsN correctly. --- gram/simulation/simsoctb.v | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/gram/simulation/simsoctb.v b/gram/simulation/simsoctb.v index 7818b4e..388d1a7 100644 --- a/gram/simulation/simsoctb.v +++ b/gram/simulation/simsoctb.v @@ -43,8 +43,6 @@ module simsoctb; wire [1:0] dram_tdqs_n; wire dram_rst; - // anything here with "_n" has to be inverted. nmigen platforms - // sort that out by inverting (with PinsN) ddr3 #( .check_strict_timing(0) ) ram_chip ( @@ -52,7 +50,7 @@ module simsoctb; .ck(dram_ck), .ck_n(~dram_ck), .cke(dram_cke), - .cs_n(~dram_cs_n), + .cs_n(dram_cs_n), .ras_n(dram_ras_n), .cas_n(dram_cas_n), .we_n(dram_we_n), From 60ec1954c4a2d2fbd6040e175404fe4f12ab9f1a Mon Sep 17 00:00:00 2001 From: Cesar Strauss Date: Sat, 23 Jul 2022 18:38:12 -0300 Subject: [PATCH 47/47] Remove unused Minerva CPU import from headless examples The headless examples do not use an embedded CPU. Instead, the host computer commands the Gram controller via a Wishbone-UART bridge. --- examples/headless-ecpix5.py | 1 - examples/headless-versa-85.py | 1 - 2 files changed, 2 deletions(-) diff --git a/examples/headless-ecpix5.py b/examples/headless-ecpix5.py index 8e1bd20..da7333b 100644 --- a/examples/headless-ecpix5.py +++ b/examples/headless-ecpix5.py @@ -4,7 +4,6 @@ from nmigen.lib.cdc import ResetSynchronizer from nmigen_soc import wishbone, memory -from lambdasoc.cpu.minerva import MinervaCPU from lambdasoc.periph.intc import GenericInterruptController from lambdasoc.periph.serial import AsyncSerialPeripheral from lambdasoc.periph.sram import SRAMPeripheral diff --git a/examples/headless-versa-85.py b/examples/headless-versa-85.py index 3050f89..371a868 100644 --- a/examples/headless-versa-85.py +++ b/examples/headless-versa-85.py @@ -5,7 +5,6 @@ from nmigen.lib.cdc import ResetSynchronizer from nmigen_soc import wishbone, memory -from lambdasoc.cpu.minerva import MinervaCPU from lambdasoc.periph.intc import GenericInterruptController from lambdasoc.periph.serial import AsyncSerialPeripheral from lambdasoc.periph.sram import SRAMPeripheral