From ad6c1a206959f7257ee3826078463a5998c505df Mon Sep 17 00:00:00 2001 From: Matthias Goergens Date: Wed, 4 Dec 2024 14:35:44 +0800 Subject: [PATCH 1/3] Move to Harvard Architecture --- ceno_emul/src/rv32im.rs | 24 ++++++++++++++++++------ ceno_emul/src/tracer.rs | 3 +++ ceno_emul/src/vm_state.rs | 12 ++++++++---- ceno_emul/tests/test_vm_trace.rs | 2 +- 4 files changed, 30 insertions(+), 11 deletions(-) diff --git a/ceno_emul/src/rv32im.rs b/ceno_emul/src/rv32im.rs index 4554bdb08..a830f2721 100644 --- a/ceno_emul/src/rv32im.rs +++ b/ceno_emul/src/rv32im.rs @@ -59,11 +59,10 @@ pub trait EmuContext { // Get the value of a memory word without side-effects. fn peek_memory(&self, addr: WordAddr) -> Word; - // Load from memory, in the context of instruction fetching. - // Only called after check_insn_load returns true. - fn fetch(&mut self, pc: WordAddr) -> Result { - self.load_memory(pc) - } + /// Load from instruction cache + // TODO(Matthias): this should really return `Result` + // because the instruction cache should contain instructions, not just words. + fn fetch(&mut self, pc: WordAddr) -> Option; // Check access for instruction load fn check_insn_load(&self, _addr: ByteAddr) -> bool { @@ -465,12 +464,25 @@ impl Emulator { pub fn step(&self, ctx: &mut C) -> Result<()> { let pc = ctx.get_pc(); + // TODO(Matthias): `check_insn_load` should be unnecessary: we can statically + // check in `fn new_from_elf` that the program only has instructions where + // our platform accepts them. if !ctx.check_insn_load(pc) { ctx.trap(TrapCause::InstructionAccessFault)?; return Err(anyhow!("Fatal: could not fetch instruction at pc={:?}", pc)); } + let Some(word) = ctx.fetch(pc.waddr()) else { + ctx.trap(TrapCause::InstructionAccessFault)?; + return Err(anyhow!("Fatal: could not fetch instruction at pc={:?}", pc)); + }; - let word = ctx.fetch(pc.waddr())?; + // TODO(Matthias): our `Program` that we are fetching from should really store + // already `DecodedInstruction`s, instead of doing this very weird, very partial checking. + // + // [Note how we can fail here with an IllegalInstruction, and again further down + // when we match against the decoded instruction. We should centralise that. And + // our `step` function here shouldn't need to know anything about how instruction + // are encoded as numbers. if word & 0x03 != 0x03 { // Opcode must end in 0b11 in RV32IM. ctx.trap(TrapCause::IllegalInstruction(word))?; diff --git a/ceno_emul/src/tracer.rs b/ceno_emul/src/tracer.rs index bbb1e6a51..a1df363e2 100644 --- a/ceno_emul/src/tracer.rs +++ b/ceno_emul/src/tracer.rs @@ -327,6 +327,9 @@ impl Tracer { self.record.pc.after = pc; } + // TODO(Matthias): this should perhaps record `DecodedInstruction`s instead + // of raw codes, or perhaps only the pc, because we can always look up the + // instruction in the program. pub fn fetch(&mut self, pc: WordAddr, value: Word) { self.record.pc.before = pc.baddr(); self.record.insn_code = value; diff --git a/ceno_emul/src/vm_state.rs b/ceno_emul/src/vm_state.rs index d8eff9701..8f1746d79 100644 --- a/ceno_emul/src/vm_state.rs +++ b/ceno_emul/src/vm_state.rs @@ -190,10 +190,14 @@ impl EmuContext for VMState { *self.memory.get(&addr).unwrap_or(&0) } - fn fetch(&mut self, pc: WordAddr) -> Result { - let value = self.peek_memory(pc); - self.tracer.fetch(pc, value); - Ok(value) + // TODO(Matthias): this should really return `Result` + fn fetch(&mut self, pc: WordAddr) -> Option { + let byte_pc: ByteAddr = pc.into(); + let relative_pc = byte_pc.0.wrapping_sub(self.program.base_address); + let idx = (relative_pc / WORD_SIZE as u32) as usize; + let word = self.program.instructions.get(idx).copied()?; + self.tracer.fetch(pc, word); + Some(word) } fn check_data_load(&self, addr: ByteAddr) -> bool { diff --git a/ceno_emul/tests/test_vm_trace.rs b/ceno_emul/tests/test_vm_trace.rs index 2da543108..d931a6a9c 100644 --- a/ceno_emul/tests/test_vm_trace.rs +++ b/ceno_emul/tests/test_vm_trace.rs @@ -54,7 +54,7 @@ fn test_empty_program() -> Result<()> { ); let mut ctx = VMState::new(CENO_PLATFORM, empty_program); let res = run(&mut ctx); - assert!(matches!(res, Err(e) if e.to_string().contains("IllegalInstruction(0)"))); + assert!(matches!(res, Err(e) if e.to_string().contains("InstructionAccessFault")),); Ok(()) } From 1b4625520eaa1a871b41a070c89de23d30cd0841 Mon Sep 17 00:00:00 2001 From: Matthias Goergens Date: Wed, 4 Dec 2024 14:48:45 +0800 Subject: [PATCH 2/3] Comment --- ceno_emul/src/rv32im.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/ceno_emul/src/rv32im.rs b/ceno_emul/src/rv32im.rs index a830f2721..0b53f1afd 100644 --- a/ceno_emul/src/rv32im.rs +++ b/ceno_emul/src/rv32im.rs @@ -477,9 +477,10 @@ impl Emulator { }; // TODO(Matthias): our `Program` that we are fetching from should really store - // already `DecodedInstruction`s, instead of doing this very weird, very partial checking. + // already decoded instructions, instead of doing this weird, partial checking + // for `0x03` here. // - // [Note how we can fail here with an IllegalInstruction, and again further down + // Note how we can fail here with an IllegalInstruction, and again further down // when we match against the decoded instruction. We should centralise that. And // our `step` function here shouldn't need to know anything about how instruction // are encoded as numbers. From eb1cda5256fcff3de753fccea723ab0be89cafa5 Mon Sep 17 00:00:00 2001 From: Matthias Goergens Date: Wed, 4 Dec 2024 14:49:50 +0800 Subject: [PATCH 3/3] Explain --- ceno_emul/src/rv32im.rs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ceno_emul/src/rv32im.rs b/ceno_emul/src/rv32im.rs index 0b53f1afd..e065589fd 100644 --- a/ceno_emul/src/rv32im.rs +++ b/ceno_emul/src/rv32im.rs @@ -484,6 +484,9 @@ impl Emulator { // when we match against the decoded instruction. We should centralise that. And // our `step` function here shouldn't need to know anything about how instruction // are encoded as numbers. + // + // One way to centralise is to do the check once when loading the program from the + // ELF. if word & 0x03 != 0x03 { // Opcode must end in 0b11 in RV32IM. ctx.trap(TrapCause::IllegalInstruction(word))?;