From ad6c1a206959f7257ee3826078463a5998c505df Mon Sep 17 00:00:00 2001
From: Matthias Goergens <matthias.goergens@gmail.com>
Date: Wed, 4 Dec 2024 14:35:44 +0800
Subject: [PATCH 1/3] Move to Harvard Architecture

---
 ceno_emul/src/rv32im.rs          | 24 ++++++++++++++++++------
 ceno_emul/src/tracer.rs          |  3 +++
 ceno_emul/src/vm_state.rs        | 12 ++++++++----
 ceno_emul/tests/test_vm_trace.rs |  2 +-
 4 files changed, 30 insertions(+), 11 deletions(-)
diff --git a/ceno_emul/src/rv32im.rs b/ceno_emul/src/rv32im.rs
index 4554bdb08..a830f2721 100644
--- a/ceno_emul/src/rv32im.rs
+++ b/ceno_emul/src/rv32im.rs
@@ -59,11 +59,10 @@ pub trait EmuContext {
     // Get the value of a memory word without side-effects.
     fn peek_memory(&self, addr: WordAddr) -> Word;
 
-    // Load from memory, in the context of instruction fetching.
-    // Only called after check_insn_load returns true.
-    fn fetch(&mut self, pc: WordAddr) -> Result<Word> {
-        self.load_memory(pc)
-    }
+    /// Load from instruction cache
+    // TODO(Matthias): this should really return `Result<DecodedInstruction>`
+    // because the instruction cache should contain instructions, not just words.
+    fn fetch(&mut self, pc: WordAddr) -> Option<Word>;
 
     // Check access for instruction load
     fn check_insn_load(&self, _addr: ByteAddr) -> bool {
@@ -465,12 +464,25 @@ impl Emulator {
     pub fn step<C: EmuContext>(&self, ctx: &mut C) -> Result<()> {
         let pc = ctx.get_pc();
 
+        // TODO(Matthias): `check_insn_load` should be unnecessary: we can statically
+        // check in `fn new_from_elf` that the program only has instructions where
+        // our platform accepts them.
         if !ctx.check_insn_load(pc) {
             ctx.trap(TrapCause::InstructionAccessFault)?;
             return Err(anyhow!("Fatal: could not fetch instruction at pc={:?}", pc));
         }
+        let Some(word) = ctx.fetch(pc.waddr()) else {
+            ctx.trap(TrapCause::InstructionAccessFault)?;
+            return Err(anyhow!("Fatal: could not fetch instruction at pc={:?}", pc));
+        };
 
-        let word = ctx.fetch(pc.waddr())?;
+        // TODO(Matthias): our `Program` that we are fetching from should really store
+        // already `DecodedInstruction`s, instead of doing this very weird, very partial checking.
+        //
+        // [Note how we can fail here with an IllegalInstruction, and again further down
+        // when we match against the decoded instruction. We should centralise that. And
+        // our `step` function here shouldn't need to know anything about how instruction
+        // are encoded as numbers.
         if word & 0x03 != 0x03 {
             // Opcode must end in 0b11 in RV32IM.
             ctx.trap(TrapCause::IllegalInstruction(word))?;
diff --git a/ceno_emul/src/tracer.rs b/ceno_emul/src/tracer.rs
index bbb1e6a51..a1df363e2 100644
--- a/ceno_emul/src/tracer.rs
+++ b/ceno_emul/src/tracer.rs
@@ -327,6 +327,9 @@ impl Tracer {
         self.record.pc.after = pc;
     }
 
+    // TODO(Matthias): this should perhaps record `DecodedInstruction`s instead
+    // of raw codes, or perhaps only the pc, because we can always look up the
+    // instruction in the program.
     pub fn fetch(&mut self, pc: WordAddr, value: Word) {
         self.record.pc.before = pc.baddr();
         self.record.insn_code = value;
diff --git a/ceno_emul/src/vm_state.rs b/ceno_emul/src/vm_state.rs
index d8eff9701..8f1746d79 100644
--- a/ceno_emul/src/vm_state.rs
+++ b/ceno_emul/src/vm_state.rs
@@ -190,10 +190,14 @@ impl EmuContext for VMState {
         *self.memory.get(&addr).unwrap_or(&0)
     }
 
-    fn fetch(&mut self, pc: WordAddr) -> Result<Word> {
-        let value = self.peek_memory(pc);
-        self.tracer.fetch(pc, value);
-        Ok(value)
+    // TODO(Matthias): this should really return `Result<DecodedInstruction>`
+    fn fetch(&mut self, pc: WordAddr) -> Option<Word> {
+        let byte_pc: ByteAddr = pc.into();
+        let relative_pc = byte_pc.0.wrapping_sub(self.program.base_address);
+        let idx = (relative_pc / WORD_SIZE as u32) as usize;
+        let word = self.program.instructions.get(idx).copied()?;
+        self.tracer.fetch(pc, word);
+        Some(word)
     }
 
     fn check_data_load(&self, addr: ByteAddr) -> bool {
diff --git a/ceno_emul/tests/test_vm_trace.rs b/ceno_emul/tests/test_vm_trace.rs
index 2da543108..d931a6a9c 100644
--- a/ceno_emul/tests/test_vm_trace.rs
+++ b/ceno_emul/tests/test_vm_trace.rs
@@ -54,7 +54,7 @@ fn test_empty_program() -> Result<()> {
     );
     let mut ctx = VMState::new(CENO_PLATFORM, empty_program);
     let res = run(&mut ctx);
-    assert!(matches!(res, Err(e) if e.to_string().contains("IllegalInstruction(0)")));
+    assert!(matches!(res, Err(e) if e.to_string().contains("InstructionAccessFault")),);
     Ok(())
 }
 

From 1b4625520eaa1a871b41a070c89de23d30cd0841 Mon Sep 17 00:00:00 2001
From: Matthias Goergens <matthias.goergens@gmail.com>
Date: Wed, 4 Dec 2024 14:48:45 +0800
Subject: [PATCH 2/3] Comment

---
 ceno_emul/src/rv32im.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/ceno_emul/src/rv32im.rs b/ceno_emul/src/rv32im.rs
index a830f2721..0b53f1afd 100644
--- a/ceno_emul/src/rv32im.rs
+++ b/ceno_emul/src/rv32im.rs
@@ -477,9 +477,10 @@ impl Emulator {
         };
 
         // TODO(Matthias): our `Program` that we are fetching from should really store
-        // already `DecodedInstruction`s, instead of doing this very weird, very partial checking.
+        // already decoded instructions, instead of doing this weird, partial checking
+        // for `0x03` here.
         //
-        // [Note how we can fail here with an IllegalInstruction, and again further down
+        // Note how we can fail here with an IllegalInstruction, and again further down
         // when we match against the decoded instruction. We should centralise that. And
         // our `step` function here shouldn't need to know anything about how instruction
         // are encoded as numbers.

From eb1cda5256fcff3de753fccea723ab0be89cafa5 Mon Sep 17 00:00:00 2001
From: Matthias Goergens <matthias.goergens@gmail.com>
Date: Wed, 4 Dec 2024 14:49:50 +0800
Subject: [PATCH 3/3] Explain

---
 ceno_emul/src/rv32im.rs | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/ceno_emul/src/rv32im.rs b/ceno_emul/src/rv32im.rs
index 0b53f1afd..e065589fd 100644
--- a/ceno_emul/src/rv32im.rs
+++ b/ceno_emul/src/rv32im.rs
@@ -484,6 +484,9 @@ impl Emulator {
         // when we match against the decoded instruction. We should centralise that. And
         // our `step` function here shouldn't need to know anything about how instruction
         // are encoded as numbers.
+        //
+        // One way to centralise is to do the check once when loading the program from the
+        // ELF.
         if word & 0x03 != 0x03 {
             // Opcode must end in 0b11 in RV32IM.
             ctx.trap(TrapCause::IllegalInstruction(word))?;