diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index b882ca931..8bcc5dfed 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -8,6 +8,10 @@ on:
     branches:
       - master
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
+
 jobs:
   skip_check:
     runs-on: [self-hosted, Linux, X64]
@@ -33,9 +37,9 @@ jobs:
     runs-on: [self-hosted, Linux, X64]
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Cargo cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: |
             ~/.cargo/bin/
@@ -43,18 +47,17 @@ jobs:
             ~/.cargo/registry/cache/
             ~/.cargo/git/db/
             target/
+            guest/target/
           key: integration-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
       - uses: dtolnay/rust-toolchain@nightly
 
       - name: Run example
         env:
-          RAYON_NUM_THREADS: 2
           RUSTFLAGS: "-C opt-level=3"
         run: cargo run --package ceno_zkvm --example riscv_opcodes -- --start 10 --end 11
 
       - name: Run fibonacci
         env:
-          RAYON_NUM_THREADS: 8
           RUST_LOG: debug
           RUSTFLAGS: "-C opt-level=3"
         run: cargo run --package ceno_zkvm --bin e2e -- --platform=sp1 ceno_zkvm/examples/fibonacci.elf
diff --git a/.github/workflows/lints.yml b/.github/workflows/lints.yml
index fe7367a54..1f8d41b0b 100644
--- a/.github/workflows/lints.yml
+++ b/.github/workflows/lints.yml
@@ -8,32 +8,18 @@ on:
     branches:
       - master
 
-jobs:
-  skip_check:
-    runs-on: [self-hosted, Linux, X64]
-    outputs:
-      should_skip: ${{ steps.skip_check.outputs.should_skip }}
-    steps:
-      - id: skip_check
-        uses: fkirc/skip-duplicate-actions@v5
-        with:
-          cancel_others: 'true'
-          concurrent_skipping: 'same_content_newer'
-          paths_ignore: '["**/README.md"]'
-          do_not_skip: '["pull_request", "workflow_dispatch", "schedule", "merge_group"]'
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
 
+jobs:
   lints:
-    needs: [skip_check]
-    if: |
-      github.event.pull_request.draft == false &&
-      (github.event.action == 'ready_for_review' || needs.skip_check.outputs.should_skip != 'true')
-
     name: Various lints
     timeout-minutes: 30
-    runs-on: [self-hosted, Linux, X64]
+    runs-on: ubuntu-24.04
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@master
         with:
           components: rustfmt, clippy
@@ -41,7 +27,7 @@ jobs:
           # TODO(Matthias): see whether we can keep this in sync with rust-toolchain.toml automatically?
           toolchain: nightly-2024-12-06
       - name: Cargo cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: |
             ~/.cargo/bin/
@@ -49,8 +35,9 @@ jobs:
             ~/.cargo/registry/cache/
             ~/.cargo/git/db/
             target/
-            examples/target/
+            guest/target/
           key: lint-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: lint-${{ runner.os }}-cargo-
 
       - name: Install cargo make
         run: |
@@ -62,13 +49,13 @@ jobs:
         env:
           RUSTFLAGS: "-Dwarnings"
         run: |
-          cargo check --workspace --all-targets --exclude ceno_rt
+          cargo check --workspace --all-targets
           # We have a lot of code under #[cfg(not(debug_assertions))] and similar,
           # so we need to run cargo check in release mode, too:
-          cargo check --workspace --all-targets --exclude ceno_rt --release
+          cargo check --workspace --all-targets --release
           cargo make clippy
           # Same for clippy:
-          cargo clippy --workspace --all-targets --exclude ceno_rt --release
+          cargo clippy --workspace --all-targets --release
 
       - name: Install taplo
         run: taplo --version || cargo install taplo-cli
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index da0784782..0f2ea9df3 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -8,38 +8,25 @@ on:
     branches:
       - master
 
-jobs:
-  skip_check:
-    runs-on: [self-hosted, Linux, X64]
-    outputs:
-      should_skip: ${{ steps.skip_check.outputs.should_skip }}
-    steps:
-      - id: skip_check
-        uses: fkirc/skip-duplicate-actions@v5
-        with:
-          cancel_others: 'true'
-          concurrent_skipping: 'same_content_newer'
-          paths_ignore: '["**/README.md"]'
-          do_not_skip: '["pull_request", "workflow_dispatch", "schedule", "merge_group"]'
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
 
+jobs:
   tests:
-    needs: [skip_check]
-    if: |
-      github.event.pull_request.draft == false &&
-      (github.event.action == 'ready_for_review' || needs.skip_check.outputs.should_skip != 'true')
     name: Run Tests
     timeout-minutes: 30
-    runs-on: [self-hosted, Linux, X64]
+    runs-on: ubuntu-24.04
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@master
         with:
           targets: riscv32im-unknown-none-elf
           # TODO(Matthias): see whether we can keep this in sync with rust-toolchain.toml automatically?
           toolchain: nightly-2024-12-06
       - name: Cargo cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: |
             ~/.cargo/bin/
@@ -47,8 +34,9 @@ jobs:
             ~/.cargo/registry/cache/
             ~/.cargo/git/db/
             target/
-            examples/target/
+            guest/target/
           key: tests-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: tests-${{ runner.os }}-cargo-
 
       - name: Install cargo make
         run: |
diff --git a/Cargo.lock b/Cargo.lock
index 17ecc65ad..5edd10edd 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -238,18 +238,12 @@ dependencies = [
  "itertools 0.13.0",
  "num-derive",
  "num-traits",
+ "rrs-succinct",
  "strum",
  "strum_macros",
  "tracing",
 ]
 
-[[package]]
-name = "ceno_rt"
-version = "0.1.0"
-dependencies = [
- "riscv",
-]
-
 [[package]]
 name = "ceno_zkvm"
 version = "0.1.0"
@@ -467,12 +461,6 @@ dependencies = [
  "itertools 0.10.5",
 ]
 
-[[package]]
-name = "critical-section"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
-
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.13"
@@ -583,6 +571,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "downcast-rs"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
+
 [[package]]
 name = "either"
 version = "1.13.0"
@@ -595,12 +589,6 @@ version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b"
 
-[[package]]
-name = "embedded-hal"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89"
-
 [[package]]
 name = "encode_unicode"
 version = "1.0.0"
@@ -648,9 +636,9 @@ dependencies = [
 
 [[package]]
 name = "fastrand"
-version = "2.2.0"
+version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4"
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
 [[package]]
 name = "ff"
@@ -966,9 +954,9 @@ dependencies = [
 
 [[package]]
 name = "libc"
-version = "0.2.167"
+version = "0.2.168"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
+checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d"
 
 [[package]]
 name = "libredox"
@@ -1194,6 +1182,27 @@ dependencies = [
  "autocfg",
 ]
 
+[[package]]
+name = "num_enum"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9"
+dependencies = [
+ "num_enum_derive",
+]
+
+[[package]]
+name = "num_enum_derive"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799"
+dependencies = [
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "object"
 version = "0.36.5"
@@ -1388,7 +1397,7 @@ dependencies = [
  "smallvec",
  "symbolic-demangle",
  "tempfile",
- "thiserror 2.0.4",
+ "thiserror 2.0.6",
 ]
 
 [[package]]
@@ -1424,6 +1433,16 @@ dependencies = [
  "uint",
 ]
 
+[[package]]
+name = "proc-macro-crate"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919"
+dependencies = [
+ "once_cell",
+ "toml_edit",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.92"
@@ -1510,9 +1529,9 @@ dependencies = [
 
 [[package]]
 name = "redox_syscall"
-version = "0.5.7"
+version = "0.5.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f"
+checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
 dependencies = [
  "bitflags",
 ]
@@ -1582,35 +1601,16 @@ dependencies = [
 ]
 
 [[package]]
-name = "riscv"
-version = "0.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ea8ff73d3720bdd0a97925f0bf79ad2744b6da8ff36be3840c48ac81191d7a7"
-dependencies = [
- "critical-section",
- "embedded-hal",
- "paste",
- "riscv-macros",
- "riscv-pac",
-]
-
-[[package]]
-name = "riscv-macros"
+name = "rrs-succinct"
 version = "0.1.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f265be5d634272320a7de94cea15c22a3bfdd4eb42eb43edc528415f066a1f25"
+checksum = "3372685893a9f67d18e98e792d690017287fd17379a83d798d958e517d380fa9"
 dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.90",
+ "downcast-rs",
+ "num_enum",
+ "paste",
 ]
 
-[[package]]
-name = "riscv-pac"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
-
 [[package]]
 name = "rustc-demangle"
 version = "0.1.24"
@@ -1619,15 +1619,15 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
 
 [[package]]
 name = "rustix"
-version = "0.38.41"
+version = "0.38.42"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
+checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85"
 dependencies = [
  "bitflags",
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -1659,18 +1659,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
 [[package]]
 name = "serde"
-version = "1.0.215"
+version = "1.0.216"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f"
+checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.215"
+version = "1.0.216"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0"
+checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1869,11 +1869,11 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "2.0.4"
+version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f49a1853cf82743e3b7950f77e0f4d622ca36cf4317cba00c767838bac8d490"
+checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47"
 dependencies = [
- "thiserror-impl 2.0.4",
+ "thiserror-impl 2.0.6",
 ]
 
 [[package]]
@@ -1889,9 +1889,9 @@ dependencies = [
 
 [[package]]
 name = "thiserror-impl"
-version = "2.0.4"
+version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061"
+checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1927,6 +1927,23 @@ dependencies = [
  "serde_json",
 ]
 
+[[package]]
+name = "toml_datetime"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
+
+[[package]]
+name = "toml_edit"
+version = "0.19.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
+dependencies = [
+ "indexmap",
+ "toml_datetime",
+ "winnow",
+]
+
 [[package]]
 name = "tracing"
 version = "0.1.41"
@@ -2345,6 +2362,15 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "winnow"
+version = "0.5.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "wyz"
 version = "0.5.1"
diff --git a/Cargo.toml b/Cargo.toml
index 0739c93ae..c658ac887 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,8 @@
 [workspace]
-exclude = ["examples"]
+exclude = ["guest"]
 members = [
   "ceno_emul",
   "examples-builder",
-  "ceno_rt",
   "mpcs",
   "multilinear_extensions",
   "sumcheck",
@@ -54,6 +53,13 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 
 [profile.dev]
 lto = "thin"
+# We are running our tests with optimizations turned on to make them faster.
+# Please turn optimizations off, when you want accurate stack traces for debugging.
+opt-level = 2
+
+[profile.dev.package."*"]
+# Set the default for dependencies in Development mode.
+opt-level = 3
 
 [profile.release]
 lto = "thin"
diff --git a/Makefile.toml b/Makefile.toml
index 4a5a93188..199a42528 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -1,7 +1,5 @@
 [env]
 CARGO_MAKE_EXTEND_WORKSPACE_MAKEFILE = true
-CORE = { script = ["nproc"] }
-RAYON_NUM_THREADS = "${CORE}"
 
 [tasks.tests]
 args = [
@@ -11,10 +9,7 @@ args = [
   "--bins",
   "--tests",
   "--examples",
-  "--release",
   "--workspace",
-  "--exclude",
-  "ceno_rt",
 ]
 command = "cargo"
 workspace = false
@@ -34,8 +29,6 @@ args = [
   "clippy",
   "--workspace",
   "--all-targets",
-  "--exclude",
-  "ceno_rt",
   "--",
   "-D",
   "warnings",
diff --git a/README.md b/README.md
index 82f2e8bfe..0c57ac442 100644
--- a/README.md
+++ b/README.md
@@ -33,10 +33,9 @@ Clippy and check work as usual:
 ```sh
 cargo check
 cargo clippy
+cargo build
 ```
 
-Alas, `cargo build` doesn't work. That's a known problem and we're working on it.  Please use `cargo make build` instead for now.
-
 ### Setting up self-hosted CI docker container
 
 To set up docker container for CI, you can run the following command:
diff --git a/ceno_emul/Cargo.toml b/ceno_emul/Cargo.toml
index 38f0a8bfd..74562142a 100644
--- a/ceno_emul/Cargo.toml
+++ b/ceno_emul/Cargo.toml
@@ -15,6 +15,7 @@ elf = "0.7"
 itertools.workspace = true
 num-derive.workspace = true
 num-traits.workspace = true
+rrs_lib = { package = "rrs-succinct", version = "0.1.0" }
 strum.workspace = true
 strum_macros.workspace = true
 tracing.workspace = true
diff --git a/ceno_emul/src/disassemble/mod.rs b/ceno_emul/src/disassemble/mod.rs
new file mode 100644
index 000000000..ca6161000
--- /dev/null
+++ b/ceno_emul/src/disassemble/mod.rs
@@ -0,0 +1,378 @@
+use crate::rv32im::{InsnKind, Instruction};
+use itertools::izip;
+use rrs_lib::{
+    InstructionProcessor,
+    instruction_formats::{BType, IType, ITypeCSR, ITypeShamt, JType, RType, SType, UType},
+    process_instruction,
+};
+
+/// A transpiler that converts the 32-bit encoded instructions into instructions.
+pub(crate) struct InstructionTranspiler {
+    pc: u32,
+    word: u32,
+}
+
+impl Instruction {
+    /// Create a new [`Instruction`] from an R-type instruction.
+    #[must_use]
+    pub const fn from_r_type(kind: InsnKind, dec_insn: &RType, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: dec_insn.rd,
+            rs1: dec_insn.rs1,
+            rs2: dec_insn.rs2,
+            imm: 0,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] from an I-type instruction.
+    #[must_use]
+    pub const fn from_i_type(kind: InsnKind, dec_insn: &IType, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: dec_insn.rd,
+            rs1: dec_insn.rs1,
+            imm: dec_insn.imm,
+            rs2: 0,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] from an I-type instruction with a shamt.
+    #[must_use]
+    pub const fn from_i_type_shamt(kind: InsnKind, dec_insn: &ITypeShamt, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: dec_insn.rd,
+            rs1: dec_insn.rs1,
+            imm: dec_insn.shamt as i32,
+            rs2: 0,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] from an S-type instruction.
+    #[must_use]
+    pub const fn from_s_type(kind: InsnKind, dec_insn: &SType, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: 0,
+            rs1: dec_insn.rs1,
+            rs2: dec_insn.rs2,
+            imm: dec_insn.imm,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] from a B-type instruction.
+    #[must_use]
+    pub const fn from_b_type(kind: InsnKind, dec_insn: &BType, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: 0,
+            rs1: dec_insn.rs1,
+            rs2: dec_insn.rs2,
+            imm: dec_insn.imm,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] that is not implemented.
+    #[must_use]
+    pub const fn unimp(raw: u32) -> Self {
+        Self {
+            kind: InsnKind::INVALID,
+            rd: 0,
+            rs1: 0,
+            rs2: 0,
+            imm: 0,
+            raw,
+        }
+    }
+}
+
+impl InstructionProcessor for InstructionTranspiler {
+    type InstructionResult = Instruction;
+
+    fn process_add(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::ADD, &dec_insn, self.word)
+    }
+
+    fn process_addi(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::ADDI, &dec_insn, self.word)
+    }
+
+    fn process_sub(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SUB, &dec_insn, self.word)
+    }
+
+    fn process_xor(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::XOR, &dec_insn, self.word)
+    }
+
+    fn process_xori(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::XORI, &dec_insn, self.word)
+    }
+
+    fn process_or(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::OR, &dec_insn, self.word)
+    }
+
+    fn process_ori(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::ORI, &dec_insn, self.word)
+    }
+
+    fn process_and(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::AND, &dec_insn, self.word)
+    }
+
+    fn process_andi(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::ANDI, &dec_insn, self.word)
+    }
+
+    fn process_sll(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SLL, &dec_insn, self.word)
+    }
+
+    fn process_slli(&mut self, dec_insn: ITypeShamt) -> Self::InstructionResult {
+        Instruction::from_i_type_shamt(InsnKind::SLLI, &dec_insn, self.word)
+    }
+
+    fn process_srl(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SRL, &dec_insn, self.word)
+    }
+
+    fn process_srli(&mut self, dec_insn: ITypeShamt) -> Self::InstructionResult {
+        Instruction::from_i_type_shamt(InsnKind::SRLI, &dec_insn, self.word)
+    }
+
+    fn process_sra(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SRA, &dec_insn, self.word)
+    }
+
+    fn process_srai(&mut self, dec_insn: ITypeShamt) -> Self::InstructionResult {
+        Instruction::from_i_type_shamt(InsnKind::SRAI, &dec_insn, self.word)
+    }
+
+    fn process_slt(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SLT, &dec_insn, self.word)
+    }
+
+    fn process_slti(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::SLTI, &dec_insn, self.word)
+    }
+
+    fn process_sltu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SLTU, &dec_insn, self.word)
+    }
+
+    fn process_sltui(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::SLTIU, &dec_insn, self.word)
+    }
+
+    fn process_lb(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LB, &dec_insn, self.word)
+    }
+
+    fn process_lh(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LH, &dec_insn, self.word)
+    }
+
+    fn process_lw(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LW, &dec_insn, self.word)
+    }
+
+    fn process_lbu(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LBU, &dec_insn, self.word)
+    }
+
+    fn process_lhu(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LHU, &dec_insn, self.word)
+    }
+
+    fn process_sb(&mut self, dec_insn: SType) -> Self::InstructionResult {
+        Instruction::from_s_type(InsnKind::SB, &dec_insn, self.word)
+    }
+
+    fn process_sh(&mut self, dec_insn: SType) -> Self::InstructionResult {
+        Instruction::from_s_type(InsnKind::SH, &dec_insn, self.word)
+    }
+
+    fn process_sw(&mut self, dec_insn: SType) -> Self::InstructionResult {
+        Instruction::from_s_type(InsnKind::SW, &dec_insn, self.word)
+    }
+
+    fn process_beq(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BEQ, &dec_insn, self.word)
+    }
+
+    fn process_bne(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BNE, &dec_insn, self.word)
+    }
+
+    fn process_blt(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BLT, &dec_insn, self.word)
+    }
+
+    fn process_bge(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BGE, &dec_insn, self.word)
+    }
+
+    fn process_bltu(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BLTU, &dec_insn, self.word)
+    }
+
+    fn process_bgeu(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BGEU, &dec_insn, self.word)
+    }
+
+    fn process_jal(&mut self, dec_insn: JType) -> Self::InstructionResult {
+        Instruction {
+            kind: InsnKind::JAL,
+            rd: dec_insn.rd,
+            rs1: 0,
+            rs2: 0,
+            imm: dec_insn.imm,
+            raw: self.word,
+        }
+    }
+
+    fn process_jalr(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction {
+            kind: InsnKind::JALR,
+            rd: dec_insn.rd,
+            rs1: dec_insn.rs1,
+            rs2: 0,
+            imm: dec_insn.imm,
+            raw: self.word,
+        }
+    }
+
+    /// Convert LUI to ADDI.
+    fn process_lui(&mut self, dec_insn: UType) -> Self::InstructionResult {
+        // Verify assumption that the immediate is already shifted left by 12 bits.
+        assert_eq!(dec_insn.imm & 0xfff, 0);
+        Instruction {
+            kind: InsnKind::ADDI,
+            rd: dec_insn.rd,
+            rs1: 0,
+            rs2: 0,
+            imm: dec_insn.imm,
+            raw: self.word,
+        }
+    }
+
+    /// Convert AUIPC to ADDI.
+    fn process_auipc(&mut self, dec_insn: UType) -> Self::InstructionResult {
+        let pc = self.pc;
+        // Verify our assumption that the immediate is already shifted left by 12 bits.
+        assert_eq!(dec_insn.imm & 0xfff, 0);
+        Instruction {
+            kind: InsnKind::ADDI,
+            rd: dec_insn.rd,
+            rs1: 0,
+            rs2: 0,
+            imm: dec_insn.imm.wrapping_add(pc as i32),
+            raw: self.word,
+        }
+    }
+
+    fn process_ecall(&mut self) -> Self::InstructionResult {
+        Instruction {
+            kind: InsnKind::ECALL,
+            rd: 0,
+            rs1: 0,
+            rs2: 0,
+            imm: 0,
+            raw: self.word,
+        }
+    }
+
+    fn process_ebreak(&mut self) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_mul(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::MUL, &dec_insn, self.word)
+    }
+
+    fn process_mulh(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::MULH, &dec_insn, self.word)
+    }
+
+    fn process_mulhu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::MULHU, &dec_insn, self.word)
+    }
+
+    fn process_mulhsu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::MULHSU, &dec_insn, self.word)
+    }
+
+    fn process_div(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::DIV, &dec_insn, self.word)
+    }
+
+    fn process_divu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::DIVU, &dec_insn, self.word)
+    }
+
+    fn process_rem(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::REM, &dec_insn, self.word)
+    }
+
+    fn process_remu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::REMU, &dec_insn, self.word)
+    }
+
+    fn process_csrrc(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrci(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrs(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrsi(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrw(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrwi(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_fence(&mut self, _: IType) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_mret(&mut self) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_wfi(&mut self) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+}
+
+/// Transpile the [`Instruction`]s from the 32-bit encoded instructions.
+#[must_use]
+pub fn transpile(base: u32, instructions_u32: &[u32]) -> Vec<Instruction> {
+    izip!(enumerate(base, 4), instructions_u32)
+        .map(|(pc, &word)| {
+            process_instruction(&mut InstructionTranspiler { pc, word }, word)
+                .unwrap_or(Instruction::unimp(word))
+        })
+        .collect()
+}
+
+fn enumerate(start: u32, step: u32) -> impl Iterator<Item = u32> {
+    std::iter::successors(Some(start), move |&i| Some(i + step))
+}
diff --git a/ceno_emul/src/elf.rs b/ceno_emul/src/elf.rs
index 82697c619..ee59d3de3 100644
--- a/ceno_emul/src/elf.rs
+++ b/ceno_emul/src/elf.rs
@@ -18,7 +18,7 @@ extern crate alloc;
 
 use alloc::collections::BTreeMap;
 
-use crate::addr::WORD_SIZE;
+use crate::{addr::WORD_SIZE, disassemble::transpile, rv32im::Instruction};
 use anyhow::{Context, Result, anyhow, bail};
 use elf::{
     ElfBytes,
@@ -35,7 +35,7 @@ pub struct Program {
     /// This is the lowest address of the program's executable code
     pub base_address: u32,
     /// The instructions of the program
-    pub instructions: Vec<u32>,
+    pub instructions: Vec<Instruction>,
     /// The initial memory image
     pub image: BTreeMap<u32, u32>,
 }
@@ -45,7 +45,7 @@ impl Program {
     pub fn new(
         entry: u32,
         base_address: u32,
-        instructions: Vec<u32>,
+        instructions: Vec<Instruction>,
         image: BTreeMap<u32, u32>,
     ) -> Program {
         Self {
@@ -162,6 +162,8 @@ impl Program {
         assert!(entry >= base_address);
         assert!((entry - base_address) as usize <= instructions.len() * WORD_SIZE);
 
+        let instructions = transpile(base_address, &instructions);
+
         Ok(Program {
             entry,
             base_address,
diff --git a/ceno_emul/src/lib.rs b/ceno_emul/src/lib.rs
index c734b1794..1a855006e 100644
--- a/ceno_emul/src/lib.rs
+++ b/ceno_emul/src/lib.rs
@@ -12,10 +12,11 @@ mod vm_state;
 pub use vm_state::VMState;
 
 mod rv32im;
-pub use rv32im::{DecodedInstruction, EmuContext, InsnCategory, InsnCodes, InsnFormat, InsnKind};
+pub use rv32im::{
+    EmuContext, InsnCategory, InsnFormat, InsnKind, Instruction, encode_rv32, encode_rv32u,
+};
 
 mod elf;
 pub use elf::Program;
 
-mod rv32im_encode;
-pub use rv32im_encode::encode_rv32;
+pub mod disassemble;
diff --git a/ceno_emul/src/platform.rs b/ceno_emul/src/platform.rs
index c28bcda40..fec7e0181 100644
--- a/ceno_emul/src/platform.rs
+++ b/ceno_emul/src/platform.rs
@@ -73,10 +73,6 @@ impl Platform {
         self.is_ram(addr)
     }
 
-    pub fn can_execute(&self, addr: Addr) -> bool {
-        self.is_rom(addr)
-    }
-
     // Environment calls.
 
     /// Register containing the ecall function code. (x5, t0)
@@ -113,7 +109,6 @@ mod tests {
     #[test]
     fn test_no_overlap() {
         let p = CENO_PLATFORM;
-        assert!(p.can_execute(p.pc_base()));
         // ROM and RAM do not overlap.
         assert!(!p.is_rom(p.ram.start));
         assert!(!p.is_rom(p.ram.end - WORD_SIZE as Addr));
diff --git a/ceno_emul/src/rv32im.rs b/ceno_emul/src/rv32im.rs
index 52fac1508..e63fa761b 100644
--- a/ceno_emul/src/rv32im.rs
+++ b/ceno_emul/src/rv32im.rs
@@ -15,13 +15,39 @@
 // limitations under the License.
 
 use anyhow::{Result, anyhow};
-use itertools::enumerate;
 use num_derive::ToPrimitive;
-use std::sync::OnceLock;
 use strum_macros::{Display, EnumIter};
 
 use super::addr::{ByteAddr, RegIdx, WORD_SIZE, Word, WordAddr};
 
+/// Convenience function to create an `Instruction` with the given fields.
+///
+/// Pass 0 for unused fields.
+pub const fn encode_rv32(kind: InsnKind, rs1: u32, rs2: u32, rd: u32, imm: i32) -> Instruction {
+    Instruction {
+        kind,
+        rs1: rs1 as usize,
+        rs2: rs2 as usize,
+        rd: rd as usize,
+        imm,
+        raw: 0,
+    }
+}
+
+/// Convenience function to create an `Instruction` with the given fields.
+///
+/// Pass 0 for unused fields.
+pub const fn encode_rv32u(kind: InsnKind, rs1: u32, rs2: u32, rd: u32, imm: u32) -> Instruction {
+    Instruction {
+        kind,
+        rs1: rs1 as usize,
+        rs2: rs2 as usize,
+        rd: rd as usize,
+        imm: imm as i32,
+        raw: 0,
+    }
+}
+
 pub trait EmuContext {
     // Handle environment call
     fn ecall(&mut self) -> Result<bool>;
@@ -29,11 +55,8 @@ pub trait EmuContext {
     // Handle a trap
     fn trap(&self, cause: TrapCause) -> Result<bool>;
 
-    // Callback when instructions are decoded
-    fn on_insn_decoded(&mut self, _decoded: &DecodedInstruction) {}
-
     // Callback when instructions end normally
-    fn on_normal_end(&mut self, _decoded: &DecodedInstruction) {}
+    fn on_normal_end(&mut self, _decoded: &Instruction) {}
 
     // Get the program counter
     fn get_pc(&self) -> ByteAddr;
@@ -60,14 +83,7 @@ pub trait EmuContext {
     fn peek_memory(&self, addr: WordAddr) -> Word;
 
     /// Load from instruction cache
-    // TODO(Matthias): this should really return `Result<DecodedInstruction>`
-    // because the instruction cache should contain instructions, not just words.
-    fn fetch(&mut self, pc: WordAddr) -> Option<Word>;
-
-    // Check access for instruction load
-    fn check_insn_load(&self, _addr: ByteAddr) -> bool {
-        true
-    }
+    fn fetch(&mut self, pc: WordAddr) -> Option<Instruction>;
 
     // Check access for data load
     fn check_data_load(&self, _addr: ByteAddr) -> bool {
@@ -80,11 +96,6 @@ pub trait EmuContext {
     }
 }
 
-/// An implementation of the basic ISA (RV32IM), that is instruction decoding and functional units.
-pub struct Emulator {
-    table: &'static FastDecodeTable,
-}
-
 #[derive(Debug)]
 pub enum TrapCause {
     InstructionAddressMisaligned,
@@ -98,17 +109,18 @@ pub enum TrapCause {
     EcallError,
 }
 
-#[derive(Clone, Debug, Default)]
-pub struct DecodedInstruction {
-    insn: u32,
-    top_bit: u32,
-    // The bit fields of the instruction encoding, regardless of the instruction format.
-    func7: u32,
-    rs2: u32,
-    rs1: u32,
-    func3: u32,
-    rd: u32,
-    opcode: u32,
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Instruction {
+    pub kind: InsnKind,
+    pub rs1: RegIdx,
+    pub rs2: RegIdx,
+    pub rd: RegIdx,
+    pub imm: i32,
+    /// `raw` is there only to produce better logging and error messages.
+    ///
+    /// Set to 0, if you are creating an instruction directly,
+    /// instead of decoding it from a raw 32-bit `Word`.
+    pub raw: Word,
 }
 
 #[derive(Clone, Copy, Debug)]
@@ -133,9 +145,12 @@ pub enum InsnFormat {
 }
 use InsnFormat::*;
 
-#[derive(Clone, Copy, Display, Debug, PartialEq, Eq, PartialOrd, Ord, EnumIter, ToPrimitive)]
+#[derive(
+    Clone, Copy, Display, Debug, PartialEq, Eq, PartialOrd, Ord, EnumIter, ToPrimitive, Default,
+)]
 #[allow(clippy::upper_case_acronyms)]
 pub enum InsnKind {
+    #[default]
     INVALID,
     ADD,
     SUB,
@@ -164,8 +179,6 @@ pub enum InsnKind {
     BGEU,
     JAL,
     JALR,
-    LUI,
-    AUIPC,
     MUL,
     MULH,
     MULHSU,
@@ -182,611 +195,331 @@ pub enum InsnKind {
     SB,
     SH,
     SW,
-    /// ECALL and EBREAK etc.
-    EANY,
+    ECALL,
 }
 use InsnKind::*;
 
-impl InsnKind {
-    pub const fn codes(self) -> InsnCodes {
-        RV32IM_ISA[self as usize]
+impl From<InsnKind> for InsnCategory {
+    fn from(kind: InsnKind) -> Self {
+        match kind {
+            INVALID => Invalid,
+            ADD | SUB | XOR | OR | AND | SLL | SRL | SRA | SLT | SLTU | MUL | MULH | MULHSU
+            | MULHU | DIV | DIVU | REM | REMU => Compute,
+            ADDI | XORI | ORI | ANDI | SLLI | SRLI | SRAI | SLTI | SLTIU => Compute,
+            BEQ | BNE | BLT | BGE | BLTU | BGEU => Branch,
+            JAL | JALR => Compute,
+            LB | LH | LW | LBU | LHU => Load,
+            SB | SH | SW => Store,
+            ECALL => System,
+        }
     }
 }
 
-#[derive(Clone, Copy, Debug)]
-pub struct InsnCodes {
-    pub format: InsnFormat,
-    pub kind: InsnKind,
-    pub category: InsnCategory,
-    pub(crate) opcode: u32,
-    pub(crate) func3: u32,
-    pub(crate) func7: u32,
-}
-
-impl DecodedInstruction {
-    /// A virtual register which absorbs the writes to x0.
-    pub const RD_NULL: u32 = 32;
-
-    pub fn new(insn: u32) -> Self {
-        Self {
-            insn,
-            top_bit: (insn & 0x80000000) >> 31,
-            func7: (insn & 0xfe000000) >> 25,
-            rs2: (insn & 0x01f00000) >> 20,
-            rs1: (insn & 0x000f8000) >> 15,
-            func3: (insn & 0x00007000) >> 12,
-            rd: (insn & 0x00000f80) >> 7,
-            opcode: insn & 0x0000007f,
+// For encoding, which is useful for tests.
+impl From<InsnKind> for InsnFormat {
+    fn from(kind: InsnKind) -> Self {
+        match kind {
+            ADD | SUB | XOR | OR | AND | SLL | SRL | SRA | SLT | SLTU | MUL | MULH | MULHSU
+            | MULHU | DIV | DIVU | REM | REMU => R,
+            ADDI | XORI | ORI | ANDI | SLLI | SRLI | SRAI | SLTI | SLTIU => I,
+            BEQ | BNE | BLT | BGE | BLTU | BGEU => B,
+            JAL => J,
+            JALR => I,
+            LB | LH | LW | LBU | LHU => I,
+            SB | SH | SW => S,
+            ECALL => I,
+            INVALID => I,
         }
     }
+}
 
-    pub fn encoded(&self) -> u32 {
-        self.insn
-    }
-
-    pub fn opcode(&self) -> u32 {
-        self.opcode
-    }
-
-    /// The internal register destination. It is either the regular rd, or an internal RD_NULL if
-    /// the instruction does not write to a register or writes to x0.
+impl Instruction {
+    pub const RD_NULL: u32 = 32;
     pub fn rd_internal(&self) -> u32 {
-        match self.codes().format {
-            R | I | U | J if self.rd != 0 => self.rd,
+        match InsnFormat::from(self.kind) {
+            R | I | U | J if self.rd != 0 => self.rd as u32,
             _ => Self::RD_NULL,
         }
     }
-
-    /// Get the rs1 field, regardless of the instruction format.
-    pub fn rs1(&self) -> u32 {
-        self.rs1
-    }
-
     /// Get the register source 1, or zero if the instruction does not use rs1.
     pub fn rs1_or_zero(&self) -> u32 {
-        match self.codes().format {
-            R | I | S | B => self.rs1,
+        match InsnFormat::from(self.kind) {
+            R | I | S | B => self.rs1 as u32,
             _ => 0,
         }
     }
-
-    /// Get the rs2 field, regardless of the instruction format.
-    pub fn rs2(&self) -> u32 {
-        self.rs2
-    }
-
     /// Get the register source 2, or zero if the instruction does not use rs2.
     pub fn rs2_or_zero(&self) -> u32 {
-        match self.codes().format {
-            R | S | B => self.rs2,
+        match InsnFormat::from(self.kind) {
+            R | S | B => self.rs2 as u32,
             _ => 0,
         }
     }
-
-    pub fn immediate(&self) -> u32 {
-        match self.codes().format {
-            R => 0,
-            I => self.imm_i(),
-            S => self.imm_s(),
-            B => self.imm_b(),
-            U => self.imm_u(),
-            J => self.imm_j(),
-        }
-    }
-
-    pub fn codes(&self) -> InsnCodes {
-        FastDecodeTable::get().lookup(self)
-    }
-
-    fn imm_b(&self) -> u32 {
-        (self.top_bit * 0xfffff000)
-            | ((self.rd & 1) << 11)
-            | ((self.func7 & 0x3f) << 5)
-            | (self.rd & 0x1e)
-    }
-
-    fn imm_i(&self) -> u32 {
-        (self.top_bit * 0xffff_f000) | (self.func7 << 5) | self.rs2
-    }
-
-    fn imm_s(&self) -> u32 {
-        (self.top_bit * 0xfffff000) | (self.func7 << 5) | self.rd
-    }
-
-    fn imm_j(&self) -> u32 {
-        (self.top_bit * 0xfff00000)
-            | (self.rs1 << 15)
-            | (self.func3 << 12)
-            | ((self.rs2 & 1) << 11)
-            | ((self.func7 & 0x3f) << 5)
-            | (self.rs2 & 0x1e)
-    }
-
-    fn imm_u(&self) -> u32 {
-        self.insn & 0xfffff000
-    }
 }
 
-const fn insn(
-    format: InsnFormat,
-    kind: InsnKind,
-    category: InsnCategory,
-    opcode: u32,
-    func3: i32,
-    func7: i32,
-) -> InsnCodes {
-    InsnCodes {
-        format,
-        kind,
-        category,
-        opcode,
-        func3: func3 as u32,
-        func7: func7 as u32,
-    }
-}
-
-type InstructionTable = [InsnCodes; 47];
-type FastInstructionTable = [u8; 1 << 10];
-
-const RV32IM_ISA: InstructionTable = [
-    insn(R, INVALID, Invalid, 0x00, 0x0, 0x00),
-    insn(R, ADD, Compute, 0x33, 0x0, 0x00),
-    insn(R, SUB, Compute, 0x33, 0x0, 0x20),
-    insn(R, XOR, Compute, 0x33, 0x4, 0x00),
-    insn(R, OR, Compute, 0x33, 0x6, 0x00),
-    insn(R, AND, Compute, 0x33, 0x7, 0x00),
-    insn(R, SLL, Compute, 0x33, 0x1, 0x00),
-    insn(R, SRL, Compute, 0x33, 0x5, 0x00),
-    insn(R, SRA, Compute, 0x33, 0x5, 0x20),
-    insn(R, SLT, Compute, 0x33, 0x2, 0x00),
-    insn(R, SLTU, Compute, 0x33, 0x3, 0x00),
-    insn(I, ADDI, Compute, 0x13, 0x0, -1),
-    insn(I, XORI, Compute, 0x13, 0x4, -1),
-    insn(I, ORI, Compute, 0x13, 0x6, -1),
-    insn(I, ANDI, Compute, 0x13, 0x7, -1),
-    insn(I, SLLI, Compute, 0x13, 0x1, 0x00),
-    insn(I, SRLI, Compute, 0x13, 0x5, 0x00),
-    insn(I, SRAI, Compute, 0x13, 0x5, 0x20),
-    insn(I, SLTI, Compute, 0x13, 0x2, -1),
-    insn(I, SLTIU, Compute, 0x13, 0x3, -1),
-    insn(B, BEQ, Branch, 0x63, 0x0, -1),
-    insn(B, BNE, Branch, 0x63, 0x1, -1),
-    insn(B, BLT, Branch, 0x63, 0x4, -1),
-    insn(B, BGE, Branch, 0x63, 0x5, -1),
-    insn(B, BLTU, Branch, 0x63, 0x6, -1),
-    insn(B, BGEU, Branch, 0x63, 0x7, -1),
-    insn(J, JAL, Compute, 0x6f, -1, -1),
-    insn(I, JALR, Compute, 0x67, 0x0, -1),
-    insn(U, LUI, Compute, 0x37, -1, -1),
-    insn(U, AUIPC, Compute, 0x17, -1, -1),
-    insn(R, MUL, Compute, 0x33, 0x0, 0x01),
-    insn(R, MULH, Compute, 0x33, 0x1, 0x01),
-    insn(R, MULHSU, Compute, 0x33, 0x2, 0x01),
-    insn(R, MULHU, Compute, 0x33, 0x3, 0x01),
-    insn(R, DIV, Compute, 0x33, 0x4, 0x01),
-    insn(R, DIVU, Compute, 0x33, 0x5, 0x01),
-    insn(R, REM, Compute, 0x33, 0x6, 0x01),
-    insn(R, REMU, Compute, 0x33, 0x7, 0x01),
-    insn(I, LB, Load, 0x03, 0x0, -1),
-    insn(I, LH, Load, 0x03, 0x1, -1),
-    insn(I, LW, Load, 0x03, 0x2, -1),
-    insn(I, LBU, Load, 0x03, 0x4, -1),
-    insn(I, LHU, Load, 0x03, 0x5, -1),
-    insn(S, SB, Store, 0x23, 0x0, -1),
-    insn(S, SH, Store, 0x23, 0x1, -1),
-    insn(S, SW, Store, 0x23, 0x2, -1),
-    insn(I, EANY, System, 0x73, 0x0, 0x00),
-];
-
-#[cfg(test)]
-#[test]
-fn test_isa_table() {
-    use strum::IntoEnumIterator;
-    for kind in InsnKind::iter() {
-        assert_eq!(kind.codes().kind, kind);
-    }
-}
-
-// RISC-V instruction are determined by 3 parts:
-// - Opcode: 7 bits
-// - Func3: 3 bits
-// - Func7: 7 bits
-// In many cases, func7 and/or func3 is ignored.  A standard trick is to decode
-// via a table, but a 17 bit lookup table destroys L1 cache.  Luckily for us,
-// in practice the low 2 bits of opcode are always 11, so we can drop them, and
-// also func7 is always either 0, 1, 0x20 or don't care, so we can reduce func7
-// to 2 bits, which gets us to 10 bits, which is only 1k.
-struct FastDecodeTable {
-    table: FastInstructionTable,
-}
-
-impl FastDecodeTable {
-    fn new() -> Self {
-        let mut table: FastInstructionTable = [0; 1 << 10];
-        for (isa_idx, insn) in enumerate(&RV32IM_ISA) {
-            Self::add_insn(&mut table, insn, isa_idx);
-        }
-        Self { table }
-    }
-
-    fn get() -> &'static Self {
-        FAST_DECODE_TABLE.get_or_init(Self::new)
-    }
-
-    // Map to 10 bit format
-    fn map10(opcode: u32, func3: u32, func7: u32) -> usize {
-        let op_high = opcode >> 2;
-        // Map 0 -> 0, 1 -> 1, 0x20 -> 2, everything else to 3
-        let func72bits = if func7 <= 1 {
-            func7
-        } else if func7 == 0x20 {
-            2
-        } else {
-            3
-        };
-        ((op_high << 5) | (func72bits << 3) | func3) as usize
-    }
-
-    fn add_insn(table: &mut FastInstructionTable, insn: &InsnCodes, isa_idx: usize) {
-        let op_high = insn.opcode >> 2;
-        if (insn.func3 as i32) < 0 {
-            for f3 in 0..8 {
-                for f7b in 0..4 {
-                    let idx = (op_high << 5) | (f7b << 3) | f3;
-                    table[idx as usize] = isa_idx as u8;
-                }
-            }
-        } else if (insn.func7 as i32) < 0 {
-            for f7b in 0..4 {
-                let idx = (op_high << 5) | (f7b << 3) | insn.func3;
-                table[idx as usize] = isa_idx as u8;
-            }
-        } else {
-            table[Self::map10(insn.opcode, insn.func3, insn.func7)] = isa_idx as u8;
-        }
-    }
-
-    fn lookup(&self, decoded: &DecodedInstruction) -> InsnCodes {
-        let isa_idx = self.table[Self::map10(decoded.opcode, decoded.func3, decoded.func7)];
-        RV32IM_ISA[isa_idx as usize]
-    }
+pub fn step<C: EmuContext>(ctx: &mut C) -> Result<()> {
+    let pc = ctx.get_pc();
+
+    let Some(insn) = ctx.fetch(pc.waddr()) else {
+        ctx.trap(TrapCause::InstructionAccessFault)?;
+        return Err(anyhow!(
+            "Fatal: could not fetch instruction at pc={pc:?}, ELF does not have instructions there."
+        ));
+    };
+
+    tracing::trace!("pc: {:x}, kind: {:?}", pc.0, insn.kind);
+
+    if match InsnCategory::from(insn.kind) {
+        InsnCategory::Compute => step_compute(ctx, insn.kind, &insn)?,
+        InsnCategory::Branch => step_branch(ctx, insn.kind, &insn)?,
+        InsnCategory::Load => step_load(ctx, insn.kind, &insn)?,
+        InsnCategory::Store => step_store(ctx, insn.kind, &insn)?,
+        InsnCategory::System => step_system(ctx, insn.kind, &insn)?,
+        InsnCategory::Invalid => ctx.trap(TrapCause::IllegalInstruction(insn.raw))?,
+    } {
+        ctx.on_normal_end(&insn);
+    };
+
+    Ok(())
 }
 
-static FAST_DECODE_TABLE: OnceLock<FastDecodeTable> = OnceLock::new();
-
-impl Emulator {
-    pub fn new() -> Self {
-        Self {
-            table: FastDecodeTable::get(),
+fn step_compute<M: EmuContext>(ctx: &mut M, kind: InsnKind, insn: &Instruction) -> Result<bool> {
+    use super::InsnKind::*;
+
+    let pc = ctx.get_pc();
+    let mut new_pc = pc + WORD_SIZE;
+    let imm_i = insn.imm as u32;
+    let out = match kind {
+        // Instructions that do not read rs1 nor rs2.
+        JAL => {
+            new_pc = pc.wrapping_add(insn.imm as u32);
+            (pc + WORD_SIZE).0
         }
-    }
-
-    pub fn step<C: EmuContext>(&self, ctx: &mut C) -> Result<()> {
-        let pc = ctx.get_pc();
-
-        // TODO(Matthias): `check_insn_load` should be unnecessary: we can statically
-        // check in `fn new_from_elf` that the program only has instructions where
-        // our platform accepts them.
-        if !ctx.check_insn_load(pc) {
-            ctx.trap(TrapCause::InstructionAccessFault)?;
-            return Err(anyhow!("Fatal: could not fetch instruction at pc={:?}", pc));
-        }
-        let Some(word) = ctx.fetch(pc.waddr()) else {
-            ctx.trap(TrapCause::InstructionAccessFault)?;
-            return Err(anyhow!("Fatal: could not fetch instruction at pc={:?}", pc));
-        };
-
-        // TODO(Matthias): our `Program` that we are fetching from should really store
-        // already decoded instructions, instead of doing this weird, partial checking
-        // for `0x03` here.
-        //
-        // Note how we can fail here with an IllegalInstruction, and again further down
-        // when we match against the decoded instruction. We should centralise that. And
-        // our `step` function here shouldn't need to know anything about how instruction
-        // are encoded as numbers.
-        //
-        // One way to centralise is to do the check once when loading the program from the
-        // ELF.
-        if word & 0x03 != 0x03 {
-            // Opcode must end in 0b11 in RV32IM.
-            ctx.trap(TrapCause::IllegalInstruction(word))?;
-            return Err(anyhow!(
-                "Fatal: illegal instruction at pc={:?}: 0x{:08x}",
-                pc,
-                word
-            ));
-        }
-
-        let decoded = DecodedInstruction::new(word);
-        let insn = self.table.lookup(&decoded);
-        ctx.on_insn_decoded(&decoded);
-        tracing::trace!("pc: {:x}, kind: {:?}", pc.0, insn.kind);
-
-        if match insn.category {
-            InsnCategory::Compute => self.step_compute(ctx, insn.kind, &decoded)?,
-            InsnCategory::Branch => self.step_branch(ctx, insn.kind, &decoded)?,
-            InsnCategory::Load => self.step_load(ctx, insn.kind, &decoded)?,
-            InsnCategory::Store => self.step_store(ctx, insn.kind, &decoded)?,
-            InsnCategory::System => self.step_system(ctx, insn.kind, &decoded)?,
-            InsnCategory::Invalid => ctx.trap(TrapCause::IllegalInstruction(word))?,
-        } {
-            ctx.on_normal_end(&decoded);
-        };
-
-        Ok(())
-    }
-
-    fn step_compute<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        use InsnKind::*;
-
-        let pc = ctx.get_pc();
-        let mut new_pc = pc + WORD_SIZE;
-        let imm_i = decoded.imm_i();
-        let out = match kind {
-            // Instructions that do not read rs1 nor rs2.
-            JAL => {
-                new_pc = pc.wrapping_add(decoded.imm_j());
-                (pc + WORD_SIZE).0
-            }
-            LUI => decoded.imm_u(),
-            AUIPC => (pc.wrapping_add(decoded.imm_u())).0,
-
-            _ => {
-                // Instructions that read rs1 but not rs2.
-                let rs1 = ctx.load_register(decoded.rs1 as usize)?;
-
-                match kind {
-                    ADDI => rs1.wrapping_add(imm_i),
-                    XORI => rs1 ^ imm_i,
-                    ORI => rs1 | imm_i,
-                    ANDI => rs1 & imm_i,
-                    SLLI => rs1 << (imm_i & 0x1f),
-                    SRLI => rs1 >> (imm_i & 0x1f),
-                    SRAI => ((rs1 as i32) >> (imm_i & 0x1f)) as u32,
-                    SLTI => {
-                        if (rs1 as i32) < (imm_i as i32) {
-                            1
-                        } else {
-                            0
-                        }
+        _ => {
+            // Instructions that read rs1 but not rs2.
+            let rs1 = ctx.load_register(insn.rs1)?;
+
+            match kind {
+                ADDI => rs1.wrapping_add(imm_i),
+                XORI => rs1 ^ imm_i,
+                ORI => rs1 | imm_i,
+                ANDI => rs1 & imm_i,
+                SLLI => rs1 << (imm_i & 0x1f),
+                SRLI => rs1 >> (imm_i & 0x1f),
+                SRAI => ((rs1 as i32) >> (imm_i & 0x1f)) as u32,
+                SLTI => {
+                    if (rs1 as i32) < (imm_i as i32) {
+                        1
+                    } else {
+                        0
                     }
-                    SLTIU => {
-                        if rs1 < imm_i {
-                            1
-                        } else {
-                            0
-                        }
-                    }
-                    JALR => {
-                        new_pc = ByteAddr(rs1.wrapping_add(imm_i) & 0xfffffffe);
-                        (pc + WORD_SIZE).0
+                }
+                SLTIU => {
+                    if rs1 < imm_i {
+                        1
+                    } else {
+                        0
                     }
+                }
+                JALR => {
+                    new_pc = ByteAddr(rs1.wrapping_add(imm_i) & !1);
+                    (pc + WORD_SIZE).0
+                }
 
-                    _ => {
-                        // Instructions that use rs1 and rs2.
-                        let rs2 = ctx.load_register(decoded.rs2 as usize)?;
-
-                        match kind {
-                            ADD => rs1.wrapping_add(rs2),
-                            SUB => rs1.wrapping_sub(rs2),
-                            XOR => rs1 ^ rs2,
-                            OR => rs1 | rs2,
-                            AND => rs1 & rs2,
-                            SLL => rs1 << (rs2 & 0x1f),
-                            SRL => rs1 >> (rs2 & 0x1f),
-                            SRA => ((rs1 as i32) >> (rs2 & 0x1f)) as u32,
-                            SLT => {
-                                if (rs1 as i32) < (rs2 as i32) {
-                                    1
-                                } else {
-                                    0
-                                }
-                            }
-                            SLTU => {
-                                if rs1 < rs2 {
-                                    1
-                                } else {
-                                    0
-                                }
+                _ => {
+                    // Instructions that use rs1 and rs2.
+                    let rs2 = ctx.load_register(insn.rs2)?;
+
+                    match kind {
+                        ADD => rs1.wrapping_add(rs2),
+                        SUB => rs1.wrapping_sub(rs2),
+                        XOR => rs1 ^ rs2,
+                        OR => rs1 | rs2,
+                        AND => rs1 & rs2,
+                        SLL => rs1 << (rs2 & 0x1f),
+                        SRL => rs1 >> (rs2 & 0x1f),
+                        SRA => ((rs1 as i32) >> (rs2 & 0x1f)) as u32,
+                        SLT => {
+                            if (rs1 as i32) < (rs2 as i32) {
+                                1
+                            } else {
+                                0
                             }
-                            MUL => rs1.wrapping_mul(rs2),
-                            MULH => {
-                                (sign_extend_u32(rs1).wrapping_mul(sign_extend_u32(rs2)) >> 32)
-                                    as u32
+                        }
+                        SLTU => {
+                            if rs1 < rs2 {
+                                1
+                            } else {
+                                0
                             }
-                            MULHSU => (sign_extend_u32(rs1).wrapping_mul(rs2 as i64) >> 32) as u32,
-                            MULHU => (((rs1 as u64).wrapping_mul(rs2 as u64)) >> 32) as u32,
-                            DIV => {
-                                if rs2 == 0 {
-                                    u32::MAX
-                                } else {
-                                    ((rs1 as i32).wrapping_div(rs2 as i32)) as u32
-                                }
+                        }
+                        MUL => rs1.wrapping_mul(rs2),
+                        MULH => {
+                            (sign_extend_u32(rs1).wrapping_mul(sign_extend_u32(rs2)) >> 32) as u32
+                        }
+                        MULHSU => (sign_extend_u32(rs1).wrapping_mul(rs2 as i64) >> 32) as u32,
+                        MULHU => (((rs1 as u64).wrapping_mul(rs2 as u64)) >> 32) as u32,
+                        DIV => {
+                            if rs2 == 0 {
+                                u32::MAX
+                            } else {
+                                ((rs1 as i32).wrapping_div(rs2 as i32)) as u32
                             }
-                            DIVU => {
-                                if rs2 == 0 {
-                                    u32::MAX
-                                } else {
-                                    rs1 / rs2
-                                }
+                        }
+                        DIVU => {
+                            if rs2 == 0 {
+                                u32::MAX
+                            } else {
+                                rs1 / rs2
                             }
-                            REM => {
-                                if rs2 == 0 {
-                                    rs1
-                                } else {
-                                    ((rs1 as i32).wrapping_rem(rs2 as i32)) as u32
-                                }
+                        }
+                        REM => {
+                            if rs2 == 0 {
+                                rs1
+                            } else {
+                                ((rs1 as i32).wrapping_rem(rs2 as i32)) as u32
                             }
-                            REMU => {
-                                if rs2 == 0 {
-                                    rs1
-                                } else {
-                                    rs1 % rs2
-                                }
+                        }
+                        REMU => {
+                            if rs2 == 0 {
+                                rs1
+                            } else {
+                                rs1 % rs2
                             }
-
-                            _ => unreachable!("Illegal compute instruction: {:?}", kind),
                         }
+
+                        _ => unreachable!("Illegal compute instruction: {:?}", kind),
                     }
                 }
             }
-        };
-        if !new_pc.is_aligned() {
-            return ctx.trap(TrapCause::InstructionAddressMisaligned);
         }
-        ctx.store_register(decoded.rd_internal() as usize, out)?;
-        ctx.set_pc(new_pc);
-        Ok(true)
+    };
+    if !new_pc.is_aligned() {
+        return ctx.trap(TrapCause::InstructionAddressMisaligned);
     }
+    ctx.store_register(insn.rd_internal() as usize, out)?;
+    ctx.set_pc(new_pc);
+    Ok(true)
+}
 
-    fn step_branch<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        use InsnKind::*;
-
-        let pc = ctx.get_pc();
-        let rs1 = ctx.load_register(decoded.rs1 as RegIdx)?;
-        let rs2 = ctx.load_register(decoded.rs2 as RegIdx)?;
-
-        let taken = match kind {
-            BEQ => rs1 == rs2,
-            BNE => rs1 != rs2,
-            BLT => (rs1 as i32) < (rs2 as i32),
-            BGE => (rs1 as i32) >= (rs2 as i32),
-            BLTU => rs1 < rs2,
-            BGEU => rs1 >= rs2,
-            _ => unreachable!("Illegal branch instruction: {:?}", kind),
-        };
-
-        let new_pc = if taken {
-            pc.wrapping_add(decoded.imm_b())
-        } else {
-            pc + WORD_SIZE
-        };
-
-        if !new_pc.is_aligned() {
-            return ctx.trap(TrapCause::InstructionAddressMisaligned);
-        }
-        ctx.set_pc(new_pc);
-        Ok(true)
-    }
+fn step_branch<M: EmuContext>(ctx: &mut M, kind: InsnKind, decoded: &Instruction) -> Result<bool> {
+    use super::InsnKind::*;
+
+    let pc = ctx.get_pc();
+    let rs1 = ctx.load_register(decoded.rs1 as RegIdx)?;
+    let rs2 = ctx.load_register(decoded.rs2 as RegIdx)?;
+
+    let taken = match kind {
+        BEQ => rs1 == rs2,
+        BNE => rs1 != rs2,
+        BLT => (rs1 as i32) < (rs2 as i32),
+        BGE => (rs1 as i32) >= (rs2 as i32),
+        BLTU => rs1 < rs2,
+        BGEU => rs1 >= rs2,
+        _ => unreachable!("Illegal branch instruction: {:?}", kind),
+    };
+
+    let new_pc = if taken {
+        pc.wrapping_add(decoded.imm as u32)
+    } else {
+        pc + WORD_SIZE
+    };
+
+    if !new_pc.is_aligned() {
+        return ctx.trap(TrapCause::InstructionAddressMisaligned);
+    }
+    ctx.set_pc(new_pc);
+    Ok(true)
+}
 
-    fn step_load<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        let rs1 = ctx.load_register(decoded.rs1 as usize)?;
-        // LOAD instructions do not read rs2.
-        let addr = ByteAddr(rs1.wrapping_add(decoded.imm_i()));
-        if !ctx.check_data_load(addr) {
-            return ctx.trap(TrapCause::LoadAccessFault(addr));
+fn step_load<M: EmuContext>(ctx: &mut M, kind: InsnKind, decoded: &Instruction) -> Result<bool> {
+    let rs1 = ctx.load_register(decoded.rs1)?;
+    // LOAD instructions do not read rs2.
+    let addr = ByteAddr(rs1.wrapping_add_signed(decoded.imm));
+    if !ctx.check_data_load(addr) {
+        return ctx.trap(TrapCause::LoadAccessFault(addr));
+    }
+    let data = ctx.load_memory(addr.waddr())?;
+    let shift = 8 * (addr.0 & 3);
+    let out = match kind {
+        InsnKind::LB => {
+            let mut out = (data >> shift) & 0xff;
+            if out & 0x80 != 0 {
+                out |= 0xffffff00;
+            }
+            out
         }
-        let data = ctx.load_memory(addr.waddr())?;
-        let shift = 8 * (addr.0 & 3);
-        let out = match kind {
-            InsnKind::LB => {
-                let mut out = (data >> shift) & 0xff;
-                if out & 0x80 != 0 {
-                    out |= 0xffffff00;
-                }
-                out
+        InsnKind::LH => {
+            if addr.0 & 0x01 != 0 {
+                return ctx.trap(TrapCause::LoadAddressMisaligned);
             }
-            InsnKind::LH => {
-                if addr.0 & 0x01 != 0 {
-                    return ctx.trap(TrapCause::LoadAddressMisaligned);
-                }
-                let mut out = (data >> shift) & 0xffff;
-                if out & 0x8000 != 0 {
-                    out |= 0xffff0000;
-                }
-                out
+            let mut out = (data >> shift) & 0xffff;
+            if out & 0x8000 != 0 {
+                out |= 0xffff0000;
             }
-            InsnKind::LW => {
-                if addr.0 & 0x03 != 0 {
-                    return ctx.trap(TrapCause::LoadAddressMisaligned);
-                }
-                data
+            out
+        }
+        InsnKind::LW => {
+            if addr.0 & 0x03 != 0 {
+                return ctx.trap(TrapCause::LoadAddressMisaligned);
             }
-            InsnKind::LBU => (data >> shift) & 0xff,
-            InsnKind::LHU => {
-                if addr.0 & 0x01 != 0 {
-                    return ctx.trap(TrapCause::LoadAddressMisaligned);
-                }
-                (data >> shift) & 0xffff
+            data
+        }
+        InsnKind::LBU => (data >> shift) & 0xff,
+        InsnKind::LHU => {
+            if addr.0 & 0x01 != 0 {
+                return ctx.trap(TrapCause::LoadAddressMisaligned);
             }
-            _ => unreachable!(),
-        };
-        ctx.store_register(decoded.rd_internal() as usize, out)?;
-        ctx.set_pc(ctx.get_pc() + WORD_SIZE);
-        Ok(true)
-    }
+            (data >> shift) & 0xffff
+        }
+        _ => unreachable!(),
+    };
+    ctx.store_register(decoded.rd_internal() as usize, out)?;
+    ctx.set_pc(ctx.get_pc() + WORD_SIZE);
+    Ok(true)
+}
 
-    fn step_store<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        let rs1 = ctx.load_register(decoded.rs1 as usize)?;
-        let rs2 = ctx.load_register(decoded.rs2 as usize)?;
-        let addr = ByteAddr(rs1.wrapping_add(decoded.imm_s()));
-        let shift = 8 * (addr.0 & 3);
-        if !ctx.check_data_store(addr) {
-            tracing::error!("mstore: addr={:x?},rs1={:x}", addr, rs1);
-            return ctx.trap(TrapCause::StoreAccessFault);
+fn step_store<M: EmuContext>(ctx: &mut M, kind: InsnKind, decoded: &Instruction) -> Result<bool> {
+    let rs1 = ctx.load_register(decoded.rs1)?;
+    let rs2 = ctx.load_register(decoded.rs2)?;
+    let addr = ByteAddr(rs1.wrapping_add(decoded.imm as u32));
+    let shift = 8 * (addr.0 & 3);
+    if !ctx.check_data_store(addr) {
+        tracing::error!("mstore: addr={:x?},rs1={:x}", addr, rs1);
+        return ctx.trap(TrapCause::StoreAccessFault);
+    }
+    let mut data = ctx.peek_memory(addr.waddr());
+    match kind {
+        InsnKind::SB => {
+            data ^= data & (0xff << shift);
+            data |= (rs2 & 0xff) << shift;
         }
-        let mut data = ctx.peek_memory(addr.waddr());
-        match kind {
-            InsnKind::SB => {
-                data ^= data & (0xff << shift);
-                data |= (rs2 & 0xff) << shift;
+        InsnKind::SH => {
+            if addr.0 & 0x01 != 0 {
+                tracing::debug!("Misaligned SH");
+                return ctx.trap(TrapCause::StoreAddressMisaligned(addr));
             }
-            InsnKind::SH => {
-                if addr.0 & 0x01 != 0 {
-                    tracing::debug!("Misaligned SH");
-                    return ctx.trap(TrapCause::StoreAddressMisaligned(addr));
-                }
-                data ^= data & (0xffff << shift);
-                data |= (rs2 & 0xffff) << shift;
-            }
-            InsnKind::SW => {
-                if addr.0 & 0x03 != 0 {
-                    tracing::debug!("Misaligned SW");
-                    return ctx.trap(TrapCause::StoreAddressMisaligned(addr));
-                }
-                data = rs2;
+            data ^= data & (0xffff << shift);
+            data |= (rs2 & 0xffff) << shift;
+        }
+        InsnKind::SW => {
+            if addr.0 & 0x03 != 0 {
+                tracing::debug!("Misaligned SW");
+                return ctx.trap(TrapCause::StoreAddressMisaligned(addr));
             }
-            _ => unreachable!(),
+            data = rs2;
         }
-        ctx.store_memory(addr.waddr(), data)?;
-        ctx.set_pc(ctx.get_pc() + WORD_SIZE);
-        Ok(true)
+        _ => unreachable!(),
     }
+    ctx.store_memory(addr.waddr(), data)?;
+    ctx.set_pc(ctx.get_pc() + WORD_SIZE);
+    Ok(true)
+}
 
-    fn step_system<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        match kind {
-            InsnKind::EANY => match decoded.rs2 {
-                0 => ctx.ecall(),
-                1 => ctx.trap(TrapCause::Breakpoint),
-                _ => ctx.trap(TrapCause::IllegalInstruction(decoded.insn)),
-            },
-            _ => unreachable!(),
-        }
+fn step_system<M: EmuContext>(ctx: &mut M, kind: InsnKind, decoded: &Instruction) -> Result<bool> {
+    match kind {
+        InsnKind::ECALL => ctx.ecall(),
+        _ => ctx.trap(TrapCause::IllegalInstruction(decoded.raw)),
     }
 }
 
diff --git a/ceno_emul/src/rv32im_encode.rs b/ceno_emul/src/rv32im_encode.rs
deleted file mode 100644
index b6a80f32b..000000000
--- a/ceno_emul/src/rv32im_encode.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-use crate::{InsnKind, rv32im::InsnFormat};
-
-const MASK_4_BITS: u32 = 0xF;
-const MASK_5_BITS: u32 = 0x1F;
-const MASK_6_BITS: u32 = 0x3F;
-const MASK_7_BITS: u32 = 0x7F;
-const MASK_8_BITS: u32 = 0xFF;
-const MASK_10_BITS: u32 = 0x3FF;
-const MASK_12_BITS: u32 = 0xFFF;
-
-/// Generate bit encoding of a RISC-V instruction.
-///
-/// Values `rs1`, `rs2` and `rd1` are 5-bit register indices, and `imm` is of
-/// bit length depending on the requirements of the instruction format type.
-///
-/// Fields not required by the instruction's format type are ignored, so one can
-/// safely pass an arbitrary value for these, say 0.
-pub const fn encode_rv32(kind: InsnKind, rs1: u32, rs2: u32, rd: u32, imm: u32) -> u32 {
-    match kind.codes().format {
-        InsnFormat::R => encode_r(kind, rs1, rs2, rd),
-        InsnFormat::I => encode_i(kind, rs1, rd, imm),
-        InsnFormat::S => encode_s(kind, rs1, rs2, imm),
-        InsnFormat::B => encode_b(kind, rs1, rs2, imm),
-        InsnFormat::U => encode_u(kind, rd, imm),
-        InsnFormat::J => encode_j(kind, rd, imm),
-    }
-}
-
-// R-Type
-//        25    20    15       12   7       0
-// +------+-----+-----+--------+----+-------+
-// funct7 | rs2 | rs1 | funct3 | rd | opcode
-const fn encode_r(kind: InsnKind, rs1: u32, rs2: u32, rd: u32) -> u32 {
-    let rs2 = rs2 & MASK_5_BITS; // 5-bits mask
-    let rs1 = rs1 & MASK_5_BITS;
-    let rd = rd & MASK_5_BITS;
-    let func7 = kind.codes().func7;
-    let func3 = kind.codes().func3;
-    let opcode = kind.codes().opcode;
-    func7 << 25 | rs2 << 20 | rs1 << 15 | func3 << 12 | rd << 7 | opcode
-}
-
-// I-Type
-//           20    15       12   7       0
-// +---------+-----+--------+----+-------+
-// imm[0:11] | rs1 | funct3 | rd | opcode
-const fn encode_i(kind: InsnKind, rs1: u32, rd: u32, imm: u32) -> u32 {
-    let rs1 = rs1 & MASK_5_BITS;
-    let rd = rd & MASK_5_BITS;
-    let func3 = kind.codes().func3;
-    let opcode = kind.codes().opcode;
-    // SRLI/SRAI use a specialization of the I-type format with the shift type in imm[10].
-    let is_arithmetic_right_shift = (matches!(kind, InsnKind::SRAI) as u32) << 10;
-    let imm = imm & MASK_12_BITS | is_arithmetic_right_shift;
-    imm << 20 | rs1 << 15 | func3 << 12 | rd << 7 | opcode
-}
-
-// S-Type
-//           25    20    15       12         7       0
-// +---------+-----+-----+--------+----------+-------+
-// imm[5:11] | rs2 | rs1 | funct3 | imm[0:4] | opcode
-const fn encode_s(kind: InsnKind, rs1: u32, rs2: u32, imm: u32) -> u32 {
-    let rs2 = rs2 & MASK_5_BITS;
-    let rs1 = rs1 & MASK_5_BITS;
-    let func3 = kind.codes().func3;
-    let opcode = kind.codes().opcode;
-    let imm_lo = imm & MASK_5_BITS;
-    let imm_hi = (imm >> 5) & MASK_7_BITS; // 7-bits mask
-    imm_hi << 25 | rs2 << 20 | rs1 << 15 | func3 << 12 | imm_lo << 7 | opcode
-}
-
-// B-Type
-//         31          25    20    15       12         8         7       0
-// +-------+-----------+-----+-----+--------+----------+---------+-------+
-// imm[12] | imm[5:10] | rs2 | rs1 | funct3 | imm[1:4] | imm[11] | opcode
-const fn encode_b(kind: InsnKind, rs1: u32, rs2: u32, imm: u32) -> u32 {
-    let rs2 = rs2 & MASK_5_BITS;
-    let rs1 = rs1 & MASK_5_BITS;
-    let func3 = kind.codes().func3;
-    let opcode = kind.codes().opcode;
-    let imm_1_4 = (imm >> 1) & MASK_4_BITS; // skip imm[0]
-    let imm_5_10 = (imm >> 5) & MASK_6_BITS;
-    ((imm >> 12) & 1) << 31
-        | imm_5_10 << 25
-        | rs2 << 20
-        | rs1 << 15
-        | func3 << 12
-        | imm_1_4 << 8
-        | ((imm >> 11) & 1) << 7
-        | opcode
-}
-
-// J-Type
-//         31          21        20           12   7       0
-// +-------+-----------+---------+------------+----+-------+
-// imm[20] | imm[1:10] | imm[11] | imm[12:19] | rd | opcode
-const fn encode_j(kind: InsnKind, rd: u32, imm: u32) -> u32 {
-    let rd = rd & MASK_5_BITS;
-    let opcode = kind.codes().opcode;
-    let imm_1_10 = (imm >> 1) & MASK_10_BITS; // skip imm[0]
-    let imm_12_19 = (imm >> 12) & MASK_8_BITS;
-    ((imm >> 20) & 1) << 31
-        | imm_1_10 << 21
-        | ((imm >> 11) & 1) << 20
-        | imm_12_19 << 12
-        | rd << 7
-        | opcode
-}
-
-// U-Type
-//            12   7        0
-// +----------+----+--------+
-// imm[12:31] | rd | opcode
-const fn encode_u(kind: InsnKind, rd: u32, imm: u32) -> u32 {
-    (imm >> 12) << 12 | (rd & MASK_5_BITS) << 7 | kind.codes().opcode
-}
diff --git a/ceno_emul/src/tracer.rs b/ceno_emul/src/tracer.rs
index a1df363e2..01ad33e01 100644
--- a/ceno_emul/src/tracer.rs
+++ b/ceno_emul/src/tracer.rs
@@ -1,10 +1,9 @@
 use std::{collections::HashMap, fmt, mem};
 
 use crate::{
-    CENO_PLATFORM, InsnKind, PC_STEP_SIZE, Platform,
+    CENO_PLATFORM, InsnKind, Instruction, PC_STEP_SIZE, Platform,
     addr::{ByteAddr, Cycle, RegIdx, Word, WordAddr},
     encode_rv32,
-    rv32im::DecodedInstruction,
 };
 
 /// An instruction and its context in an execution trace. That is concrete values of registers and memory.
@@ -22,7 +21,7 @@ use crate::{
 pub struct StepRecord {
     cycle: Cycle,
     pc: Change<ByteAddr>,
-    insn_code: Word,
+    pub insn: Instruction,
 
     rs1: Option<ReadOp>,
     rs2: Option<ReadOp>,
@@ -57,7 +56,7 @@ impl StepRecord {
     pub fn new_r_instruction(
         cycle: Cycle,
         pc: ByteAddr,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rs2_read: Word,
         rd: Change<Word>,
@@ -79,7 +78,7 @@ impl StepRecord {
     pub fn new_b_instruction(
         cycle: Cycle,
         pc: Change<ByteAddr>,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rs2_read: Word,
         prev_cycle: Cycle,
@@ -99,7 +98,7 @@ impl StepRecord {
     pub fn new_i_instruction(
         cycle: Cycle,
         pc: Change<ByteAddr>,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rd: Change<Word>,
         prev_cycle: Cycle,
@@ -119,7 +118,7 @@ impl StepRecord {
     pub fn new_im_instruction(
         cycle: Cycle,
         pc: ByteAddr,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rd: Change<Word>,
         mem_op: ReadOp,
@@ -148,7 +147,7 @@ impl StepRecord {
     pub fn new_u_instruction(
         cycle: Cycle,
         pc: ByteAddr,
-        insn_code: u32,
+        insn_code: Instruction,
         rd: Change<Word>,
         prev_cycle: Cycle,
     ) -> StepRecord {
@@ -159,7 +158,7 @@ impl StepRecord {
     pub fn new_j_instruction(
         cycle: Cycle,
         pc: Change<ByteAddr>,
-        insn_code: u32,
+        insn_code: Instruction,
         rd: Change<Word>,
         prev_cycle: Cycle,
     ) -> StepRecord {
@@ -169,7 +168,7 @@ impl StepRecord {
     pub fn new_s_instruction(
         cycle: Cycle,
         pc: ByteAddr,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rs2_read: Word,
         memory_op: WriteOp,
@@ -194,7 +193,7 @@ impl StepRecord {
         Self::new_insn(
             cycle,
             Change::new(pc, pc + PC_STEP_SIZE),
-            encode_rv32(InsnKind::EANY, 0, 0, 0, 0),
+            encode_rv32(InsnKind::ECALL, 0, 0, 0, 0),
             Some(value),
             Some(value),
             Some(Change::new(value, value)),
@@ -214,25 +213,23 @@ impl StepRecord {
     fn new_insn(
         cycle: Cycle,
         pc: Change<ByteAddr>,
-        insn_code: u32,
+        insn: Instruction,
         rs1_read: Option<Word>,
         rs2_read: Option<Word>,
         rd: Option<Change<Word>>,
         memory_op: Option<WriteOp>,
         previous_cycle: Cycle,
     ) -> StepRecord {
-        let insn = DecodedInstruction::new(insn_code);
         StepRecord {
             cycle,
             pc,
-            insn_code,
             rs1: rs1_read.map(|rs1| ReadOp {
-                addr: Platform::register_vma(insn.rs1() as RegIdx).into(),
+                addr: Platform::register_vma(insn.rs1).into(),
                 value: rs1,
                 previous_cycle,
             }),
             rs2: rs2_read.map(|rs2| ReadOp {
-                addr: Platform::register_vma(insn.rs2() as RegIdx).into(),
+                addr: Platform::register_vma(insn.rs2).into(),
                 value: rs2,
                 previous_cycle,
             }),
@@ -241,6 +238,7 @@ impl StepRecord {
                 value: rd,
                 previous_cycle,
             }),
+            insn,
             memory_op,
         }
     }
@@ -253,14 +251,9 @@ impl StepRecord {
         self.pc
     }
 
-    /// The instruction as a raw code.
-    pub fn insn_code(&self) -> Word {
-        self.insn_code
-    }
-
     /// The instruction as a decoded structure.
-    pub fn insn(&self) -> DecodedInstruction {
-        DecodedInstruction::new(self.insn_code)
+    pub fn insn(&self) -> Instruction {
+        self.insn
     }
 
     pub fn rs1(&self) -> Option<ReadOp> {
@@ -327,12 +320,9 @@ impl Tracer {
         self.record.pc.after = pc;
     }
 
-    // TODO(Matthias): this should perhaps record `DecodedInstruction`s instead
-    // of raw codes, or perhaps only the pc, because we can always look up the
-    // instruction in the program.
-    pub fn fetch(&mut self, pc: WordAddr, value: Word) {
+    pub fn fetch(&mut self, pc: WordAddr, value: Instruction) {
         self.record.pc.before = pc.baddr();
-        self.record.insn_code = value;
+        self.record.insn = value;
     }
 
     pub fn load_register(&mut self, idx: RegIdx, value: Word) {
diff --git a/ceno_emul/src/vm_state.rs b/ceno_emul/src/vm_state.rs
index e071cb000..838779979 100644
--- a/ceno_emul/src/vm_state.rs
+++ b/ceno_emul/src/vm_state.rs
@@ -5,7 +5,7 @@ use crate::{
     PC_STEP_SIZE, Program, WORD_SIZE,
     addr::{ByteAddr, RegIdx, Word, WordAddr},
     platform::Platform,
-    rv32im::{DecodedInstruction, Emulator, TrapCause},
+    rv32im::{Instruction, TrapCause},
     tracer::{Change, StepRecord, Tracer},
 };
 use anyhow::{Result, anyhow};
@@ -77,18 +77,17 @@ impl VMState {
     }
 
     pub fn iter_until_halt(&mut self) -> impl Iterator<Item = Result<StepRecord>> + '_ {
-        let emu = Emulator::new();
         from_fn(move || {
             if self.halted() {
                 None
             } else {
-                Some(self.step(&emu))
+                Some(self.step())
             }
         })
     }
 
-    fn step(&mut self, emu: &Emulator) -> Result<StepRecord> {
-        emu.step(self)?;
+    fn step(&mut self) -> Result<StepRecord> {
+        crate::rv32im::step(self)?;
         let step = self.tracer.advance();
         if step.is_busy_loop() && !self.halted() {
             Err(anyhow!("Stuck in loop {}", "{}"))
@@ -121,7 +120,7 @@ impl EmuContext for VMState {
             // Treat unknown ecalls as all powerful instructions:
             // Read two registers, write one register, write one memory word, and branch.
             tracing::warn!("ecall ignored: syscall_id={}", function);
-            self.store_register(DecodedInstruction::RD_NULL as RegIdx, 0)?;
+            self.store_register(Instruction::RD_NULL as RegIdx, 0)?;
             // Example ecall effect - any writable address will do.
             let addr = (self.platform.stack_top - WORD_SIZE as u32).into();
             self.store_memory(addr, self.peek_memory(addr))?;
@@ -136,7 +135,7 @@ impl EmuContext for VMState {
         Err(anyhow!("Trap {:?}", cause)) // Crash.
     }
 
-    fn on_normal_end(&mut self, _decoded: &DecodedInstruction) {
+    fn on_normal_end(&mut self, _decoded: &Instruction) {
         self.tracer.store_pc(ByteAddr(self.pc));
     }
 
@@ -189,8 +188,7 @@ impl EmuContext for VMState {
         *self.memory.get(&addr).unwrap_or(&0)
     }
 
-    // TODO(Matthias): this should really return `Result<DecodedInstruction>`
-    fn fetch(&mut self, pc: WordAddr) -> Option<Word> {
+    fn fetch(&mut self, pc: WordAddr) -> Option<Instruction> {
         let byte_pc: ByteAddr = pc.into();
         let relative_pc = byte_pc.0.wrapping_sub(self.program.base_address);
         let idx = (relative_pc / WORD_SIZE as u32) as usize;
@@ -206,8 +204,4 @@ impl EmuContext for VMState {
     fn check_data_store(&self, addr: ByteAddr) -> bool {
         self.platform.can_write(addr.0)
     }
-
-    fn check_insn_load(&self, addr: ByteAddr) -> bool {
-        self.platform.can_execute(addr.0)
-    }
 }
diff --git a/ceno_emul/tests/test_elf.rs b/ceno_emul/tests/test_elf.rs
index ca7a14c1c..7448d4508 100644
--- a/ceno_emul/tests/test_elf.rs
+++ b/ceno_emul/tests/test_elf.rs
@@ -15,7 +15,7 @@ fn test_ceno_rt_panic() -> Result<()> {
     let mut state = VMState::new_from_elf(CENO_PLATFORM, program_elf)?;
     let steps = run(&mut state)?;
     let last = steps.last().unwrap();
-    assert_eq!(last.insn().codes().kind, InsnKind::EANY);
+    assert_eq!(last.insn().kind, InsnKind::ECALL);
     assert_eq!(last.rs1().unwrap().value, Platform::ecall_halt());
     assert_eq!(last.rs2().unwrap().value, 1); // panic / halt(1)
     Ok(())
diff --git a/ceno_emul/tests/test_vm_trace.rs b/ceno_emul/tests/test_vm_trace.rs
index 2aa5f0da2..ef3b8820e 100644
--- a/ceno_emul/tests/test_vm_trace.rs
+++ b/ceno_emul/tests/test_vm_trace.rs
@@ -6,8 +6,8 @@ use std::{
 };
 
 use ceno_emul::{
-    CENO_PLATFORM, Cycle, EmuContext, InsnKind, Platform, Program, StepRecord, Tracer, VMState,
-    WORD_SIZE, WordAddr,
+    CENO_PLATFORM, Cycle, EmuContext, InsnKind, Instruction, Platform, Program, StepRecord, Tracer,
+    VMState, WordAddr, encode_rv32,
 };
 
 #[test]
@@ -15,17 +15,8 @@ fn test_vm_trace() -> Result<()> {
     let program = Program::new(
         CENO_PLATFORM.pc_base(),
         CENO_PLATFORM.pc_base(),
-        PROGRAM_FIBONACCI_20.to_vec(),
-        PROGRAM_FIBONACCI_20
-            .iter()
-            .enumerate()
-            .map(|(insn_idx, &insn)| {
-                (
-                    CENO_PLATFORM.pc_base() + (WORD_SIZE * insn_idx) as u32,
-                    insn,
-                )
-            })
-            .collect(),
+        program_fibonacci_20(),
+        Default::default(),
     );
     let mut ctx = VMState::new(CENO_PLATFORM, Arc::new(program));
 
@@ -36,7 +27,7 @@ fn test_vm_trace() -> Result<()> {
     assert_eq!(ctx.peek_register(2), x2);
     assert_eq!(ctx.peek_register(3), x3);
 
-    let ops: Vec<InsnKind> = steps.iter().map(|step| step.insn().codes().kind).collect();
+    let ops: Vec<InsnKind> = steps.iter().map(|step| step.insn().kind).collect();
     assert_eq!(ops, expected_ops_fibonacci_20());
 
     assert_eq!(
@@ -66,27 +57,25 @@ fn run(state: &mut VMState) -> Result<Vec<StepRecord>> {
 }
 
 /// Example in RISC-V bytecode and assembly.
-const PROGRAM_FIBONACCI_20: [u32; 7] = [
-    // x1 = 10;
-    // x3 = 1;
-    // immediate    rs1  f3   rd   opcode
-    0b_000000001010_00000_000_00001_0010011, // addi x1, x0, 10
-    0b_000000000001_00000_000_00011_0010011, // addi x3, x0, 1
-    // loop {
-    //     x1 -= 1;
-    // immediate    rs1  f3   rd   opcode
-    0b_111111111111_00001_000_00001_0010011, // addi x1, x1, -1
-    //     x2 += x3;
-    //     x3 += x2;
-    // zeros   rs2   rs1   f3  rd    opcode
-    0b_0000000_00011_00010_000_00010_0110011, // add x2, x2, x3
-    0b_0000000_00011_00010_000_00011_0110011, // add x3, x2, x3
-    //     if x1 == 0 { break }
-    // imm      rs2   rs1   f3  imm    opcode
-    0b_1_111111_00000_00001_001_1010_1_1100011, // bne x1, x0, -12
-    // ecall HALT, SUCCESS
-    0b_000000000000_00000_000_00000_1110011,
-];
+pub fn program_fibonacci_20() -> Vec<Instruction> {
+    vec![
+        // x1 = 10;
+        // x3 = 1;
+        encode_rv32(InsnKind::ADDI, 0, 0, 1, 10),
+        encode_rv32(InsnKind::ADDI, 0, 0, 3, 1),
+        // loop {
+        //     x1 -= 1;
+        encode_rv32(InsnKind::ADDI, 1, 0, 1, -1),
+        //     x2 += x3;
+        //     x3 += x2;
+        encode_rv32(InsnKind::ADD, 2, 3, 2, 0),
+        encode_rv32(InsnKind::ADD, 2, 3, 3, 0),
+        //     if x1 == 0 { break }
+        encode_rv32(InsnKind::BNE, 1, 0, 0, -12),
+        // ecall HALT, SUCCESS
+        encode_rv32(InsnKind::ECALL, 0, 0, 0, 0),
+    ]
+}
 
 /// Rust version of the example. Reconstruct the output.
 fn expected_fibonacci_20() -> (u32, u32, u32) {
@@ -115,7 +104,7 @@ fn expected_ops_fibonacci_20() -> Vec<InsnKind> {
     for _ in 0..10 {
         ops.extend(&[ADDI, ADD, ADD, BNE]);
     }
-    ops.push(EANY);
+    ops.push(ECALL);
     ops
 }
 
diff --git a/ceno_zkvm/Makefile.toml b/ceno_zkvm/Makefile.toml
index a4965ba50..45d27cf51 100644
--- a/ceno_zkvm/Makefile.toml
+++ b/ceno_zkvm/Makefile.toml
@@ -1,9 +1,7 @@
 [env]
 CARGO_MAKE_EXTEND_WORKSPACE_MAKEFILE = true
-CORE = { script = ["nproc"] }
-RAYON_NUM_THREADS = "${CORE}"
 
 [tasks.riscv_opcodes_flamegraph]
 args = ["run", "--package", "ceno_zkvm", "--release", "--example", "riscv_opcodes"]
 command = "cargo"
-env = { "RUST_LOG" = "debug", "RAYON_NUM_THREADS" = "8" }
+env = { "RUST_LOG" = "debug" }
diff --git a/ceno_zkvm/examples/riscv_opcodes.rs b/ceno_zkvm/examples/riscv_opcodes.rs
index fe119bb25..81d1f6eb3 100644
--- a/ceno_zkvm/examples/riscv_opcodes.rs
+++ b/ceno_zkvm/examples/riscv_opcodes.rs
@@ -1,7 +1,6 @@
 use std::{panic, sync::Arc, time::Instant};
 
 use ceno_zkvm::{
-    declare_program,
     instructions::riscv::{MemPadder, MmuConfig, Rv32imConfig, constants::EXIT_PC},
     scheme::{mock_prover::MockProver, prover::ZKVMProver},
     state::GlobalState,
@@ -13,8 +12,9 @@ use clap::Parser;
 
 use ceno_emul::{
     CENO_PLATFORM, EmuContext,
-    InsnKind::{ADD, BLTU, EANY, LUI, LW},
-    PC_WORD_SIZE, Platform, Program, StepRecord, Tracer, VMState, Word, WordAddr, encode_rv32,
+    InsnKind::{ADD, ADDI, BLTU, ECALL, LW},
+    Instruction, Platform, Program, StepRecord, Tracer, VMState, Word, WordAddr, encode_rv32,
+    encode_rv32u,
 };
 use ceno_zkvm::{
     scheme::{PublicValues, constants::MAX_NUM_VARIABLES, verifier::ZKVMVerifier},
@@ -28,33 +28,26 @@ use mpcs::{Basefold, BasefoldRSParams, PolynomialCommitmentScheme};
 use sumcheck::macros::{entered_span, exit_span};
 use tracing_subscriber::{EnvFilter, Registry, fmt, fmt::format::FmtSpan, layer::SubscriberExt};
 use transcript::BasicTranscript as Transcript;
-const PROGRAM_SIZE: usize = 16;
 // For now, we assume registers
 //  - x0 is not touched,
 //  - x1 is initialized to 1,
 //  - x2 is initialized to -1,
 //  - x3 is initialized to loop bound.
 // we use x4 to hold the acc_sum.
-#[allow(clippy::unusual_byte_groupings)]
-const ECALL_HALT: u32 = 0b_000000000000_00000_000_00000_1110011;
-#[allow(clippy::unusual_byte_groupings)]
-const PROGRAM_CODE: [u32; PROGRAM_SIZE] = {
-    let mut program: [u32; PROGRAM_SIZE] = [ECALL_HALT; PROGRAM_SIZE];
-    declare_program!(
-        program,
-        encode_rv32(LUI, 0, 0, 10, CENO_PLATFORM.public_io.start), // lui x10, public_io
-        encode_rv32(LW, 10, 0, 1, 0),                              // lw x1, 0(x10)
-        encode_rv32(LW, 10, 0, 2, 4),                              // lw x2, 4(x10)
-        encode_rv32(LW, 10, 0, 3, 8),                              // lw x3, 8(x10)
+fn program_code() -> Vec<Instruction> {
+    vec![
+        encode_rv32u(ADDI, 0, 0, 10, CENO_PLATFORM.public_io.start), // lui x10, public_io
+        encode_rv32(LW, 10, 0, 1, 0),                                // lw x1, 0(x10)
+        encode_rv32(LW, 10, 0, 2, 4),                                // lw x2, 4(x10)
+        encode_rv32(LW, 10, 0, 3, 8),                                // lw x3, 8(x10)
         // Main loop.
-        encode_rv32(ADD, 1, 4, 4, 0),              // add x4, x1, x4
-        encode_rv32(ADD, 2, 3, 3, 0),              // add x3, x2, x3
-        encode_rv32(BLTU, 0, 3, 0, -8_i32 as u32), // bltu x0, x3, -8
+        encode_rv32(ADD, 1, 4, 4, 0),       // add x4, x1, x4
+        encode_rv32(ADD, 2, 3, 3, 0),       // add x3, x2, x3
+        encode_rv32(BLTU, 0, 3, 0, -8_i32), // bltu x0, x3, -8
         // End.
-        ECALL_HALT, // ecall halt
-    );
-    program
-};
+        encode_rv32(ECALL, 0, 0, 0, 0), // ecall (halt)
+    ]
+}
 type ExampleProgramTableCircuit<E> = ProgramTableCircuit<E>;
 
 /// Simple program to greet a person
@@ -74,21 +67,14 @@ fn main() {
     let args = Args::parse();
     type E = GoldilocksExt2;
     type Pcs = Basefold<GoldilocksExt2, BasefoldRSParams>;
+    let program_code = program_code();
+    let program_size = program_code.len();
 
     let program = Program::new(
         CENO_PLATFORM.pc_base(),
         CENO_PLATFORM.pc_base(),
-        PROGRAM_CODE.to_vec(),
-        PROGRAM_CODE
-            .iter()
-            .enumerate()
-            .map(|(insn_idx, &insn)| {
-                (
-                    (insn_idx * PC_WORD_SIZE) as u32 + CENO_PLATFORM.pc_base(),
-                    insn,
-                )
-            })
-            .collect(),
+        program_code,
+        Default::default(),
     );
     let mem_addresses = CENO_PLATFORM.ram.clone();
     let io_addresses = CENO_PLATFORM.public_io.clone();
@@ -117,7 +103,7 @@ fn main() {
     let pcs_param = Pcs::setup(1 << MAX_NUM_VARIABLES).expect("Basefold PCS setup");
     let (pp, vp) = Pcs::trim(pcs_param, 1 << MAX_NUM_VARIABLES).expect("Basefold trim");
     let program_params = ProgramParams {
-        program_size: PROGRAM_SIZE,
+        program_size,
         ..Default::default()
     };
     let mut zkvm_cs = ZKVMConstraintSystem::new_with_platform(program_params);
@@ -193,8 +179,7 @@ fn main() {
             .iter()
             .rev()
             .find(|record| {
-                record.insn().codes().kind == EANY
-                    && record.rs1().unwrap().value == Platform::ecall_halt()
+                record.insn().kind == ECALL && record.rs1().unwrap().value == Platform::ecall_halt()
             })
             .expect("halt record not found");
 
diff --git a/ceno_zkvm/src/e2e.rs b/ceno_zkvm/src/e2e.rs
index b1d5b4d77..95bcb73be 100644
--- a/ceno_zkvm/src/e2e.rs
+++ b/ceno_zkvm/src/e2e.rs
@@ -11,8 +11,8 @@ use crate::{
     tables::{MemFinalRecord, MemInitRecord, ProgramTableCircuit, ProgramTableConfig},
 };
 use ceno_emul::{
-    ByteAddr, EmuContext, InsnKind::EANY, IterAddresses, Platform, Program, StepRecord, Tracer,
-    VMState, WORD_SIZE, WordAddr,
+    ByteAddr, EmuContext, InsnKind, IterAddresses, Platform, Program, StepRecord, Tracer, VMState,
+    WORD_SIZE, WordAddr,
 };
 use ff_ext::ExtensionField;
 use itertools::{Itertools, MinMaxResult, chain};
@@ -73,7 +73,7 @@ fn emulate_program(
         .iter()
         .rev()
         .find(|record| {
-            record.insn().codes().kind == EANY
+            record.insn().kind == InsnKind::ECALL
                 && record.rs1().unwrap().value == Platform::ecall_halt()
         })
         .and_then(|halt_record| halt_record.rs2())
@@ -537,9 +537,8 @@ fn format_segments(
 
 fn format_segment(platform: &Platform, addr: u32) -> String {
     format!(
-        "{}{}{}",
+        "{}{}",
         if platform.can_read(addr) { "R" } else { "-" },
         if platform.can_write(addr) { "W" } else { "-" },
-        if platform.can_execute(addr) { "X" } else { "-" },
     )
 }
diff --git a/ceno_zkvm/src/instructions/riscv.rs b/ceno_zkvm/src/instructions/riscv.rs
index de0dfc771..cc5e12b12 100644
--- a/ceno_zkvm/src/instructions/riscv.rs
+++ b/ceno_zkvm/src/instructions/riscv.rs
@@ -28,7 +28,6 @@ mod i_insn;
 mod insn_base;
 mod j_insn;
 mod r_insn;
-mod u_insn;
 
 mod ecall_insn;
 
@@ -37,15 +36,13 @@ mod memory;
 mod s_insn;
 #[cfg(test)]
 mod test;
-#[cfg(test)]
-mod test_utils;
 
 pub trait RIVInstruction {
     const INST_KIND: InsnKind;
 }
 
 pub use arith::{AddInstruction, SubInstruction};
-pub use jump::{AuipcInstruction, JalInstruction, JalrInstruction, LuiInstruction};
+pub use jump::{JalInstruction, JalrInstruction};
 pub use memory::{
     LbInstruction, LbuInstruction, LhInstruction, LhuInstruction, LwInstruction, SbInstruction,
     ShInstruction, SwInstruction,
diff --git a/ceno_zkvm/src/instructions/riscv/arith_imm.rs b/ceno_zkvm/src/instructions/riscv/arith_imm.rs
index c4c09629b..b7f50e898 100644
--- a/ceno_zkvm/src/instructions/riscv/arith_imm.rs
+++ b/ceno_zkvm/src/instructions/riscv/arith_imm.rs
@@ -89,7 +89,7 @@ mod test {
 
     use crate::{
         circuit_builder::{CircuitBuilder, ConstraintSystem},
-        instructions::{Instruction, riscv::test_utils::imm_i},
+        instructions::Instruction,
         scheme::mock_prover::{MOCK_PC_START, MockProver},
     };
 
@@ -110,7 +110,7 @@ mod test {
             .unwrap()
             .unwrap();
 
-        let insn_code = encode_rv32(InsnKind::ADDI, 2, 0, 4, imm_i(3));
+        let insn_code = encode_rv32(InsnKind::ADDI, 2, 0, 4, 3);
         let (raw_witin, lkm) = AddiInstruction::<GoldilocksExt2>::assign_instances(
             &config,
             cb.cs.num_witin as usize,
@@ -143,7 +143,8 @@ mod test {
             .unwrap()
             .unwrap();
 
-        let insn_code = encode_rv32(InsnKind::ADDI, 2, 0, 4, imm_i(-3));
+        let insn_code = encode_rv32(InsnKind::ADDI, 2, 0, 4, -3);
+
         let (raw_witin, lkm) = AddiInstruction::<GoldilocksExt2>::assign_instances(
             &config,
             cb.cs.num_witin as usize,
diff --git a/ceno_zkvm/src/instructions/riscv/branch/test.rs b/ceno_zkvm/src/instructions/riscv/branch/test.rs
index 9847c66d5..5b53ec904 100644
--- a/ceno_zkvm/src/instructions/riscv/branch/test.rs
+++ b/ceno_zkvm/src/instructions/riscv/branch/test.rs
@@ -5,7 +5,7 @@ use super::*;
 use crate::{
     circuit_builder::{CircuitBuilder, ConstraintSystem},
     error::ZKVMError,
-    instructions::{Instruction, riscv::test_utils::imm_b},
+    instructions::Instruction,
     scheme::mock_prover::{MOCK_PC_START, MockProver},
 };
 
@@ -32,7 +32,7 @@ fn impl_opcode_beq(equal: bool) {
         .unwrap()
         .unwrap();
 
-    let insn_code = encode_rv32(InsnKind::BEQ, 2, 3, 0, imm_b(8));
+    let insn_code = encode_rv32(InsnKind::BEQ, 2, 3, 0, 8);
     let pc_offset = if equal { 8 } else { PC_STEP_SIZE };
     let (raw_witin, lkm) =
         BeqInstruction::assign_instances(&config, cb.cs.num_witin as usize, vec![
@@ -70,7 +70,7 @@ fn impl_opcode_bne(equal: bool) {
         .unwrap()
         .unwrap();
 
-    let insn_code = encode_rv32(InsnKind::BNE, 2, 3, 0, imm_b(8));
+    let insn_code = encode_rv32(InsnKind::BNE, 2, 3, 0, 8);
     let pc_offset = if equal { PC_STEP_SIZE } else { 8 };
     let (raw_witin, lkm) =
         BneInstruction::assign_instances(&config, cb.cs.num_witin as usize, vec![
@@ -111,8 +111,8 @@ fn impl_bltu_circuit(taken: bool, a: u32, b: u32) -> Result<(), ZKVMError> {
         MOCK_PC_START + PC_STEP_SIZE
     };
 
-    let insn_code = encode_rv32(InsnKind::BLTU, 2, 3, 0, imm_b(-8));
-    println!("{:#b}", insn_code);
+    let insn_code = encode_rv32(InsnKind::BLTU, 2, 3, 0, -8);
+    println!("{:?}", insn_code);
     let (raw_witin, lkm) =
         BltuInstruction::assign_instances(&config, circuit_builder.cs.num_witin as usize, vec![
             StepRecord::new_b_instruction(
@@ -153,7 +153,7 @@ fn impl_bgeu_circuit(taken: bool, a: u32, b: u32) -> Result<(), ZKVMError> {
         MOCK_PC_START + PC_STEP_SIZE
     };
 
-    let insn_code = encode_rv32(InsnKind::BGEU, 2, 3, 0, imm_b(-8));
+    let insn_code = encode_rv32(InsnKind::BGEU, 2, 3, 0, -8);
     let (raw_witin, lkm) =
         BgeuInstruction::assign_instances(&config, circuit_builder.cs.num_witin as usize, vec![
             StepRecord::new_b_instruction(
@@ -195,7 +195,7 @@ fn impl_blt_circuit(taken: bool, a: i32, b: i32) -> Result<(), ZKVMError> {
         MOCK_PC_START + PC_STEP_SIZE
     };
 
-    let insn_code = encode_rv32(InsnKind::BLT, 2, 3, 0, imm_b(-8));
+    let insn_code = encode_rv32(InsnKind::BLT, 2, 3, 0, -8);
     let (raw_witin, lkm) =
         BltInstruction::assign_instances(&config, circuit_builder.cs.num_witin as usize, vec![
             StepRecord::new_b_instruction(
@@ -237,7 +237,7 @@ fn impl_bge_circuit(taken: bool, a: i32, b: i32) -> Result<(), ZKVMError> {
         MOCK_PC_START + PC_STEP_SIZE
     };
 
-    let insn_code = encode_rv32(InsnKind::BGE, 2, 3, 0, imm_b(-8));
+    let insn_code = encode_rv32(InsnKind::BGE, 2, 3, 0, -8);
     let (raw_witin, lkm) =
         BgeInstruction::assign_instances(&config, circuit_builder.cs.num_witin as usize, vec![
             StepRecord::new_b_instruction(
diff --git a/ceno_zkvm/src/instructions/riscv/dummy/dummy_circuit.rs b/ceno_zkvm/src/instructions/riscv/dummy/dummy_circuit.rs
index 1d98d1271..b73c932a1 100644
--- a/ceno_zkvm/src/instructions/riscv/dummy/dummy_circuit.rs
+++ b/ceno_zkvm/src/instructions/riscv/dummy/dummy_circuit.rs
@@ -33,13 +33,15 @@ impl<E: ExtensionField, I: RIVInstruction> Instruction<E> for DummyInstruction<E
     fn construct_circuit(
         circuit_builder: &mut CircuitBuilder<E>,
     ) -> Result<Self::InstructionConfig, ZKVMError> {
-        let codes = I::INST_KIND.codes();
+        let kind = I::INST_KIND;
+        let format = InsnFormat::from(kind);
+        let category = InsnCategory::from(kind);
 
         // ECALL can do everything.
-        let is_ecall = matches!(codes.kind, InsnKind::EANY);
+        let is_ecall = matches!(kind, InsnKind::ECALL);
 
         // Regular instructions do what is implied by their format.
-        let (with_rs1, with_rs2, with_rd) = match codes.format {
+        let (with_rs1, with_rs2, with_rd) = match format {
             _ if is_ecall => (true, true, true),
             InsnFormat::R => (true, true, true),
             InsnFormat::I => (true, false, true),
@@ -48,10 +50,10 @@ impl<E: ExtensionField, I: RIVInstruction> Instruction<E> for DummyInstruction<E
             InsnFormat::U => (false, false, true),
             InsnFormat::J => (false, false, true),
         };
-        let with_mem_write = matches!(codes.category, InsnCategory::Store) || is_ecall;
-        let with_mem_read = matches!(codes.category, InsnCategory::Load);
-        let branching = matches!(codes.category, InsnCategory::Branch)
-            || matches!(codes.kind, InsnKind::JAL | InsnKind::JALR)
+        let with_mem_write = matches!(category, InsnCategory::Store) || is_ecall;
+        let with_mem_read = matches!(category, InsnCategory::Load);
+        let branching = matches!(category, InsnCategory::Branch)
+            || matches!(kind, InsnKind::JAL | InsnKind::JALR)
             || is_ecall;
 
         DummyConfig::construct_circuit(
@@ -174,7 +176,7 @@ impl<E: ExtensionField> DummyConfig<E> {
         // Fetch instruction
 
         // The register IDs of ECALL is fixed, not encoded.
-        let is_ecall = matches!(kind, InsnKind::EANY);
+        let is_ecall = matches!(kind, InsnKind::ECALL);
         let rs1_id = match &rs1 {
             Some((r, _)) if !is_ecall => r.id.expr(),
             _ => 0.into(),
diff --git a/ceno_zkvm/src/instructions/riscv/dummy/test.rs b/ceno_zkvm/src/instructions/riscv/dummy/test.rs
index df1eb0572..5f3a03bb9 100644
--- a/ceno_zkvm/src/instructions/riscv/dummy/test.rs
+++ b/ceno_zkvm/src/instructions/riscv/dummy/test.rs
@@ -30,7 +30,7 @@ fn test_dummy_ecall() {
         .unwrap();
 
     let step = StepRecord::new_ecall_any(4, MOCK_PC_START);
-    let insn_code = step.insn_code();
+    let insn_code = step.insn();
     let (raw_witin, lkm) =
         EcallDummy::assign_instances(&config, cb.cs.num_witin as usize, vec![step]).unwrap();
 
diff --git a/ceno_zkvm/src/instructions/riscv/ecall.rs b/ceno_zkvm/src/instructions/riscv/ecall.rs
index 0d7a3315a..261772638 100644
--- a/ceno_zkvm/src/instructions/riscv/ecall.rs
+++ b/ceno_zkvm/src/instructions/riscv/ecall.rs
@@ -7,7 +7,7 @@ use super::{RIVInstruction, dummy::DummyInstruction};
 
 pub struct EcallOp;
 impl RIVInstruction for EcallOp {
-    const INST_KIND: InsnKind = InsnKind::EANY;
+    const INST_KIND: InsnKind = InsnKind::ECALL;
 }
 /// Unsafe. A dummy ecall circuit that ignores unimplemented functions.
 pub type EcallDummy<E> = DummyInstruction<E, EcallOp>;
diff --git a/ceno_zkvm/src/instructions/riscv/ecall_insn.rs b/ceno_zkvm/src/instructions/riscv/ecall_insn.rs
index 078223617..8de7e02c8 100644
--- a/ceno_zkvm/src/instructions/riscv/ecall_insn.rs
+++ b/ceno_zkvm/src/instructions/riscv/ecall_insn.rs
@@ -10,7 +10,7 @@ use crate::{
     tables::InsnRecord,
     witness::LkMultiplicity,
 };
-use ceno_emul::{InsnKind::EANY, PC_STEP_SIZE, Platform, StepRecord, Tracer};
+use ceno_emul::{InsnKind::ECALL, PC_STEP_SIZE, Platform, StepRecord, Tracer};
 use ff_ext::ExtensionField;
 
 pub struct EcallInstructionConfig {
@@ -38,7 +38,7 @@ impl EcallInstructionConfig {
 
         cb.lk_fetch(&InsnRecord::new(
             pc.expr(),
-            EANY.into(),
+            ECALL.into(),
             None,
             0.into(),
             0.into(),
diff --git a/ceno_zkvm/src/instructions/riscv/jump.rs b/ceno_zkvm/src/instructions/riscv/jump.rs
index 50708d07a..b57aadbbb 100644
--- a/ceno_zkvm/src/instructions/riscv/jump.rs
+++ b/ceno_zkvm/src/instructions/riscv/jump.rs
@@ -1,12 +1,8 @@
-mod auipc;
 mod jal;
 mod jalr;
-mod lui;
 
-pub use auipc::AuipcInstruction;
 pub use jal::JalInstruction;
 pub use jalr::JalrInstruction;
-pub use lui::LuiInstruction;
 
 #[cfg(test)]
 mod test;
diff --git a/ceno_zkvm/src/instructions/riscv/jump/auipc.rs b/ceno_zkvm/src/instructions/riscv/jump/auipc.rs
deleted file mode 100644
index ed1f07d1f..000000000
--- a/ceno_zkvm/src/instructions/riscv/jump/auipc.rs
+++ /dev/null
@@ -1,94 +0,0 @@
-use std::marker::PhantomData;
-
-use ceno_emul::InsnKind;
-use ff_ext::ExtensionField;
-
-use crate::{
-    Value,
-    circuit_builder::CircuitBuilder,
-    error::ZKVMError,
-    expression::{ToExpr, WitIn},
-    instructions::{
-        Instruction,
-        riscv::{constants::UInt, u_insn::UInstructionConfig},
-    },
-    set_val,
-    tables::InsnRecord,
-    utils::i64_to_base,
-    witness::LkMultiplicity,
-};
-
-pub struct AuipcConfig<E: ExtensionField> {
-    pub u_insn: UInstructionConfig<E>,
-    pub imm: WitIn,
-    pub overflow_bit: WitIn,
-    pub rd_written: UInt<E>,
-}
-
-pub struct AuipcInstruction<E>(PhantomData<E>);
-
-/// AUIPC instruction circuit
-impl<E: ExtensionField> Instruction<E> for AuipcInstruction<E> {
-    type InstructionConfig = AuipcConfig<E>;
-
-    fn name() -> String {
-        format!("{:?}", InsnKind::AUIPC)
-    }
-
-    fn construct_circuit(
-        circuit_builder: &mut CircuitBuilder<E>,
-    ) -> Result<AuipcConfig<E>, ZKVMError> {
-        let imm = circuit_builder.create_witin(|| "imm");
-        let rd_written = UInt::new(|| "rd_written", circuit_builder)?;
-
-        let u_insn = UInstructionConfig::construct_circuit(
-            circuit_builder,
-            InsnKind::AUIPC,
-            &imm.expr(),
-            rd_written.register_expr(),
-        )?;
-
-        let overflow_bit = circuit_builder.create_witin(|| "overflow_bit");
-        circuit_builder.assert_bit(|| "is_bit", overflow_bit.expr())?;
-
-        // assert: imm + pc = rd_written + overflow_bit * 2^32
-        // valid formulation of mod 2^32 arithmetic because:
-        // - imm and pc are constrained to 4 bytes by instruction table lookup
-        // - rd_written is constrained to 4 bytes by UInt checked limbs
-        circuit_builder.require_equal(
-            || "imm+pc = rd_written+2^32*overflow",
-            imm.expr() + u_insn.vm_state.pc.expr(),
-            rd_written.value() + overflow_bit.expr() * (1u64 << 32),
-        )?;
-
-        Ok(AuipcConfig {
-            u_insn,
-            imm,
-            overflow_bit,
-            rd_written,
-        })
-    }
-
-    fn assign_instance(
-        config: &Self::InstructionConfig,
-        instance: &mut [E::BaseField],
-        lk_multiplicity: &mut LkMultiplicity,
-        step: &ceno_emul::StepRecord,
-    ) -> Result<(), ZKVMError> {
-        let pc: u32 = step.pc().before.0;
-        let imm = InsnRecord::imm_internal(&step.insn());
-        let (sum, overflow) = pc.overflowing_add(imm as u32);
-
-        set_val!(instance, config.imm, i64_to_base::<E::BaseField>(imm));
-        set_val!(instance, config.overflow_bit, overflow as u64);
-
-        let sum_limbs = Value::new(sum, lk_multiplicity);
-        config.rd_written.assign_value(instance, sum_limbs);
-
-        config
-            .u_insn
-            .assign_instance(instance, lk_multiplicity, step)?;
-
-        Ok(())
-    }
-}
diff --git a/ceno_zkvm/src/instructions/riscv/jump/lui.rs b/ceno_zkvm/src/instructions/riscv/jump/lui.rs
deleted file mode 100644
index 8ad9d497e..000000000
--- a/ceno_zkvm/src/instructions/riscv/jump/lui.rs
+++ /dev/null
@@ -1,64 +0,0 @@
-use std::marker::PhantomData;
-
-use ceno_emul::InsnKind;
-use ff_ext::ExtensionField;
-
-use crate::{
-    Value,
-    circuit_builder::CircuitBuilder,
-    error::ZKVMError,
-    instructions::{
-        Instruction,
-        riscv::{constants::UInt, u_insn::UInstructionConfig},
-    },
-    witness::LkMultiplicity,
-};
-
-pub struct LuiConfig<E: ExtensionField> {
-    pub u_insn: UInstructionConfig<E>,
-    pub rd_written: UInt<E>,
-}
-
-pub struct LuiInstruction<E>(PhantomData<E>);
-
-/// LUI instruction circuit
-impl<E: ExtensionField> Instruction<E> for LuiInstruction<E> {
-    type InstructionConfig = LuiConfig<E>;
-
-    fn name() -> String {
-        format!("{:?}", InsnKind::LUI)
-    }
-
-    fn construct_circuit(
-        circuit_builder: &mut CircuitBuilder<E>,
-    ) -> Result<LuiConfig<E>, ZKVMError> {
-        let rd_written = UInt::new(|| "rd_limbs", circuit_builder)?;
-
-        // rd_written = imm, so just enforce that U-type immediate from program
-        // table is equal to rd_written value
-        let u_insn = UInstructionConfig::construct_circuit(
-            circuit_builder,
-            InsnKind::LUI,
-            &rd_written.value(), // instruction immediate for program table lookup
-            rd_written.register_expr(),
-        )?;
-
-        Ok(LuiConfig { u_insn, rd_written })
-    }
-
-    fn assign_instance(
-        config: &Self::InstructionConfig,
-        instance: &mut [E::BaseField],
-        lk_multiplicity: &mut LkMultiplicity,
-        step: &ceno_emul::StepRecord,
-    ) -> Result<(), ZKVMError> {
-        config
-            .u_insn
-            .assign_instance(instance, lk_multiplicity, step)?;
-
-        let rd = Value::new(step.rd().unwrap().value.after, lk_multiplicity);
-        config.rd_written.assign_limbs(instance, rd.as_u16_limbs());
-
-        Ok(())
-    }
-}
diff --git a/ceno_zkvm/src/instructions/riscv/jump/test.rs b/ceno_zkvm/src/instructions/riscv/jump/test.rs
index 4453f4c3a..648b17f66 100644
--- a/ceno_zkvm/src/instructions/riscv/jump/test.rs
+++ b/ceno_zkvm/src/instructions/riscv/jump/test.rs
@@ -3,14 +3,11 @@ use goldilocks::GoldilocksExt2;
 
 use crate::{
     circuit_builder::{CircuitBuilder, ConstraintSystem},
-    instructions::{
-        Instruction,
-        riscv::test_utils::{imm_j, imm_u},
-    },
+    instructions::Instruction,
     scheme::mock_prover::{MOCK_PC_START, MockProver},
 };
 
-use super::{AuipcInstruction, JalInstruction, JalrInstruction, LuiInstruction};
+use super::{JalInstruction, JalrInstruction};
 
 #[test]
 fn test_opcode_jal() {
@@ -29,7 +26,7 @@ fn test_opcode_jal() {
 
     let pc_offset: i32 = -8i32;
     let new_pc: ByteAddr = ByteAddr(MOCK_PC_START.0.wrapping_add_signed(pc_offset));
-    let insn_code = encode_rv32(InsnKind::JAL, 0, 0, 4, imm_j(pc_offset));
+    let insn_code = encode_rv32(InsnKind::JAL, 0, 0, 4, pc_offset);
     let (raw_witin, lkm) = JalInstruction::<GoldilocksExt2>::assign_instances(
         &config,
         cb.cs.num_witin as usize,
@@ -64,7 +61,7 @@ fn test_opcode_jalr() {
     let imm = -15i32;
     let rs1_read: Word = 100u32;
     let new_pc: ByteAddr = ByteAddr(rs1_read.wrapping_add_signed(imm) & (!1));
-    let insn_code = encode_rv32(InsnKind::JALR, 2, 0, 4, imm as u32);
+    let insn_code = encode_rv32(InsnKind::JALR, 2, 0, 4, imm);
 
     let (raw_witin, lkm) = JalrInstruction::<GoldilocksExt2>::assign_instances(
         &config,
@@ -79,72 +76,5 @@ fn test_opcode_jalr() {
         )],
     )
     .unwrap();
-
-    MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
-}
-
-#[test]
-fn test_opcode_lui() {
-    let mut cs = ConstraintSystem::<GoldilocksExt2>::new(|| "riscv");
-    let mut cb = CircuitBuilder::new(&mut cs);
-    let config = cb
-        .namespace(
-            || "lui",
-            |cb| {
-                let config = LuiInstruction::<GoldilocksExt2>::construct_circuit(cb);
-                Ok(config)
-            },
-        )
-        .unwrap()
-        .unwrap();
-
-    let imm_value = imm_u(0x90005);
-    let insn_code = encode_rv32(InsnKind::LUI, 0, 0, 4, imm_value);
-    let (raw_witin, lkm) = LuiInstruction::<GoldilocksExt2>::assign_instances(
-        &config,
-        cb.cs.num_witin as usize,
-        vec![StepRecord::new_u_instruction(
-            4,
-            MOCK_PC_START,
-            insn_code,
-            Change::new(0, imm_value),
-            0,
-        )],
-    )
-    .unwrap();
-
-    MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
-}
-
-#[test]
-fn test_opcode_auipc() {
-    let mut cs = ConstraintSystem::<GoldilocksExt2>::new(|| "riscv");
-    let mut cb = CircuitBuilder::new(&mut cs);
-    let config = cb
-        .namespace(
-            || "auipc",
-            |cb| {
-                let config = AuipcInstruction::<GoldilocksExt2>::construct_circuit(cb);
-                Ok(config)
-            },
-        )
-        .unwrap()
-        .unwrap();
-
-    let imm_value = imm_u(0x90005);
-    let insn_code = encode_rv32(InsnKind::AUIPC, 0, 0, 4, imm_value);
-    let (raw_witin, lkm) = AuipcInstruction::<GoldilocksExt2>::assign_instances(
-        &config,
-        cb.cs.num_witin as usize,
-        vec![StepRecord::new_u_instruction(
-            4,
-            MOCK_PC_START,
-            insn_code,
-            Change::new(0, MOCK_PC_START.0.wrapping_add(imm_value)),
-            0,
-        )],
-    )
-    .unwrap();
-
     MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
 }
diff --git a/ceno_zkvm/src/instructions/riscv/logic_imm/logic_imm_circuit.rs b/ceno_zkvm/src/instructions/riscv/logic_imm/logic_imm_circuit.rs
index 9328e27a7..4e9bd5faa 100644
--- a/ceno_zkvm/src/instructions/riscv/logic_imm/logic_imm_circuit.rs
+++ b/ceno_zkvm/src/instructions/riscv/logic_imm/logic_imm_circuit.rs
@@ -123,7 +123,7 @@ impl<E: ExtensionField> LogicConfig<E> {
 
 #[cfg(test)]
 mod test {
-    use ceno_emul::{Change, InsnKind, PC_STEP_SIZE, StepRecord, encode_rv32};
+    use ceno_emul::{Change, InsnKind, PC_STEP_SIZE, StepRecord, encode_rv32u};
     use goldilocks::GoldilocksExt2;
 
     use crate::{
@@ -190,7 +190,7 @@ mod test {
             .unwrap()
             .unwrap();
 
-        let insn_code = encode_rv32(I::INST_KIND, 2, 0, 4, imm);
+        let insn_code = encode_rv32u(I::INST_KIND, 2, 0, 4, imm);
         let (raw_witin, lkm) = LogicInstruction::<GoldilocksExt2, I>::assign_instances(
             &config,
             cb.cs.num_witin as usize,
diff --git a/ceno_zkvm/src/instructions/riscv/memory/test.rs b/ceno_zkvm/src/instructions/riscv/memory/test.rs
index 6243f197b..4785994d3 100644
--- a/ceno_zkvm/src/instructions/riscv/memory/test.rs
+++ b/ceno_zkvm/src/instructions/riscv/memory/test.rs
@@ -74,7 +74,7 @@ fn load(mem_value: Word, insn: InsnKind, shift: u32) -> Word {
     }
 }
 
-fn impl_opcode_store<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruction<E>>(imm: u32) {
+fn impl_opcode_store<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruction<E>>(imm: i32) {
     let mut cs = ConstraintSystem::<E>::new(|| "riscv");
     let mut cb = CircuitBuilder::new(&mut cs);
     let config = cb
@@ -92,7 +92,7 @@ fn impl_opcode_store<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruct
     let prev_mem_value = 0x40302010;
     let rs2_word = Word::from(0x12345678_u32);
     let rs1_word = Word::from(0x4000000_u32);
-    let unaligned_addr = ByteAddr::from(rs1_word.wrapping_add(imm));
+    let unaligned_addr = ByteAddr::from(rs1_word.wrapping_add_signed(imm));
     let new_mem_value = match I::INST_KIND {
         InsnKind::SB => sb(prev_mem_value, rs2_word, unaligned_addr.shift()),
         InsnKind::SH => sh(prev_mem_value, rs2_word, unaligned_addr.shift()),
@@ -122,7 +122,7 @@ fn impl_opcode_store<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruct
     MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
 }
 
-fn impl_opcode_load<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruction<E>>(imm: u32) {
+fn impl_opcode_load<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruction<E>>(imm: i32) {
     let mut cs = ConstraintSystem::<E>::new(|| "riscv");
     let mut cb = CircuitBuilder::new(&mut cs);
     let config = cb
@@ -140,7 +140,7 @@ fn impl_opcode_load<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instructi
     let mem_value = 0x40302010;
     let rs1_word = Word::from(0x4000000_u32);
     let prev_rd_word = Word::from(0x12345678_u32);
-    let unaligned_addr = ByteAddr::from(rs1_word.wrapping_add(imm));
+    let unaligned_addr = ByteAddr::from(rs1_word.wrapping_add_signed(imm));
     let new_rd_word = load(mem_value, I::INST_KIND, unaligned_addr.shift());
     let rd_change = Change {
         before: prev_rd_word,
@@ -166,16 +166,16 @@ fn impl_opcode_load<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instructi
     MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
 }
 
-fn impl_opcode_sb(imm: u32) {
+fn impl_opcode_sb(imm: i32) {
     impl_opcode_store::<GoldilocksExt2, SBOp, SbInstruction<GoldilocksExt2>>(imm)
 }
 
-fn impl_opcode_sh(imm: u32) {
+fn impl_opcode_sh(imm: i32) {
     assert_eq!(imm & 0x01, 0);
     impl_opcode_store::<GoldilocksExt2, SHOp, ShInstruction<GoldilocksExt2>>(imm)
 }
 
-fn impl_opcode_sw(imm: u32) {
+fn impl_opcode_sw(imm: i32) {
     assert_eq!(imm & 0x03, 0);
     impl_opcode_store::<GoldilocksExt2, SWOp, SwInstruction<GoldilocksExt2>>(imm)
 }
@@ -187,9 +187,8 @@ fn test_sb() {
     impl_opcode_sb(10);
     impl_opcode_sb(15);
 
-    let neg_one = u32::MAX;
-    for i in 0..4 {
-        impl_opcode_sb(neg_one - i);
+    for i in -4..0 {
+        impl_opcode_sb(i);
     }
 }
 
@@ -198,9 +197,8 @@ fn test_sh() {
     impl_opcode_sh(0);
     impl_opcode_sh(2);
 
-    let neg_two = u32::MAX - 1;
-    for i in [0, 2] {
-        impl_opcode_sh(neg_two - i)
+    for i in [-4, -2] {
+        impl_opcode_sh(i)
     }
 }
 
@@ -209,8 +207,7 @@ fn test_sw() {
     impl_opcode_sw(0);
     impl_opcode_sw(4);
 
-    let neg_four = u32::MAX - 3;
-    impl_opcode_sw(neg_four);
+    impl_opcode_sw(-4);
 }
 
 #[test]
@@ -220,10 +217,8 @@ fn test_lb() {
     impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(2);
     impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(3);
 
-    let neg_one = u32::MAX;
-    // imm = -1, -2, -3
-    for i in 0..3 {
-        impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(neg_one - i);
+    for i in -3..0 {
+        impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(i);
     }
 }
 
@@ -234,10 +229,8 @@ fn test_lbu() {
     impl_opcode_load::<GoldilocksExt2, LbuOp, LbuInstruction<GoldilocksExt2>>(2);
     impl_opcode_load::<GoldilocksExt2, LbuOp, LbuInstruction<GoldilocksExt2>>(3);
 
-    let neg_one = u32::MAX;
-    // imm = -1, -2, -3
-    for i in 0..3 {
-        impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(neg_one - i);
+    for i in -3..0 {
+        impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(i);
     }
 }
 
@@ -247,10 +240,8 @@ fn test_lh() {
     impl_opcode_load::<GoldilocksExt2, LhOp, LhInstruction<GoldilocksExt2>>(2);
     impl_opcode_load::<GoldilocksExt2, LhOp, LhInstruction<GoldilocksExt2>>(4);
 
-    let neg_two = u32::MAX - 1;
-    // imm = -2, -4
-    for i in [0, 2] {
-        impl_opcode_load::<GoldilocksExt2, LhOp, LhInstruction<GoldilocksExt2>>(neg_two - i);
+    for i in [-4, -2] {
+        impl_opcode_load::<GoldilocksExt2, LhOp, LhInstruction<GoldilocksExt2>>(i);
     }
 }
 
@@ -260,10 +251,8 @@ fn test_lhu() {
     impl_opcode_load::<GoldilocksExt2, LhuOp, LhuInstruction<GoldilocksExt2>>(2);
     impl_opcode_load::<GoldilocksExt2, LhuOp, LhuInstruction<GoldilocksExt2>>(4);
 
-    let neg_two = u32::MAX - 1;
-    // imm = -2, -4
-    for i in [0, 2] {
-        impl_opcode_load::<GoldilocksExt2, LhuOp, LhuInstruction<GoldilocksExt2>>(neg_two - i);
+    for i in [-4, -2] {
+        impl_opcode_load::<GoldilocksExt2, LhuOp, LhuInstruction<GoldilocksExt2>>(i);
     }
 }
 
@@ -271,5 +260,5 @@ fn test_lhu() {
 fn test_lw() {
     impl_opcode_load::<GoldilocksExt2, LwOp, LwInstruction<GoldilocksExt2>>(0);
     impl_opcode_load::<GoldilocksExt2, LwOp, LwInstruction<GoldilocksExt2>>(4);
-    impl_opcode_load::<GoldilocksExt2, LwOp, LwInstruction<GoldilocksExt2>>(u32::MAX - 3); // imm = -4
+    impl_opcode_load::<GoldilocksExt2, LwOp, LwInstruction<GoldilocksExt2>>(-4);
 }
diff --git a/ceno_zkvm/src/instructions/riscv/rv32im.rs b/ceno_zkvm/src/instructions/riscv/rv32im.rs
index d404cd073..a00d2c7e4 100644
--- a/ceno_zkvm/src/instructions/riscv/rv32im.rs
+++ b/ceno_zkvm/src/instructions/riscv/rv32im.rs
@@ -39,10 +39,7 @@ use std::collections::{BTreeMap, BTreeSet};
 use strum::IntoEnumIterator;
 
 use super::{
-    arith::AddInstruction,
-    branch::BltuInstruction,
-    ecall::HaltInstruction,
-    jump::{JalInstruction, LuiInstruction},
+    arith::AddInstruction, branch::BltuInstruction, ecall::HaltInstruction, jump::JalInstruction,
     memory::LwInstruction,
 };
 
@@ -88,8 +85,6 @@ pub struct Rv32imConfig<E: ExtensionField> {
     // Jump Opcodes
     pub jal_config: <JalInstruction<E> as Instruction<E>>::InstructionConfig,
     pub jalr_config: <JalrInstruction<E> as Instruction<E>>::InstructionConfig,
-    pub auipc_config: <AuipcInstruction<E> as Instruction<E>>::InstructionConfig,
-    pub lui_config: <LuiInstruction<E> as Instruction<E>>::InstructionConfig,
 
     // Memory Opcodes
     pub lw_config: <LwInstruction<E> as Instruction<E>>::InstructionConfig,
@@ -155,10 +150,8 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         let bgeu_config = cs.register_opcode_circuit::<BgeuInstruction<E>>();
 
         // jump opcodes
-        let lui_config = cs.register_opcode_circuit::<LuiInstruction<E>>();
         let jal_config = cs.register_opcode_circuit::<JalInstruction<E>>();
         let jalr_config = cs.register_opcode_circuit::<JalrInstruction<E>>();
-        let auipc_config = cs.register_opcode_circuit::<AuipcInstruction<E>>();
 
         // memory opcodes
         let lw_config = cs.register_opcode_circuit::<LwInstruction<E>>();
@@ -218,10 +211,8 @@ impl<E: ExtensionField> Rv32imConfig<E> {
             bge_config,
             bgeu_config,
             // jump opcodes
-            lui_config,
             jal_config,
             jalr_config,
-            auipc_config,
             // memory opcodes
             sw_config,
             sh_config,
@@ -287,8 +278,6 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         // jump
         fixed.register_opcode_circuit::<JalInstruction<E>>(cs);
         fixed.register_opcode_circuit::<JalrInstruction<E>>(cs);
-        fixed.register_opcode_circuit::<AuipcInstruction<E>>(cs);
-        fixed.register_opcode_circuit::<LuiInstruction<E>>(cs);
         // memory
         fixed.register_opcode_circuit::<SwInstruction<E>>(cs);
         fixed.register_opcode_circuit::<ShInstruction<E>>(cs);
@@ -318,20 +307,19 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         witness: &mut ZKVMWitnesses<E>,
         steps: Vec<StepRecord>,
     ) -> Result<GroupedSteps, ZKVMError> {
-        let mut all_records: BTreeMap<usize, Vec<StepRecord>> = InsnKind::iter()
-            .map(|insn_kind| ((insn_kind as usize), Vec::new()))
+        let mut all_records: BTreeMap<InsnKind, Vec<StepRecord>> = InsnKind::iter()
+            .map(|insn_kind| (insn_kind, Vec::new()))
             .collect();
         let mut halt_records = Vec::new();
         steps.into_iter().for_each(|record| {
-            let insn_kind = record.insn().codes().kind;
+            let insn_kind = record.insn.kind;
             match insn_kind {
                 // ecall / halt
-                EANY if record.rs1().unwrap().value == Platform::ecall_halt() => {
+                InsnKind::ECALL if record.rs1().unwrap().value == Platform::ecall_halt() => {
                     halt_records.push(record);
                 }
                 // other type of ecalls are handled by dummy ecall instruction
                 _ => {
-                    let insn_kind = insn_kind as usize;
                     // it's safe to unwrap as all_records are initialized with Vec::new()
                     all_records.get_mut(&insn_kind).unwrap().push(record);
                 }
@@ -353,7 +341,7 @@ impl<E: ExtensionField> Rv32imConfig<E> {
                 witness.assign_opcode_circuit::<$instruction>(
                     cs,
                     &self.$config,
-                    all_records.remove(&($insn_kind as usize)).unwrap(),
+                    all_records.remove(&($insn_kind)).unwrap(),
                 )?;
             };
         }
@@ -393,8 +381,6 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         // jump
         assign_opcode!(JAL, JalInstruction<E>, jal_config);
         assign_opcode!(JALR, JalrInstruction<E>, jalr_config);
-        assign_opcode!(AUIPC, AuipcInstruction<E>, auipc_config);
-        assign_opcode!(LUI, LuiInstruction<E>, lui_config);
         // memory
         assign_opcode!(LW, LwInstruction<E>, lw_config);
         assign_opcode!(LB, LbInstruction<E>, lb_config);
@@ -411,9 +397,8 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         assert_eq!(
             all_records.keys().cloned().collect::<BTreeSet<_>>(),
             // these are opcodes that haven't been implemented
-            [INVALID, DIV, REM, REMU, EANY]
+            [INVALID, DIV, REM, REMU, ECALL]
                 .into_iter()
-                .map(|insn_kind| insn_kind as usize)
                 .collect::<BTreeSet<_>>(),
         );
         Ok(GroupedSteps(all_records))
@@ -439,7 +424,7 @@ impl<E: ExtensionField> Rv32imConfig<E> {
 }
 
 /// Opaque type to pass unimplemented instructions from Rv32imConfig to DummyExtraConfig.
-pub struct GroupedSteps(BTreeMap<usize, Vec<StepRecord>>);
+pub struct GroupedSteps(BTreeMap<InsnKind, Vec<StepRecord>>);
 
 /// Fake version of what is missing in Rv32imConfig, for some tests.
 pub struct DummyExtraConfig<E: ExtensionField> {
@@ -487,7 +472,7 @@ impl<E: ExtensionField> DummyExtraConfig<E> {
                 witness.assign_opcode_circuit::<$instruction>(
                     cs,
                     &self.$config,
-                    steps.remove(&($insn_kind as usize)).unwrap(),
+                    steps.remove(&($insn_kind)).unwrap(),
                 )?;
             };
         }
@@ -495,10 +480,11 @@ impl<E: ExtensionField> DummyExtraConfig<E> {
         assign_opcode!(DIV, DivDummy<E>, div_config);
         assign_opcode!(REM, RemDummy<E>, rem_config);
         assign_opcode!(REMU, RemuDummy<E>, remu_config);
-        assign_opcode!(EANY, EcallDummy<E>, ecall_config);
+        assign_opcode!(ECALL, EcallDummy<E>, ecall_config);
 
-        let _ = steps.remove(&(INVALID as usize));
-        assert!(steps.is_empty());
+        let _ = steps.remove(&INVALID);
+        let keys: Vec<&InsnKind> = steps.keys().collect::<Vec<_>>();
+        assert!(steps.is_empty(), "unimplemented opcodes: {:?}", keys);
         Ok(())
     }
 }
diff --git a/ceno_zkvm/src/instructions/riscv/shift_imm.rs b/ceno_zkvm/src/instructions/riscv/shift_imm.rs
index 50ebe4ff0..b16dd5bf2 100644
--- a/ceno_zkvm/src/instructions/riscv/shift_imm.rs
+++ b/ceno_zkvm/src/instructions/riscv/shift_imm.rs
@@ -188,7 +188,7 @@ impl<E: ExtensionField, I: RIVInstruction> Instruction<E> for ShiftImmInstructio
 
 #[cfg(test)]
 mod test {
-    use ceno_emul::{Change, InsnKind, PC_STEP_SIZE, StepRecord, encode_rv32};
+    use ceno_emul::{Change, InsnKind, PC_STEP_SIZE, StepRecord, encode_rv32u};
     use goldilocks::GoldilocksExt2;
 
     use super::{ShiftImmInstruction, SlliOp, SraiOp, SrliOp};
@@ -255,17 +255,17 @@ mod test {
         let (prefix, insn_code, rd_written) = match I::INST_KIND {
             InsnKind::SLLI => (
                 "SLLI",
-                encode_rv32(InsnKind::SLLI, 2, 0, 4, imm),
+                encode_rv32u(InsnKind::SLLI, 2, 0, 4, imm),
                 rs1_read << imm,
             ),
             InsnKind::SRAI => (
                 "SRAI",
-                encode_rv32(InsnKind::SRAI, 2, 0, 4, imm),
+                encode_rv32u(InsnKind::SRAI, 2, 0, 4, imm),
                 (rs1_read as i32 >> imm as i32) as u32,
             ),
             InsnKind::SRLI => (
                 "SRLI",
-                encode_rv32(InsnKind::SRLI, 2, 0, 4, imm),
+                encode_rv32u(InsnKind::SRLI, 2, 0, 4, imm),
                 rs1_read >> imm,
             ),
             _ => unreachable!(),
diff --git a/ceno_zkvm/src/instructions/riscv/slt.rs b/ceno_zkvm/src/instructions/riscv/slt.rs
index 08ceff8b3..a3e477350 100644
--- a/ceno_zkvm/src/instructions/riscv/slt.rs
+++ b/ceno_zkvm/src/instructions/riscv/slt.rs
@@ -83,7 +83,7 @@ impl<E: ExtensionField, I: RIVInstruction> Instruction<E> for SetLessThanInstruc
 
         let r_insn = RInstructionConfig::<E>::construct_circuit(
             cb,
-            InsnKind::SLT,
+            I::INST_KIND,
             rs1_read.register_expr(),
             rs2_read.register_expr(),
             rd_written.register_expr(),
@@ -159,7 +159,7 @@ mod test {
             .unwrap()
             .unwrap();
 
-        let insn_code = encode_rv32(InsnKind::SLT, 2, 3, 4, 0);
+        let insn_code = encode_rv32(I::INST_KIND, 2, 3, 4, 0);
         let (raw_witin, lkm) = SetLessThanInstruction::<_, I>::assign_instances(
             &config,
             cb.cs.num_witin as usize,
diff --git a/ceno_zkvm/src/instructions/riscv/slti.rs b/ceno_zkvm/src/instructions/riscv/slti.rs
index a0766b425..cbab2fe16 100644
--- a/ceno_zkvm/src/instructions/riscv/slti.rs
+++ b/ceno_zkvm/src/instructions/riscv/slti.rs
@@ -228,7 +228,7 @@ mod test {
         let mut cs = ConstraintSystem::<GoldilocksExt2>::new(|| "riscv");
         let mut cb = CircuitBuilder::new(&mut cs);
 
-        let insn_code = encode_rv32(I::INST_KIND, 2, 0, 4, imm as u32);
+        let insn_code = encode_rv32(I::INST_KIND, 2, 0, 4, imm);
 
         let config = cb
             .namespace(
diff --git a/ceno_zkvm/src/instructions/riscv/test_utils.rs b/ceno_zkvm/src/instructions/riscv/test_utils.rs
deleted file mode 100644
index 416ce628c..000000000
--- a/ceno_zkvm/src/instructions/riscv/test_utils.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-pub fn imm_b(imm: i32) -> u32 {
-    // imm is 13 bits in B-type
-    imm_with_max_valid_bits(imm, 13)
-}
-
-pub fn imm_i(imm: i32) -> u32 {
-    // imm is 12 bits in I-type
-    imm_with_max_valid_bits(imm, 12)
-}
-
-pub fn imm_j(imm: i32) -> u32 {
-    // imm is 21 bits in J-type
-    imm_with_max_valid_bits(imm, 21)
-}
-
-fn imm_with_max_valid_bits(imm: i32, bits: u32) -> u32 {
-    imm as u32 & !(u32::MAX << bits)
-}
-
-pub fn imm_u(imm: u32) -> u32 {
-    // valid imm is imm[12:31] in U-type
-    imm << 12
-}
diff --git a/ceno_zkvm/src/instructions/riscv/u_insn.rs b/ceno_zkvm/src/instructions/riscv/u_insn.rs
deleted file mode 100644
index 719ee2df1..000000000
--- a/ceno_zkvm/src/instructions/riscv/u_insn.rs
+++ /dev/null
@@ -1,65 +0,0 @@
-use ceno_emul::{InsnKind, StepRecord};
-use ff_ext::ExtensionField;
-
-use crate::{
-    chip_handler::RegisterExpr,
-    circuit_builder::CircuitBuilder,
-    error::ZKVMError,
-    expression::{Expression, ToExpr},
-    instructions::riscv::insn_base::{StateInOut, WriteRD},
-    tables::InsnRecord,
-    witness::LkMultiplicity,
-};
-
-/// This config handles the common part of the U-type instruction:
-/// - PC, cycle, fetch
-/// - Register access
-///
-/// It does not witness the output rd value or instruction immediate
-#[derive(Debug)]
-pub struct UInstructionConfig<E: ExtensionField> {
-    pub vm_state: StateInOut<E>,
-    pub rd: WriteRD<E>,
-}
-
-impl<E: ExtensionField> UInstructionConfig<E> {
-    pub fn construct_circuit(
-        circuit_builder: &mut CircuitBuilder<E>,
-        insn_kind: InsnKind,
-        imm: &Expression<E>,
-        rd_written: RegisterExpr<E>,
-    ) -> Result<Self, ZKVMError> {
-        // State in and out
-        let vm_state = StateInOut::construct_circuit(circuit_builder, false)?;
-
-        // Registers
-        let rd = WriteRD::construct_circuit(circuit_builder, rd_written, vm_state.ts)?;
-
-        // Fetch instruction
-        circuit_builder.lk_fetch(&InsnRecord::new(
-            vm_state.pc.expr(),
-            insn_kind.into(),
-            Some(rd.id.expr()),
-            0.into(),
-            0.into(),
-            imm.clone(),
-        ))?;
-
-        Ok(UInstructionConfig { vm_state, rd })
-    }
-
-    pub fn assign_instance(
-        &self,
-        instance: &mut [<E as ExtensionField>::BaseField],
-        lk_multiplicity: &mut LkMultiplicity,
-        step: &StepRecord,
-    ) -> Result<(), ZKVMError> {
-        self.vm_state.assign_instance(instance, step)?;
-        self.rd.assign_instance(instance, lk_multiplicity, step)?;
-
-        // Fetch the instruction.
-        lk_multiplicity.fetch(step.pc().before.0);
-
-        Ok(())
-    }
-}
diff --git a/ceno_zkvm/src/scheme/mock_prover.rs b/ceno_zkvm/src/scheme/mock_prover.rs
index 21b6d4c05..d2ce320ee 100644
--- a/ceno_zkvm/src/scheme/mock_prover.rs
+++ b/ceno_zkvm/src/scheme/mock_prover.rs
@@ -17,7 +17,7 @@ use crate::{
 };
 use ark_std::test_rng;
 use base64::{Engine, engine::general_purpose::STANDARD_NO_PAD};
-use ceno_emul::{ByteAddr, CENO_PLATFORM, PC_WORD_SIZE, Program};
+use ceno_emul::{ByteAddr, CENO_PLATFORM, Program};
 use ff::Field;
 use ff_ext::ExtensionField;
 use generic_static::StaticTypeMap;
@@ -26,7 +26,7 @@ use itertools::{Itertools, enumerate, izip};
 use multilinear_extensions::{mle::IntoMLEs, virtual_poly_v2::ArcMultilinearExtension};
 use rand::thread_rng;
 use std::{
-    collections::{BTreeMap, HashMap, HashSet},
+    collections::{HashMap, HashSet},
     fs::File,
     hash::Hash,
     io::{BufReader, ErrorKind},
@@ -400,7 +400,7 @@ impl<'a, E: ExtensionField + Hash> MockProver<E> {
     pub fn run(
         cb: &CircuitBuilder<E>,
         wits_in: &[ArcMultilinearExtension<'a, E>],
-        programs: &[u32],
+        programs: &[ceno_emul::Instruction],
         lkm: Option<LkMultiplicity>,
     ) -> Result<(), Vec<MockProverError<E>>> {
         Self::run_maybe_challenge(cb, wits_in, programs, &[], None, lkm)
@@ -409,33 +409,16 @@ impl<'a, E: ExtensionField + Hash> MockProver<E> {
     fn run_maybe_challenge(
         cb: &CircuitBuilder<E>,
         wits_in: &[ArcMultilinearExtension<'a, E>],
-        input_programs: &[u32],
+        input_programs: &[ceno_emul::Instruction],
         pi: &[ArcMultilinearExtension<'a, E>],
         challenge: Option<[E; 2]>,
         lkm: Option<LkMultiplicity>,
     ) -> Result<(), Vec<MockProverError<E>>> {
-        // fix the program table
-        let instructions = input_programs
-            .iter()
-            .cloned()
-            .chain(std::iter::repeat(0))
-            .take(MOCK_PROGRAM_SIZE)
-            .collect_vec();
-        let image = instructions
-            .iter()
-            .enumerate()
-            .map(|(insn_idx, &insn)| {
-                (
-                    CENO_PLATFORM.pc_base() + (insn_idx * PC_WORD_SIZE) as u32,
-                    insn,
-                )
-            })
-            .collect::<BTreeMap<u32, u32>>();
         let program = Program::new(
             CENO_PLATFORM.pc_base(),
             CENO_PLATFORM.pc_base(),
-            instructions,
-            image,
+            input_programs.to_vec(),
+            Default::default(),
         );
 
         // load tables
@@ -667,7 +650,7 @@ impl<'a, E: ExtensionField + Hash> MockProver<E> {
     pub fn assert_with_expected_errors(
         cb: &CircuitBuilder<E>,
         wits_in: &[ArcMultilinearExtension<'a, E>],
-        programs: &[u32],
+        programs: &[ceno_emul::Instruction],
         constraint_names: &[&str],
         challenge: Option<[E; 2]>,
         lkm: Option<LkMultiplicity>,
@@ -719,7 +702,7 @@ Hints:
     pub fn assert_satisfied_raw(
         cb: &CircuitBuilder<E>,
         raw_witin: RowMajorMatrix<E::BaseField>,
-        programs: &[u32],
+        programs: &[ceno_emul::Instruction],
         challenge: Option<[E; 2]>,
         lkm: Option<LkMultiplicity>,
     ) {
@@ -734,7 +717,7 @@ Hints:
     pub fn assert_satisfied(
         cb: &CircuitBuilder<E>,
         wits_in: &[ArcMultilinearExtension<'a, E>],
-        programs: &[u32],
+        programs: &[ceno_emul::Instruction],
         challenge: Option<[E; 2]>,
         lkm: Option<LkMultiplicity>,
     ) {
diff --git a/ceno_zkvm/src/scheme/prover.rs b/ceno_zkvm/src/scheme/prover.rs
index b0ff7b656..2c8cae8bc 100644
--- a/ceno_zkvm/src/scheme/prover.rs
+++ b/ceno_zkvm/src/scheme/prover.rs
@@ -216,7 +216,7 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> ZKVMProver<E, PCS> {
         pp: &PCS::ProverParam,
         circuit_pk: &ProvingKey<E, PCS>,
         witnesses: Vec<ArcMultilinearExtension<'_, E>>,
-        wits_commit: PCS::CommitmentWithData,
+        wits_commit: PCS::CommitmentWithWitness,
         pi: &[ArcMultilinearExtension<'_, E>],
         num_instances: usize,
         transcript: &mut impl Transcript<E>,
@@ -661,7 +661,7 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> ZKVMProver<E, PCS> {
         pp: &PCS::ProverParam,
         circuit_pk: &ProvingKey<E, PCS>,
         witnesses: Vec<ArcMultilinearExtension<'_, E>>,
-        wits_commit: PCS::CommitmentWithData,
+        wits_commit: PCS::CommitmentWithWitness,
         pi: &[ArcMultilinearExtension<'_, E>],
         transcript: &mut impl Transcript<E>,
         challenges: &[E; 2],
diff --git a/ceno_zkvm/src/scheme/tests.rs b/ceno_zkvm/src/scheme/tests.rs
index 945e49730..8a97ff437 100644
--- a/ceno_zkvm/src/scheme/tests.rs
+++ b/ceno_zkvm/src/scheme/tests.rs
@@ -3,8 +3,8 @@ use std::marker::PhantomData;
 use ark_std::test_rng;
 use ceno_emul::{
     CENO_PLATFORM,
-    InsnKind::{ADD, EANY},
-    PC_WORD_SIZE, Platform, Program, StepRecord, VMState,
+    InsnKind::{ADD, ECALL},
+    Platform, Program, StepRecord, VMState, encode_rv32,
 };
 use ff::Field;
 use ff_ext::ExtensionField;
@@ -18,7 +18,6 @@ use transcript::{BasicTranscript, Transcript};
 
 use crate::{
     circuit_builder::CircuitBuilder,
-    declare_program,
     error::ZKVMError,
     expression::{ToExpr, WitIn},
     instructions::{
@@ -187,23 +186,12 @@ fn test_rw_lk_expression_combination() {
     test_rw_lk_expression_combination_inner::<17, 61>();
 }
 
-const PROGRAM_SIZE: usize = 4;
-#[allow(clippy::unusual_byte_groupings)]
-const ECALL_HALT: u32 = 0b_000000000000_00000_000_00000_1110011;
-#[allow(clippy::unusual_byte_groupings)]
-const PROGRAM_CODE: [u32; PROGRAM_SIZE] = {
-    let mut program: [u32; PROGRAM_SIZE] = [ECALL_HALT; PROGRAM_SIZE];
-
-    declare_program!(
-        program,
-        // func7   rs2   rs1   f3  rd    opcode
-        0b_0000000_00100_00001_000_00100_0110011, // add x4, x4, x1 <=> addi x4, x4, 1
-        ECALL_HALT,                               // ecall halt
-        ECALL_HALT,                               // ecall halt
-        ECALL_HALT,                               // ecall halt
-    );
-    program
-};
+const PROGRAM_CODE: [ceno_emul::Instruction; 4] = [
+    encode_rv32(ADD, 4, 1, 4, 0),
+    encode_rv32(ECALL, 0, 0, 0, 0),
+    encode_rv32(ECALL, 0, 0, 0, 0),
+    encode_rv32(ECALL, 0, 0, 0, 0),
+];
 
 #[ignore = "this case is already tested in riscv_example as ecall_halt has only one instance"]
 #[test]
@@ -216,16 +204,7 @@ fn test_single_add_instance_e2e() {
         CENO_PLATFORM.pc_base(),
         CENO_PLATFORM.pc_base(),
         PROGRAM_CODE.to_vec(),
-        PROGRAM_CODE
-            .iter()
-            .enumerate()
-            .map(|(insn_idx, &insn)| {
-                (
-                    (insn_idx * PC_WORD_SIZE) as u32 + CENO_PLATFORM.pc_base(),
-                    insn,
-                )
-            })
-            .collect(),
+        Default::default(),
     );
 
     let pcs_param = Pcs::setup(1 << MAX_NUM_VARIABLES).expect("Basefold PCS setup");
@@ -271,10 +250,10 @@ fn test_single_add_instance_e2e() {
     let mut add_records = vec![];
     let mut halt_records = vec![];
     all_records.into_iter().for_each(|record| {
-        let kind = record.insn().codes().kind;
+        let kind = record.insn().kind;
         match kind {
             ADD => add_records.push(record),
-            EANY => {
+            ECALL => {
                 if record.rs1().unwrap().value == Platform::ecall_halt() {
                     halt_records.push(record);
                 }
diff --git a/ceno_zkvm/src/structs.rs b/ceno_zkvm/src/structs.rs
index af41851f9..b006b10e1 100644
--- a/ceno_zkvm/src/structs.rs
+++ b/ceno_zkvm/src/structs.rs
@@ -103,7 +103,7 @@ impl<F: Clone> PointAndEval<F> {
 #[derive(Clone, Debug)]
 pub struct ProvingKey<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> {
     pub fixed_traces: Option<Vec<DenseMultilinearExtension<E>>>,
-    pub fixed_commit_wd: Option<PCS::CommitmentWithData>,
+    pub fixed_commit_wd: Option<PCS::CommitmentWithWitness>,
     pub vk: VerifyingKey<E, PCS>,
 }
 
diff --git a/ceno_zkvm/src/tables/program.rs b/ceno_zkvm/src/tables/program.rs
index a5ae2ad7f..5695ee247 100644
--- a/ceno_zkvm/src/tables/program.rs
+++ b/ceno_zkvm/src/tables/program.rs
@@ -13,27 +13,13 @@ use crate::{
     witness::RowMajorMatrix,
 };
 use ceno_emul::{
-    DecodedInstruction, InsnCodes, InsnFormat::*, InsnKind::*, PC_STEP_SIZE, Program, WORD_SIZE,
+    InsnFormat, InsnFormat::*, InsnKind::*, Instruction, PC_STEP_SIZE, Program, WORD_SIZE,
 };
 use ff_ext::ExtensionField;
 use goldilocks::SmallField;
 use itertools::Itertools;
 use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
 
-#[macro_export]
-macro_rules! declare_program {
-    ($program:ident, $($instr:expr),* $(,)?) => {
-
-        {
-            let mut _i = 0;
-            $(
-                $program[_i] = $instr;
-                _i += 1;
-            )*
-        }
-    };
-}
-
 /// This structure establishes the order of the fields in instruction records, common to the program table and circuit fetches.
 #[derive(Clone, Debug)]
 pub struct InsnRecord<T>([T; 6]);
@@ -43,7 +29,7 @@ impl<T> InsnRecord<T> {
     where
         T: From<u32>,
     {
-        let rd = rd.unwrap_or_else(|| T::from(DecodedInstruction::RD_NULL));
+        let rd = rd.unwrap_or_else(|| T::from(Instruction::RD_NULL));
         InsnRecord([pc, kind, rd, rs1, rs2, imm_internal])
     }
 
@@ -53,10 +39,10 @@ impl<T> InsnRecord<T> {
 }
 
 impl<F: SmallField> InsnRecord<F> {
-    fn from_decoded(pc: u32, insn: &DecodedInstruction) -> Self {
+    fn from_decoded(pc: u32, insn: &Instruction) -> Self {
         InsnRecord([
             (pc as u64).into(),
-            (insn.codes().kind as u64).into(),
+            (insn.kind as u64).into(),
             (insn.rd_internal() as u64).into(),
             (insn.rs1_or_zero() as u64).into(),
             (insn.rs2_or_zero() as u64).into(),
@@ -73,25 +59,17 @@ impl InsnRecord<()> {
     /// - `as u32` and `as i32` as usual.
     /// - `i64_to_base(imm)` gives the field element going into the program table.
     /// - `as u64` in unsigned cases.
-    pub fn imm_internal(insn: &DecodedInstruction) -> i64 {
-        let imm: u32 = insn.immediate();
-        match insn.codes() {
+    pub fn imm_internal(insn: &Instruction) -> i64 {
+        match (insn.kind, InsnFormat::from(insn.kind)) {
             // Prepare the immediate for ShiftImmInstruction.
             // The shift is implemented as a multiplication/division by 1 << immediate.
-            InsnCodes {
-                kind: SLLI | SRLI | SRAI,
-                ..
-            } => 1 << (imm & 0x1F),
+            (SLLI | SRLI | SRAI, _) => 1 << insn.imm,
             // Unsigned view.
             // For example, u32::MAX is `u32::MAX mod p` in the finite field.
-            InsnCodes { format: R | U, .. }
-            | InsnCodes {
-                kind: ADDI | SLTIU | ANDI | XORI | ORI,
-                ..
-            } => imm as u64 as i64,
+            (_, R | U) | (ADDI | SLTIU | ANDI | XORI | ORI, _) => insn.imm as u32 as i64,
             // Signed view.
             // For example, u32::MAX is `-1 mod p` in the finite field.
-            _ => imm as i32 as i64,
+            _ => insn.imm as i64,
         }
     }
 }
@@ -171,7 +149,7 @@ impl<E: ExtensionField> TableCircuit<E> for ProgramTableCircuit<E> {
             .zip((0..num_instructions).into_par_iter())
             .for_each(|(row, i)| {
                 let pc = pc_base + (i * PC_STEP_SIZE) as u32;
-                let insn = DecodedInstruction::new(program.instructions[i]);
+                let insn = program.instructions[i];
                 let values: InsnRecord<_> = InsnRecord::from_decoded(pc, &insn);
 
                 // Copy all the fields.
@@ -222,29 +200,6 @@ mod tests {
     use ff::Field;
     use goldilocks::{Goldilocks as F, GoldilocksExt2 as E};
 
-    #[test]
-    #[allow(clippy::identity_op)]
-    fn test_decode_imm() {
-        for (i, expected) in [
-            // Example of I-type: ADDI.
-            // imm    | rs1     | funct3      | rd     | opcode
-            (89 << 20 | 1 << 15 | 0b000 << 12 | 1 << 7 | 0x13, 89),
-            // Shifts get a precomputed power of 2: SLLI, SRLI, SRAI.
-            (31 << 20 | 1 << 15 | 0b001 << 12 | 1 << 7 | 0x13, 1 << 31),
-            (31 << 20 | 1 << 15 | 0b101 << 12 | 1 << 7 | 0x13, 1 << 31),
-            (
-                1 << 30 | 31 << 20 | 1 << 15 | 0b101 << 12 | 1 << 7 | 0x13,
-                1 << 31,
-            ),
-            // Example of R-type with funct7: SUB.
-            // funct7     | rs2    | rs1     | funct3      | rd     | opcode
-            (0x20 << 25 | 1 << 20 | 1 << 15 | 0 << 12 | 1 << 7 | 0x33, 0),
-        ] {
-            let imm = InsnRecord::imm_internal(&DecodedInstruction::new(i));
-            assert_eq!(imm, expected);
-        }
-    }
-
     #[test]
     fn test_program_padding() {
         let mut cs = ConstraintSystem::<E>::new(|| "riscv");
diff --git a/clippy.toml b/clippy.toml
index 6e64e6f38..87fda2227 100644
--- a/clippy.toml
+++ b/clippy.toml
@@ -2,9 +2,10 @@
 # Eg removing syn is blocked by ark-ff-asm cutting a new release
 # (https://github.com/arkworks-rs/algebra/issues/813) amongst other things.
 allowed-duplicate-crates = [
-  "syn",
-  "windows-sys",
+  "hashbrown",
+  "itertools",
   "regex-automata",
   "regex-syntax",
-  "itertools",
+  "syn",
+  "windows-sys",
 ]
diff --git a/examples-builder/build.rs b/examples-builder/build.rs
index 7ea9b9628..0e8d90441 100644
--- a/examples-builder/build.rs
+++ b/examples-builder/build.rs
@@ -26,7 +26,7 @@ fn build_elfs() {
     // See git history for an attempt to do this.
     let output = Command::new("cargo")
         .args(["build", "--release", "--examples"])
-        .current_dir("../examples")
+        .current_dir("../guest/examples")
         .env_clear()
         .envs(std::env::vars().filter(|x| !x.0.starts_with("CARGO_")))
         .output()
@@ -41,11 +41,11 @@ fn build_elfs() {
             dest,
             r#"#[allow(non_upper_case_globals)]
             pub const {example}: &[u8] =
-                include_bytes!(r"{CARGO_MANIFEST_DIR}/../examples/target/riscv32im-unknown-none-elf/release/examples/{example}");"#
+                include_bytes!(r"{CARGO_MANIFEST_DIR}/../guest/target/riscv32im-unknown-none-elf/release/examples/{example}");"#
         ).expect("failed to write vars.rs");
     }
-    let input_path = "../examples/";
-    let elfs_path = "../examples/target/riscv32im-unknown-none-elf/release/examples/";
+    let input_path = "../guest/";
+    let elfs_path = "../guest/target/riscv32im-unknown-none-elf/release/examples/";
 
     println!("cargo:rerun-if-changed={input_path}");
     println!("cargo:rerun-if-changed={elfs_path}");
diff --git a/examples/.cargo/config.toml b/examples/.cargo/config.toml
deleted file mode 120000
index bade0c189..000000000
--- a/examples/.cargo/config.toml
+++ /dev/null
@@ -1 +0,0 @@
-../../ceno_rt/.cargo/config.toml
\ No newline at end of file
diff --git a/examples/Cargo.toml b/examples/Cargo.toml
deleted file mode 100644
index 85c975ac1..000000000
--- a/examples/Cargo.toml
+++ /dev/null
@@ -1,9 +0,0 @@
-[package]
-edition = "2021"
-name = "examples"
-readme = "README.md"
-resolver = "2"
-version = "0.1.0"
-
-[dependencies]
-ceno_rt = { path = "../ceno_rt" }
diff --git a/examples/rust-toolchain.toml b/examples/rust-toolchain.toml
deleted file mode 120000
index 43fe0ada6..000000000
--- a/examples/rust-toolchain.toml
+++ /dev/null
@@ -1 +0,0 @@
-../rust-toolchain.toml
\ No newline at end of file
diff --git a/ceno_rt/.cargo/config.toml b/guest/.cargo/config.toml
similarity index 100%
rename from ceno_rt/.cargo/config.toml
rename to guest/.cargo/config.toml
diff --git a/examples/Cargo.lock b/guest/Cargo.lock
similarity index 88%
rename from examples/Cargo.lock
rename to guest/Cargo.lock
index 359355930..84c6c3ab6 100644
--- a/examples/Cargo.lock
+++ b/guest/Cargo.lock
@@ -36,9 +36,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.88"
+version = "1.0.92"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9"
+checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
 dependencies = [
  "unicode-ident",
 ]
@@ -84,9 +84,9 @@ checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
 
 [[package]]
 name = "syn"
-version = "2.0.82"
+version = "2.0.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021"
+checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -95,6 +95,6 @@ dependencies = [
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.13"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
+checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
diff --git a/guest/Cargo.toml b/guest/Cargo.toml
new file mode 100644
index 000000000..0aa06be48
--- /dev/null
+++ b/guest/Cargo.toml
@@ -0,0 +1,15 @@
+[workspace]
+members = ["examples", "ceno_rt"]
+resolver = "2"
+
+[workspace.package]
+categories = ["cryptography", "zk", "blockchain", "ceno"]
+edition = "2021"
+keywords = ["cryptography", "zk", "blockchain", "ceno"]
+license = "MIT OR Apache-2.0"
+readme = "../README.md"
+repository = "https://github.com/scroll-tech/ceno"
+version = "0.1.0"
+
+[profile.release]
+lto = "thin"
diff --git a/ceno_rt/Cargo.toml b/guest/ceno_rt/Cargo.toml
similarity index 100%
rename from ceno_rt/Cargo.toml
rename to guest/ceno_rt/Cargo.toml
diff --git a/ceno_rt/README.md b/guest/ceno_rt/README.md
similarity index 100%
rename from ceno_rt/README.md
rename to guest/ceno_rt/README.md
diff --git a/ceno_rt/build.rs b/guest/ceno_rt/build.rs
similarity index 100%
rename from ceno_rt/build.rs
rename to guest/ceno_rt/build.rs
diff --git a/ceno_rt/ceno_link.x b/guest/ceno_rt/ceno_link.x
similarity index 100%
rename from ceno_rt/ceno_link.x
rename to guest/ceno_rt/ceno_link.x
diff --git a/ceno_rt/memory.x b/guest/ceno_rt/memory.x
similarity index 100%
rename from ceno_rt/memory.x
rename to guest/ceno_rt/memory.x
diff --git a/ceno_rt/src/allocator.rs b/guest/ceno_rt/src/allocator.rs
similarity index 100%
rename from ceno_rt/src/allocator.rs
rename to guest/ceno_rt/src/allocator.rs
diff --git a/ceno_rt/src/io.rs b/guest/ceno_rt/src/io.rs
similarity index 100%
rename from ceno_rt/src/io.rs
rename to guest/ceno_rt/src/io.rs
diff --git a/ceno_rt/src/lib.rs b/guest/ceno_rt/src/lib.rs
similarity index 100%
rename from ceno_rt/src/lib.rs
rename to guest/ceno_rt/src/lib.rs
diff --git a/ceno_rt/src/params.rs b/guest/ceno_rt/src/params.rs
similarity index 100%
rename from ceno_rt/src/params.rs
rename to guest/ceno_rt/src/params.rs
diff --git a/guest/examples/Cargo.toml b/guest/examples/Cargo.toml
new file mode 100644
index 000000000..397e743b1
--- /dev/null
+++ b/guest/examples/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+categories.workspace = true
+description = "Ceno RiscV guest examples"
+edition.workspace = true
+keywords.workspace = true
+license.workspace = true
+name = "examples"
+readme = "README.md"
+repository.workspace = true
+version.workspace = true
+
+[dependencies]
+ceno_rt = { path = "../ceno_rt" }
diff --git a/examples/examples/ceno_rt_alloc.rs b/guest/examples/examples/ceno_rt_alloc.rs
similarity index 100%
rename from examples/examples/ceno_rt_alloc.rs
rename to guest/examples/examples/ceno_rt_alloc.rs
diff --git a/examples/examples/ceno_rt_io.rs b/guest/examples/examples/ceno_rt_io.rs
similarity index 100%
rename from examples/examples/ceno_rt_io.rs
rename to guest/examples/examples/ceno_rt_io.rs
diff --git a/examples/examples/ceno_rt_mem.rs b/guest/examples/examples/ceno_rt_mem.rs
similarity index 100%
rename from examples/examples/ceno_rt_mem.rs
rename to guest/examples/examples/ceno_rt_mem.rs
diff --git a/examples/examples/ceno_rt_mini.rs b/guest/examples/examples/ceno_rt_mini.rs
similarity index 100%
rename from examples/examples/ceno_rt_mini.rs
rename to guest/examples/examples/ceno_rt_mini.rs
diff --git a/examples/examples/ceno_rt_panic.rs b/guest/examples/examples/ceno_rt_panic.rs
similarity index 100%
rename from examples/examples/ceno_rt_panic.rs
rename to guest/examples/examples/ceno_rt_panic.rs
diff --git a/mpcs/src/basefold.rs b/mpcs/src/basefold.rs
index a3657422e..c7eb18f71 100644
--- a/mpcs/src/basefold.rs
+++ b/mpcs/src/basefold.rs
@@ -58,7 +58,7 @@ type SumCheck<F> = ClassicSumCheck<CoefficientsProver<F>>;
 
 mod structure;
 pub use structure::{
-    Basefold, BasefoldBasecodeParams, BasefoldCommitment, BasefoldCommitmentWithData,
+    Basefold, BasefoldBasecodeParams, BasefoldCommitment, BasefoldCommitmentWithWitness,
     BasefoldDefault, BasefoldParams, BasefoldProverParams, BasefoldRSParams,
     BasefoldVerifierParams,
 };
@@ -273,7 +273,7 @@ where
     type Param = BasefoldParams<E, Spec>;
     type ProverParam = BasefoldProverParams<E, Spec>;
     type VerifierParam = BasefoldVerifierParams<E, Spec>;
-    type CommitmentWithData = BasefoldCommitmentWithData<E>;
+    type CommitmentWithWitness = BasefoldCommitmentWithWitness<E>;
     type Commitment = BasefoldCommitment<E>;
     type CommitmentChunk = Digest<E::BaseField>;
     type Proof = BasefoldProof<E>;
@@ -307,7 +307,7 @@ where
     fn commit(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-    ) -> Result<Self::CommitmentWithData, Error> {
+    ) -> Result<Self::CommitmentWithWitness, Error> {
         let timer = start_timer!(|| "Basefold::commit");
 
         let is_base = match poly.evaluations {
@@ -325,9 +325,9 @@ where
             PolyEvalsCodeword::Normal((bh_evals, codeword)) => {
                 let codeword_tree = MerkleTree::<E>::from_leaves(codeword);
 
-                // All these values are stored in the `CommitmentWithData` because
+                // All these values are stored in the `CommitmentWithWitness` because
                 // they are useful in opening, and we don't want to recompute them.
-                Ok(Self::CommitmentWithData {
+                Ok(Self::CommitmentWithWitness {
                     codeword_tree,
                     polynomials_bh_evals: vec![bh_evals],
                     num_vars: poly.num_vars,
@@ -338,9 +338,9 @@ where
             PolyEvalsCodeword::TooSmall(evals) => {
                 let codeword_tree = MerkleTree::<E>::from_leaves(evals.clone());
 
-                // All these values are stored in the `CommitmentWithData` because
+                // All these values are stored in the `CommitmentWithWitness` because
                 // they are useful in opening, and we don't want to recompute them.
-                Ok(Self::CommitmentWithData {
+                Ok(Self::CommitmentWithWitness {
                     codeword_tree,
                     polynomials_bh_evals: vec![evals],
                     num_vars: poly.num_vars,
@@ -359,7 +359,7 @@ where
     fn batch_commit(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-    ) -> Result<Self::CommitmentWithData, Error> {
+    ) -> Result<Self::CommitmentWithWitness, Error> {
         // assumptions
         // 1. there must be at least one polynomial
         // 2. all polynomials must exist in the same field type
@@ -413,7 +413,7 @@ where
                     })
                     .collect::<(Vec<_>, Vec<_>)>();
                 let codeword_tree = MerkleTree::<E>::from_batch_leaves(codewords);
-                Self::CommitmentWithData {
+                Self::CommitmentWithWitness {
                     codeword_tree,
                     polynomials_bh_evals: bh_evals,
                     num_vars: polys[0].num_vars,
@@ -433,7 +433,7 @@ where
                     })
                     .collect::<Vec<_>>();
                 let codeword_tree = MerkleTree::<E>::from_batch_leaves(bh_evals.clone());
-                Self::CommitmentWithData {
+                Self::CommitmentWithWitness {
                     codeword_tree,
                     polynomials_bh_evals: bh_evals,
                     num_vars: polys[0].num_vars,
@@ -457,7 +457,7 @@ where
         Ok(())
     }
 
-    fn get_pure_commitment(comm: &Self::CommitmentWithData) -> Self::Commitment {
+    fn get_pure_commitment(comm: &Self::CommitmentWithWitness) -> Self::Commitment {
         comm.to_commitment()
     }
 
@@ -467,7 +467,7 @@ where
     fn open(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         _eval: &E, // Opening does not need eval, except for sanity check
         transcript: &mut impl Transcript<E>,
@@ -547,7 +547,7 @@ where
     fn batch_open(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-        comms: &[Self::CommitmentWithData],
+        comms: &[Self::CommitmentWithWitness],
         points: &[Vec<E>],
         evals: &[Evaluation<E>],
         transcript: &mut impl Transcript<E>,
@@ -769,7 +769,7 @@ where
     fn simple_batch_open(
         pp: &Self::ProverParam,
         polys: &[ArcMultilinearExtension<E>],
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         evals: &[E],
         transcript: &mut impl Transcript<E>,
@@ -947,8 +947,6 @@ where
         transcript: &mut impl Transcript<E>,
     ) -> Result<(), Error> {
         let timer = start_timer!(|| "Basefold::batch_verify");
-        // 	let key = "RAYON_NUM_THREADS";
-        // 	env::set_var(key, "32");
         let comms = comms.iter().collect_vec();
         let num_vars = points.iter().map(|point| point.len()).max().unwrap();
         let num_rounds = num_vars - Spec::get_basecode_msg_size_log();
diff --git a/mpcs/src/basefold/commit_phase.rs b/mpcs/src/basefold/commit_phase.rs
index 7d6e0b949..55a6acea5 100644
--- a/mpcs/src/basefold/commit_phase.rs
+++ b/mpcs/src/basefold/commit_phase.rs
@@ -27,13 +27,13 @@ use rayon::prelude::{
     ParallelSlice,
 };
 
-use super::structure::BasefoldCommitmentWithData;
+use super::structure::BasefoldCommitmentWithWitness;
 
 // outputs (trees, sumcheck_oracles, oracles, bh_evals, eq, eval)
 pub fn commit_phase<E: ExtensionField, Spec: BasefoldSpec<E>>(
     pp: &<Spec::EncodingScheme as EncodingScheme<E>>::ProverParameters,
     point: &[E],
-    comm: &BasefoldCommitmentWithData<E>,
+    comm: &BasefoldCommitmentWithWitness<E>,
     transcript: &mut impl Transcript<E>,
     num_vars: usize,
     num_rounds: usize,
@@ -179,7 +179,7 @@ where
 pub fn batch_commit_phase<E: ExtensionField, Spec: BasefoldSpec<E>>(
     pp: &<Spec::EncodingScheme as EncodingScheme<E>>::ProverParameters,
     point: &[E],
-    comms: &[BasefoldCommitmentWithData<E>],
+    comms: &[BasefoldCommitmentWithWitness<E>],
     transcript: &mut impl Transcript<E>,
     num_vars: usize,
     num_rounds: usize,
@@ -350,7 +350,7 @@ pub fn simple_batch_commit_phase<E: ExtensionField, Spec: BasefoldSpec<E>>(
     pp: &<Spec::EncodingScheme as EncodingScheme<E>>::ProverParameters,
     point: &[E],
     batch_coeffs: &[E],
-    comm: &BasefoldCommitmentWithData<E>,
+    comm: &BasefoldCommitmentWithWitness<E>,
     transcript: &mut impl Transcript<E>,
     num_vars: usize,
     num_rounds: usize,
diff --git a/mpcs/src/basefold/query_phase.rs b/mpcs/src/basefold/query_phase.rs
index f0db40890..9ec15d36b 100644
--- a/mpcs/src/basefold/query_phase.rs
+++ b/mpcs/src/basefold/query_phase.rs
@@ -25,12 +25,12 @@ use rayon::{
 
 use super::{
     encoding::EncodingScheme,
-    structure::{BasefoldCommitment, BasefoldCommitmentWithData, BasefoldSpec},
+    structure::{BasefoldCommitment, BasefoldCommitmentWithWitness, BasefoldSpec},
 };
 
 pub fn prover_query_phase<E: ExtensionField>(
     transcript: &mut impl Transcript<E>,
-    comm: &BasefoldCommitmentWithData<E>,
+    comm: &BasefoldCommitmentWithWitness<E>,
     trees: &[MerkleTree<E>],
     num_verifier_queries: usize,
 ) -> QueriesResult<E>
@@ -67,7 +67,7 @@ where
 pub fn batch_prover_query_phase<E: ExtensionField>(
     transcript: &mut impl Transcript<E>,
     codeword_size: usize,
-    comms: &[BasefoldCommitmentWithData<E>],
+    comms: &[BasefoldCommitmentWithWitness<E>],
     trees: &[MerkleTree<E>],
     num_verifier_queries: usize,
 ) -> BatchedQueriesResult<E>
@@ -103,7 +103,7 @@ where
 
 pub fn simple_batch_prover_query_phase<E: ExtensionField>(
     transcript: &mut impl Transcript<E>,
-    comm: &BasefoldCommitmentWithData<E>,
+    comm: &BasefoldCommitmentWithWitness<E>,
     trees: &[MerkleTree<E>],
     num_verifier_queries: usize,
 ) -> SimpleBatchQueriesResult<E>
@@ -411,7 +411,7 @@ where
 }
 
 fn batch_basefold_get_query<E: ExtensionField>(
-    comms: &[BasefoldCommitmentWithData<E>],
+    comms: &[BasefoldCommitmentWithWitness<E>],
     trees: &[MerkleTree<E>],
     codeword_size: usize,
     x_index: usize,
@@ -828,7 +828,7 @@ where
     pub fn from_single_query_result(
         single_query_result: SingleQueryResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitment: &BasefoldCommitmentWithData<E>,
+        commitment: &BasefoldCommitmentWithWitness<E>,
     ) -> Self {
         assert!(commitment.codeword_tree.height() > 0);
         Self {
@@ -927,7 +927,7 @@ where
     pub fn from_query_result(
         query_result: QueriesResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitment: &BasefoldCommitmentWithData<E>,
+        commitment: &BasefoldCommitmentWithWitness<E>,
     ) -> Self {
         Self {
             inner: query_result
@@ -1002,7 +1002,7 @@ where
     pub fn from_batched_single_query_result(
         batched_single_query_result: BatchedSingleQueryResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitments: &[BasefoldCommitmentWithData<E>],
+        commitments: &[BasefoldCommitmentWithWitness<E>],
     ) -> Self {
         Self {
             oracle_query: OracleListQueryResultWithMerklePath::from_query_and_trees(
@@ -1147,7 +1147,7 @@ where
     pub fn from_batched_query_result(
         batched_query_result: BatchedQueriesResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitments: &[BasefoldCommitmentWithData<E>],
+        commitments: &[BasefoldCommitmentWithWitness<E>],
     ) -> Self {
         Self {
             inner: batched_query_result
@@ -1307,7 +1307,7 @@ where
     pub fn from_single_query_result(
         single_query_result: SimpleBatchSingleQueryResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitment: &BasefoldCommitmentWithData<E>,
+        commitment: &BasefoldCommitmentWithWitness<E>,
     ) -> Self {
         Self {
             oracle_query: OracleListQueryResultWithMerklePath::from_query_and_trees(
@@ -1403,7 +1403,7 @@ where
     pub fn from_query_result(
         query_result: SimpleBatchQueriesResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitment: &BasefoldCommitmentWithData<E>,
+        commitment: &BasefoldCommitmentWithWitness<E>,
     ) -> Self {
         Self {
             inner: query_result
diff --git a/mpcs/src/basefold/structure.rs b/mpcs/src/basefold/structure.rs
index 0cb0748a0..129409efd 100644
--- a/mpcs/src/basefold/structure.rs
+++ b/mpcs/src/basefold/structure.rs
@@ -58,9 +58,8 @@ pub struct BasefoldVerifierParams<E: ExtensionField, Spec: BasefoldSpec<E>> {
 
 /// A polynomial commitment together with all the data (e.g., the codeword, and Merkle tree)
 /// used to generate this commitment and for assistant in opening
-#[derive(Clone, Debug, Default, Serialize, Deserialize)]
-#[serde(bound(serialize = "E: Serialize", deserialize = "E: DeserializeOwned"))]
-pub struct BasefoldCommitmentWithData<E: ExtensionField>
+#[derive(Clone, Debug, Default)]
+pub struct BasefoldCommitmentWithWitness<E: ExtensionField>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
@@ -71,7 +70,7 @@ where
     pub(crate) num_polys: usize,
 }
 
-impl<E: ExtensionField> BasefoldCommitmentWithData<E>
+impl<E: ExtensionField> BasefoldCommitmentWithWitness<E>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
@@ -133,20 +132,20 @@ where
     }
 }
 
-impl<E: ExtensionField> From<BasefoldCommitmentWithData<E>> for Digest<E::BaseField>
+impl<E: ExtensionField> From<BasefoldCommitmentWithWitness<E>> for Digest<E::BaseField>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
-    fn from(val: BasefoldCommitmentWithData<E>) -> Self {
+    fn from(val: BasefoldCommitmentWithWitness<E>) -> Self {
         val.get_root_as()
     }
 }
 
-impl<E: ExtensionField> From<&BasefoldCommitmentWithData<E>> for BasefoldCommitment<E>
+impl<E: ExtensionField> From<&BasefoldCommitmentWithWitness<E>> for BasefoldCommitment<E>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
-    fn from(val: &BasefoldCommitmentWithData<E>) -> Self {
+    fn from(val: &BasefoldCommitmentWithWitness<E>) -> Self {
         val.to_commitment()
     }
 }
@@ -194,7 +193,7 @@ where
     }
 }
 
-impl<E: ExtensionField> PartialEq for BasefoldCommitmentWithData<E>
+impl<E: ExtensionField> PartialEq for BasefoldCommitmentWithWitness<E>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
@@ -204,7 +203,7 @@ where
     }
 }
 
-impl<E: ExtensionField> Eq for BasefoldCommitmentWithData<E> where
+impl<E: ExtensionField> Eq for BasefoldCommitmentWithWitness<E> where
     E::BaseField: Serialize + DeserializeOwned
 {
 }
@@ -266,7 +265,7 @@ where
     }
 }
 
-impl<E: ExtensionField> AsRef<[Digest<E::BaseField>]> for BasefoldCommitmentWithData<E>
+impl<E: ExtensionField> AsRef<[Digest<E::BaseField>]> for BasefoldCommitmentWithWitness<E>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
diff --git a/mpcs/src/lib.rs b/mpcs/src/lib.rs
index e9c67b866..dcc6b6725 100644
--- a/mpcs/src/lib.rs
+++ b/mpcs/src/lib.rs
@@ -12,7 +12,8 @@ pub mod util;
 
 pub type Commitment<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::Commitment;
 pub type CommitmentChunk<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::CommitmentChunk;
-pub type CommitmentWithData<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::CommitmentWithData;
+pub type CommitmentWithWitness<E, Pcs> =
+    <Pcs as PolynomialCommitmentScheme<E>>::CommitmentWithWitness;
 
 pub type Param<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::Param;
 pub type ProverParam<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::ProverParam;
@@ -34,7 +35,7 @@ pub fn pcs_trim<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
 pub fn pcs_commit<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
     pp: &Pcs::ProverParam,
     poly: &DenseMultilinearExtension<E>,
-) -> Result<Pcs::CommitmentWithData, Error> {
+) -> Result<Pcs::CommitmentWithWitness, Error> {
     Pcs::commit(pp, poly)
 }
 
@@ -42,14 +43,14 @@ pub fn pcs_commit_and_write<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E
     pp: &Pcs::ProverParam,
     poly: &DenseMultilinearExtension<E>,
     transcript: &mut impl Transcript<E>,
-) -> Result<Pcs::CommitmentWithData, Error> {
+) -> Result<Pcs::CommitmentWithWitness, Error> {
     Pcs::commit_and_write(pp, poly, transcript)
 }
 
 pub fn pcs_batch_commit<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
     pp: &Pcs::ProverParam,
     polys: &[DenseMultilinearExtension<E>],
-) -> Result<Pcs::CommitmentWithData, Error> {
+) -> Result<Pcs::CommitmentWithWitness, Error> {
     Pcs::batch_commit(pp, polys)
 }
 
@@ -57,14 +58,14 @@ pub fn pcs_batch_commit_and_write<E: ExtensionField, Pcs: PolynomialCommitmentSc
     pp: &Pcs::ProverParam,
     polys: &[DenseMultilinearExtension<E>],
     transcript: &mut impl Transcript<E>,
-) -> Result<Pcs::CommitmentWithData, Error> {
+) -> Result<Pcs::CommitmentWithWitness, Error> {
     Pcs::batch_commit_and_write(pp, polys, transcript)
 }
 
 pub fn pcs_open<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
     pp: &Pcs::ProverParam,
     poly: &DenseMultilinearExtension<E>,
-    comm: &Pcs::CommitmentWithData,
+    comm: &Pcs::CommitmentWithWitness,
     point: &[E],
     eval: &E,
     transcript: &mut impl Transcript<E>,
@@ -75,7 +76,7 @@ pub fn pcs_open<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
 pub fn pcs_batch_open<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
     pp: &Pcs::ProverParam,
     polys: &[DenseMultilinearExtension<E>],
-    comms: &[Pcs::CommitmentWithData],
+    comms: &[Pcs::CommitmentWithWitness],
     points: &[Vec<E>],
     evals: &[Evaluation<E>],
     transcript: &mut impl Transcript<E>,
@@ -112,7 +113,7 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     type Param: Clone + Debug + Serialize + DeserializeOwned;
     type ProverParam: Clone + Debug + Serialize + DeserializeOwned;
     type VerifierParam: Clone + Debug + Serialize + DeserializeOwned;
-    type CommitmentWithData: Clone + Debug + Default + Serialize + DeserializeOwned;
+    type CommitmentWithWitness: Clone + Debug;
     type Commitment: Clone + Debug + Default + Serialize + DeserializeOwned;
     type CommitmentChunk: Clone + Debug + Default;
     type Proof: Clone + Debug + Serialize + DeserializeOwned;
@@ -127,13 +128,13 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     fn commit(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-    ) -> Result<Self::CommitmentWithData, Error>;
+    ) -> Result<Self::CommitmentWithWitness, Error>;
 
     fn commit_and_write(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
         transcript: &mut impl Transcript<E>,
-    ) -> Result<Self::CommitmentWithData, Error> {
+    ) -> Result<Self::CommitmentWithWitness, Error> {
         let comm = Self::commit(pp, poly)?;
         Self::write_commitment(&Self::get_pure_commitment(&comm), transcript)?;
         Ok(comm)
@@ -144,18 +145,18 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
         transcript: &mut impl Transcript<E>,
     ) -> Result<(), Error>;
 
-    fn get_pure_commitment(comm: &Self::CommitmentWithData) -> Self::Commitment;
+    fn get_pure_commitment(comm: &Self::CommitmentWithWitness) -> Self::Commitment;
 
     fn batch_commit(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-    ) -> Result<Self::CommitmentWithData, Error>;
+    ) -> Result<Self::CommitmentWithWitness, Error>;
 
     fn batch_commit_and_write(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
         transcript: &mut impl Transcript<E>,
-    ) -> Result<Self::CommitmentWithData, Error> {
+    ) -> Result<Self::CommitmentWithWitness, Error> {
         let comm = Self::batch_commit(pp, polys)?;
         Self::write_commitment(&Self::get_pure_commitment(&comm), transcript)?;
         Ok(comm)
@@ -164,7 +165,7 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     fn open(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         eval: &E,
         transcript: &mut impl Transcript<E>,
@@ -173,7 +174,7 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     fn batch_open(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-        comms: &[Self::CommitmentWithData],
+        comms: &[Self::CommitmentWithWitness],
         points: &[Vec<E>],
         evals: &[Evaluation<E>],
         transcript: &mut impl Transcript<E>,
@@ -186,7 +187,7 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     fn simple_batch_open(
         pp: &Self::ProverParam,
         polys: &[ArcMultilinearExtension<E>],
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         evals: &[E],
         transcript: &mut impl Transcript<E>,
@@ -228,7 +229,7 @@ where
     fn ni_open(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         eval: &E,
     ) -> Result<Self::Proof, Error> {
@@ -239,7 +240,7 @@ where
     fn ni_batch_open(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-        comms: &[Self::CommitmentWithData],
+        comms: &[Self::CommitmentWithWitness],
         points: &[Vec<E>],
         evals: &[Evaluation<E>],
     ) -> Result<Self::Proof, Error> {
@@ -315,7 +316,7 @@ pub enum Error {
 mod basefold;
 pub use basefold::{
     Basecode, BasecodeDefaultSpec, Basefold, BasefoldBasecodeParams, BasefoldCommitment,
-    BasefoldCommitmentWithData, BasefoldDefault, BasefoldParams, BasefoldRSParams, BasefoldSpec,
+    BasefoldCommitmentWithWitness, BasefoldDefault, BasefoldParams, BasefoldRSParams, BasefoldSpec,
     EncodingScheme, RSCode, RSCodeDefaultSpec, coset_fft, fft, fft_root_table, one_level_eval_hc,
     one_level_interp_hc,
 };
@@ -436,7 +437,7 @@ pub mod test_util {
         pp: &Pcs::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
         transcript: &mut impl Transcript<E>,
-    ) -> Vec<Pcs::CommitmentWithData> {
+    ) -> Vec<Pcs::CommitmentWithWitness> {
         polys
             .iter()
             .map(|poly| Pcs::commit_and_write(pp, poly, transcript).unwrap())