From 027f7b0baac71da6443f0cbb8cf557bcbe8e7a00 Mon Sep 17 00:00:00 2001
From: Cyte Zhang <yczhangsjtu@gmail.com>
Date: Fri, 13 Dec 2024 10:00:22 +0800
Subject: [PATCH 1/5] MPCS: Rename CommitmentWithData to CommitmentWithWitness
 & remove unnecessary trait requirement. (#745)

- The name "CommitmentWithData" is vague, renamed to
`CommitmentWithWitness` to be clearer
- It doesn't actually need to be serialized anywhere in the protocol,
and never need to create a default instance, remove the unnecessary
trait bounds (which doesn't cause any compilation error at all), and to
avoid implementing these trait unnecessarily in WHIR wrapper
---
 ceno_zkvm/src/scheme/prover.rs    |  4 +--
 ceno_zkvm/src/structs.rs          |  2 +-
 mpcs/src/basefold.rs              | 28 ++++++++++-----------
 mpcs/src/basefold/commit_phase.rs |  8 +++---
 mpcs/src/basefold/query_phase.rs  | 22 ++++++++---------
 mpcs/src/basefold/structure.rs    | 21 ++++++++--------
 mpcs/src/lib.rs                   | 41 ++++++++++++++++---------------
 7 files changed, 63 insertions(+), 63 deletions(-)
diff --git a/ceno_zkvm/src/scheme/prover.rs b/ceno_zkvm/src/scheme/prover.rs
index b0ff7b656..2c8cae8bc 100644
--- a/ceno_zkvm/src/scheme/prover.rs
+++ b/ceno_zkvm/src/scheme/prover.rs
@@ -216,7 +216,7 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> ZKVMProver<E, PCS> {
         pp: &PCS::ProverParam,
         circuit_pk: &ProvingKey<E, PCS>,
         witnesses: Vec<ArcMultilinearExtension<'_, E>>,
-        wits_commit: PCS::CommitmentWithData,
+        wits_commit: PCS::CommitmentWithWitness,
         pi: &[ArcMultilinearExtension<'_, E>],
         num_instances: usize,
         transcript: &mut impl Transcript<E>,
@@ -661,7 +661,7 @@ impl<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> ZKVMProver<E, PCS> {
         pp: &PCS::ProverParam,
         circuit_pk: &ProvingKey<E, PCS>,
         witnesses: Vec<ArcMultilinearExtension<'_, E>>,
-        wits_commit: PCS::CommitmentWithData,
+        wits_commit: PCS::CommitmentWithWitness,
         pi: &[ArcMultilinearExtension<'_, E>],
         transcript: &mut impl Transcript<E>,
         challenges: &[E; 2],
diff --git a/ceno_zkvm/src/structs.rs b/ceno_zkvm/src/structs.rs
index af41851f9..b006b10e1 100644
--- a/ceno_zkvm/src/structs.rs
+++ b/ceno_zkvm/src/structs.rs
@@ -103,7 +103,7 @@ impl<F: Clone> PointAndEval<F> {
 #[derive(Clone, Debug)]
 pub struct ProvingKey<E: ExtensionField, PCS: PolynomialCommitmentScheme<E>> {
     pub fixed_traces: Option<Vec<DenseMultilinearExtension<E>>>,
-    pub fixed_commit_wd: Option<PCS::CommitmentWithData>,
+    pub fixed_commit_wd: Option<PCS::CommitmentWithWitness>,
     pub vk: VerifyingKey<E, PCS>,
 }
 
diff --git a/mpcs/src/basefold.rs b/mpcs/src/basefold.rs
index a3657422e..1aec0fb9d 100644
--- a/mpcs/src/basefold.rs
+++ b/mpcs/src/basefold.rs
@@ -58,7 +58,7 @@ type SumCheck<F> = ClassicSumCheck<CoefficientsProver<F>>;
 
 mod structure;
 pub use structure::{
-    Basefold, BasefoldBasecodeParams, BasefoldCommitment, BasefoldCommitmentWithData,
+    Basefold, BasefoldBasecodeParams, BasefoldCommitment, BasefoldCommitmentWithWitness,
     BasefoldDefault, BasefoldParams, BasefoldProverParams, BasefoldRSParams,
     BasefoldVerifierParams,
 };
@@ -273,7 +273,7 @@ where
     type Param = BasefoldParams<E, Spec>;
     type ProverParam = BasefoldProverParams<E, Spec>;
     type VerifierParam = BasefoldVerifierParams<E, Spec>;
-    type CommitmentWithData = BasefoldCommitmentWithData<E>;
+    type CommitmentWithWitness = BasefoldCommitmentWithWitness<E>;
     type Commitment = BasefoldCommitment<E>;
     type CommitmentChunk = Digest<E::BaseField>;
     type Proof = BasefoldProof<E>;
@@ -307,7 +307,7 @@ where
     fn commit(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-    ) -> Result<Self::CommitmentWithData, Error> {
+    ) -> Result<Self::CommitmentWithWitness, Error> {
         let timer = start_timer!(|| "Basefold::commit");
 
         let is_base = match poly.evaluations {
@@ -325,9 +325,9 @@ where
             PolyEvalsCodeword::Normal((bh_evals, codeword)) => {
                 let codeword_tree = MerkleTree::<E>::from_leaves(codeword);
 
-                // All these values are stored in the `CommitmentWithData` because
+                // All these values are stored in the `CommitmentWithWitness` because
                 // they are useful in opening, and we don't want to recompute them.
-                Ok(Self::CommitmentWithData {
+                Ok(Self::CommitmentWithWitness {
                     codeword_tree,
                     polynomials_bh_evals: vec![bh_evals],
                     num_vars: poly.num_vars,
@@ -338,9 +338,9 @@ where
             PolyEvalsCodeword::TooSmall(evals) => {
                 let codeword_tree = MerkleTree::<E>::from_leaves(evals.clone());
 
-                // All these values are stored in the `CommitmentWithData` because
+                // All these values are stored in the `CommitmentWithWitness` because
                 // they are useful in opening, and we don't want to recompute them.
-                Ok(Self::CommitmentWithData {
+                Ok(Self::CommitmentWithWitness {
                     codeword_tree,
                     polynomials_bh_evals: vec![evals],
                     num_vars: poly.num_vars,
@@ -359,7 +359,7 @@ where
     fn batch_commit(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-    ) -> Result<Self::CommitmentWithData, Error> {
+    ) -> Result<Self::CommitmentWithWitness, Error> {
         // assumptions
         // 1. there must be at least one polynomial
         // 2. all polynomials must exist in the same field type
@@ -413,7 +413,7 @@ where
                     })
                     .collect::<(Vec<_>, Vec<_>)>();
                 let codeword_tree = MerkleTree::<E>::from_batch_leaves(codewords);
-                Self::CommitmentWithData {
+                Self::CommitmentWithWitness {
                     codeword_tree,
                     polynomials_bh_evals: bh_evals,
                     num_vars: polys[0].num_vars,
@@ -433,7 +433,7 @@ where
                     })
                     .collect::<Vec<_>>();
                 let codeword_tree = MerkleTree::<E>::from_batch_leaves(bh_evals.clone());
-                Self::CommitmentWithData {
+                Self::CommitmentWithWitness {
                     codeword_tree,
                     polynomials_bh_evals: bh_evals,
                     num_vars: polys[0].num_vars,
@@ -457,7 +457,7 @@ where
         Ok(())
     }
 
-    fn get_pure_commitment(comm: &Self::CommitmentWithData) -> Self::Commitment {
+    fn get_pure_commitment(comm: &Self::CommitmentWithWitness) -> Self::Commitment {
         comm.to_commitment()
     }
 
@@ -467,7 +467,7 @@ where
     fn open(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         _eval: &E, // Opening does not need eval, except for sanity check
         transcript: &mut impl Transcript<E>,
@@ -547,7 +547,7 @@ where
     fn batch_open(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-        comms: &[Self::CommitmentWithData],
+        comms: &[Self::CommitmentWithWitness],
         points: &[Vec<E>],
         evals: &[Evaluation<E>],
         transcript: &mut impl Transcript<E>,
@@ -769,7 +769,7 @@ where
     fn simple_batch_open(
         pp: &Self::ProverParam,
         polys: &[ArcMultilinearExtension<E>],
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         evals: &[E],
         transcript: &mut impl Transcript<E>,
diff --git a/mpcs/src/basefold/commit_phase.rs b/mpcs/src/basefold/commit_phase.rs
index 7d6e0b949..55a6acea5 100644
--- a/mpcs/src/basefold/commit_phase.rs
+++ b/mpcs/src/basefold/commit_phase.rs
@@ -27,13 +27,13 @@ use rayon::prelude::{
     ParallelSlice,
 };
 
-use super::structure::BasefoldCommitmentWithData;
+use super::structure::BasefoldCommitmentWithWitness;
 
 // outputs (trees, sumcheck_oracles, oracles, bh_evals, eq, eval)
 pub fn commit_phase<E: ExtensionField, Spec: BasefoldSpec<E>>(
     pp: &<Spec::EncodingScheme as EncodingScheme<E>>::ProverParameters,
     point: &[E],
-    comm: &BasefoldCommitmentWithData<E>,
+    comm: &BasefoldCommitmentWithWitness<E>,
     transcript: &mut impl Transcript<E>,
     num_vars: usize,
     num_rounds: usize,
@@ -179,7 +179,7 @@ where
 pub fn batch_commit_phase<E: ExtensionField, Spec: BasefoldSpec<E>>(
     pp: &<Spec::EncodingScheme as EncodingScheme<E>>::ProverParameters,
     point: &[E],
-    comms: &[BasefoldCommitmentWithData<E>],
+    comms: &[BasefoldCommitmentWithWitness<E>],
     transcript: &mut impl Transcript<E>,
     num_vars: usize,
     num_rounds: usize,
@@ -350,7 +350,7 @@ pub fn simple_batch_commit_phase<E: ExtensionField, Spec: BasefoldSpec<E>>(
     pp: &<Spec::EncodingScheme as EncodingScheme<E>>::ProverParameters,
     point: &[E],
     batch_coeffs: &[E],
-    comm: &BasefoldCommitmentWithData<E>,
+    comm: &BasefoldCommitmentWithWitness<E>,
     transcript: &mut impl Transcript<E>,
     num_vars: usize,
     num_rounds: usize,
diff --git a/mpcs/src/basefold/query_phase.rs b/mpcs/src/basefold/query_phase.rs
index f0db40890..9ec15d36b 100644
--- a/mpcs/src/basefold/query_phase.rs
+++ b/mpcs/src/basefold/query_phase.rs
@@ -25,12 +25,12 @@ use rayon::{
 
 use super::{
     encoding::EncodingScheme,
-    structure::{BasefoldCommitment, BasefoldCommitmentWithData, BasefoldSpec},
+    structure::{BasefoldCommitment, BasefoldCommitmentWithWitness, BasefoldSpec},
 };
 
 pub fn prover_query_phase<E: ExtensionField>(
     transcript: &mut impl Transcript<E>,
-    comm: &BasefoldCommitmentWithData<E>,
+    comm: &BasefoldCommitmentWithWitness<E>,
     trees: &[MerkleTree<E>],
     num_verifier_queries: usize,
 ) -> QueriesResult<E>
@@ -67,7 +67,7 @@ where
 pub fn batch_prover_query_phase<E: ExtensionField>(
     transcript: &mut impl Transcript<E>,
     codeword_size: usize,
-    comms: &[BasefoldCommitmentWithData<E>],
+    comms: &[BasefoldCommitmentWithWitness<E>],
     trees: &[MerkleTree<E>],
     num_verifier_queries: usize,
 ) -> BatchedQueriesResult<E>
@@ -103,7 +103,7 @@ where
 
 pub fn simple_batch_prover_query_phase<E: ExtensionField>(
     transcript: &mut impl Transcript<E>,
-    comm: &BasefoldCommitmentWithData<E>,
+    comm: &BasefoldCommitmentWithWitness<E>,
     trees: &[MerkleTree<E>],
     num_verifier_queries: usize,
 ) -> SimpleBatchQueriesResult<E>
@@ -411,7 +411,7 @@ where
 }
 
 fn batch_basefold_get_query<E: ExtensionField>(
-    comms: &[BasefoldCommitmentWithData<E>],
+    comms: &[BasefoldCommitmentWithWitness<E>],
     trees: &[MerkleTree<E>],
     codeword_size: usize,
     x_index: usize,
@@ -828,7 +828,7 @@ where
     pub fn from_single_query_result(
         single_query_result: SingleQueryResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitment: &BasefoldCommitmentWithData<E>,
+        commitment: &BasefoldCommitmentWithWitness<E>,
     ) -> Self {
         assert!(commitment.codeword_tree.height() > 0);
         Self {
@@ -927,7 +927,7 @@ where
     pub fn from_query_result(
         query_result: QueriesResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitment: &BasefoldCommitmentWithData<E>,
+        commitment: &BasefoldCommitmentWithWitness<E>,
     ) -> Self {
         Self {
             inner: query_result
@@ -1002,7 +1002,7 @@ where
     pub fn from_batched_single_query_result(
         batched_single_query_result: BatchedSingleQueryResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitments: &[BasefoldCommitmentWithData<E>],
+        commitments: &[BasefoldCommitmentWithWitness<E>],
     ) -> Self {
         Self {
             oracle_query: OracleListQueryResultWithMerklePath::from_query_and_trees(
@@ -1147,7 +1147,7 @@ where
     pub fn from_batched_query_result(
         batched_query_result: BatchedQueriesResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitments: &[BasefoldCommitmentWithData<E>],
+        commitments: &[BasefoldCommitmentWithWitness<E>],
     ) -> Self {
         Self {
             inner: batched_query_result
@@ -1307,7 +1307,7 @@ where
     pub fn from_single_query_result(
         single_query_result: SimpleBatchSingleQueryResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitment: &BasefoldCommitmentWithData<E>,
+        commitment: &BasefoldCommitmentWithWitness<E>,
     ) -> Self {
         Self {
             oracle_query: OracleListQueryResultWithMerklePath::from_query_and_trees(
@@ -1403,7 +1403,7 @@ where
     pub fn from_query_result(
         query_result: SimpleBatchQueriesResult<E>,
         oracle_trees: &[MerkleTree<E>],
-        commitment: &BasefoldCommitmentWithData<E>,
+        commitment: &BasefoldCommitmentWithWitness<E>,
     ) -> Self {
         Self {
             inner: query_result
diff --git a/mpcs/src/basefold/structure.rs b/mpcs/src/basefold/structure.rs
index 0cb0748a0..129409efd 100644
--- a/mpcs/src/basefold/structure.rs
+++ b/mpcs/src/basefold/structure.rs
@@ -58,9 +58,8 @@ pub struct BasefoldVerifierParams<E: ExtensionField, Spec: BasefoldSpec<E>> {
 
 /// A polynomial commitment together with all the data (e.g., the codeword, and Merkle tree)
 /// used to generate this commitment and for assistant in opening
-#[derive(Clone, Debug, Default, Serialize, Deserialize)]
-#[serde(bound(serialize = "E: Serialize", deserialize = "E: DeserializeOwned"))]
-pub struct BasefoldCommitmentWithData<E: ExtensionField>
+#[derive(Clone, Debug, Default)]
+pub struct BasefoldCommitmentWithWitness<E: ExtensionField>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
@@ -71,7 +70,7 @@ where
     pub(crate) num_polys: usize,
 }
 
-impl<E: ExtensionField> BasefoldCommitmentWithData<E>
+impl<E: ExtensionField> BasefoldCommitmentWithWitness<E>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
@@ -133,20 +132,20 @@ where
     }
 }
 
-impl<E: ExtensionField> From<BasefoldCommitmentWithData<E>> for Digest<E::BaseField>
+impl<E: ExtensionField> From<BasefoldCommitmentWithWitness<E>> for Digest<E::BaseField>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
-    fn from(val: BasefoldCommitmentWithData<E>) -> Self {
+    fn from(val: BasefoldCommitmentWithWitness<E>) -> Self {
         val.get_root_as()
     }
 }
 
-impl<E: ExtensionField> From<&BasefoldCommitmentWithData<E>> for BasefoldCommitment<E>
+impl<E: ExtensionField> From<&BasefoldCommitmentWithWitness<E>> for BasefoldCommitment<E>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
-    fn from(val: &BasefoldCommitmentWithData<E>) -> Self {
+    fn from(val: &BasefoldCommitmentWithWitness<E>) -> Self {
         val.to_commitment()
     }
 }
@@ -194,7 +193,7 @@ where
     }
 }
 
-impl<E: ExtensionField> PartialEq for BasefoldCommitmentWithData<E>
+impl<E: ExtensionField> PartialEq for BasefoldCommitmentWithWitness<E>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
@@ -204,7 +203,7 @@ where
     }
 }
 
-impl<E: ExtensionField> Eq for BasefoldCommitmentWithData<E> where
+impl<E: ExtensionField> Eq for BasefoldCommitmentWithWitness<E> where
     E::BaseField: Serialize + DeserializeOwned
 {
 }
@@ -266,7 +265,7 @@ where
     }
 }
 
-impl<E: ExtensionField> AsRef<[Digest<E::BaseField>]> for BasefoldCommitmentWithData<E>
+impl<E: ExtensionField> AsRef<[Digest<E::BaseField>]> for BasefoldCommitmentWithWitness<E>
 where
     E::BaseField: Serialize + DeserializeOwned,
 {
diff --git a/mpcs/src/lib.rs b/mpcs/src/lib.rs
index e9c67b866..dcc6b6725 100644
--- a/mpcs/src/lib.rs
+++ b/mpcs/src/lib.rs
@@ -12,7 +12,8 @@ pub mod util;
 
 pub type Commitment<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::Commitment;
 pub type CommitmentChunk<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::CommitmentChunk;
-pub type CommitmentWithData<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::CommitmentWithData;
+pub type CommitmentWithWitness<E, Pcs> =
+    <Pcs as PolynomialCommitmentScheme<E>>::CommitmentWithWitness;
 
 pub type Param<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::Param;
 pub type ProverParam<E, Pcs> = <Pcs as PolynomialCommitmentScheme<E>>::ProverParam;
@@ -34,7 +35,7 @@ pub fn pcs_trim<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
 pub fn pcs_commit<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
     pp: &Pcs::ProverParam,
     poly: &DenseMultilinearExtension<E>,
-) -> Result<Pcs::CommitmentWithData, Error> {
+) -> Result<Pcs::CommitmentWithWitness, Error> {
     Pcs::commit(pp, poly)
 }
 
@@ -42,14 +43,14 @@ pub fn pcs_commit_and_write<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E
     pp: &Pcs::ProverParam,
     poly: &DenseMultilinearExtension<E>,
     transcript: &mut impl Transcript<E>,
-) -> Result<Pcs::CommitmentWithData, Error> {
+) -> Result<Pcs::CommitmentWithWitness, Error> {
     Pcs::commit_and_write(pp, poly, transcript)
 }
 
 pub fn pcs_batch_commit<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
     pp: &Pcs::ProverParam,
     polys: &[DenseMultilinearExtension<E>],
-) -> Result<Pcs::CommitmentWithData, Error> {
+) -> Result<Pcs::CommitmentWithWitness, Error> {
     Pcs::batch_commit(pp, polys)
 }
 
@@ -57,14 +58,14 @@ pub fn pcs_batch_commit_and_write<E: ExtensionField, Pcs: PolynomialCommitmentSc
     pp: &Pcs::ProverParam,
     polys: &[DenseMultilinearExtension<E>],
     transcript: &mut impl Transcript<E>,
-) -> Result<Pcs::CommitmentWithData, Error> {
+) -> Result<Pcs::CommitmentWithWitness, Error> {
     Pcs::batch_commit_and_write(pp, polys, transcript)
 }
 
 pub fn pcs_open<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
     pp: &Pcs::ProverParam,
     poly: &DenseMultilinearExtension<E>,
-    comm: &Pcs::CommitmentWithData,
+    comm: &Pcs::CommitmentWithWitness,
     point: &[E],
     eval: &E,
     transcript: &mut impl Transcript<E>,
@@ -75,7 +76,7 @@ pub fn pcs_open<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
 pub fn pcs_batch_open<E: ExtensionField, Pcs: PolynomialCommitmentScheme<E>>(
     pp: &Pcs::ProverParam,
     polys: &[DenseMultilinearExtension<E>],
-    comms: &[Pcs::CommitmentWithData],
+    comms: &[Pcs::CommitmentWithWitness],
     points: &[Vec<E>],
     evals: &[Evaluation<E>],
     transcript: &mut impl Transcript<E>,
@@ -112,7 +113,7 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     type Param: Clone + Debug + Serialize + DeserializeOwned;
     type ProverParam: Clone + Debug + Serialize + DeserializeOwned;
     type VerifierParam: Clone + Debug + Serialize + DeserializeOwned;
-    type CommitmentWithData: Clone + Debug + Default + Serialize + DeserializeOwned;
+    type CommitmentWithWitness: Clone + Debug;
     type Commitment: Clone + Debug + Default + Serialize + DeserializeOwned;
     type CommitmentChunk: Clone + Debug + Default;
     type Proof: Clone + Debug + Serialize + DeserializeOwned;
@@ -127,13 +128,13 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     fn commit(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-    ) -> Result<Self::CommitmentWithData, Error>;
+    ) -> Result<Self::CommitmentWithWitness, Error>;
 
     fn commit_and_write(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
         transcript: &mut impl Transcript<E>,
-    ) -> Result<Self::CommitmentWithData, Error> {
+    ) -> Result<Self::CommitmentWithWitness, Error> {
         let comm = Self::commit(pp, poly)?;
         Self::write_commitment(&Self::get_pure_commitment(&comm), transcript)?;
         Ok(comm)
@@ -144,18 +145,18 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
         transcript: &mut impl Transcript<E>,
     ) -> Result<(), Error>;
 
-    fn get_pure_commitment(comm: &Self::CommitmentWithData) -> Self::Commitment;
+    fn get_pure_commitment(comm: &Self::CommitmentWithWitness) -> Self::Commitment;
 
     fn batch_commit(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-    ) -> Result<Self::CommitmentWithData, Error>;
+    ) -> Result<Self::CommitmentWithWitness, Error>;
 
     fn batch_commit_and_write(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
         transcript: &mut impl Transcript<E>,
-    ) -> Result<Self::CommitmentWithData, Error> {
+    ) -> Result<Self::CommitmentWithWitness, Error> {
         let comm = Self::batch_commit(pp, polys)?;
         Self::write_commitment(&Self::get_pure_commitment(&comm), transcript)?;
         Ok(comm)
@@ -164,7 +165,7 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     fn open(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         eval: &E,
         transcript: &mut impl Transcript<E>,
@@ -173,7 +174,7 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     fn batch_open(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-        comms: &[Self::CommitmentWithData],
+        comms: &[Self::CommitmentWithWitness],
         points: &[Vec<E>],
         evals: &[Evaluation<E>],
         transcript: &mut impl Transcript<E>,
@@ -186,7 +187,7 @@ pub trait PolynomialCommitmentScheme<E: ExtensionField>: Clone + Debug {
     fn simple_batch_open(
         pp: &Self::ProverParam,
         polys: &[ArcMultilinearExtension<E>],
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         evals: &[E],
         transcript: &mut impl Transcript<E>,
@@ -228,7 +229,7 @@ where
     fn ni_open(
         pp: &Self::ProverParam,
         poly: &DenseMultilinearExtension<E>,
-        comm: &Self::CommitmentWithData,
+        comm: &Self::CommitmentWithWitness,
         point: &[E],
         eval: &E,
     ) -> Result<Self::Proof, Error> {
@@ -239,7 +240,7 @@ where
     fn ni_batch_open(
         pp: &Self::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
-        comms: &[Self::CommitmentWithData],
+        comms: &[Self::CommitmentWithWitness],
         points: &[Vec<E>],
         evals: &[Evaluation<E>],
     ) -> Result<Self::Proof, Error> {
@@ -315,7 +316,7 @@ pub enum Error {
 mod basefold;
 pub use basefold::{
     Basecode, BasecodeDefaultSpec, Basefold, BasefoldBasecodeParams, BasefoldCommitment,
-    BasefoldCommitmentWithData, BasefoldDefault, BasefoldParams, BasefoldRSParams, BasefoldSpec,
+    BasefoldCommitmentWithWitness, BasefoldDefault, BasefoldParams, BasefoldRSParams, BasefoldSpec,
     EncodingScheme, RSCode, RSCodeDefaultSpec, coset_fft, fft, fft_root_table, one_level_eval_hc,
     one_level_interp_hc,
 };
@@ -436,7 +437,7 @@ pub mod test_util {
         pp: &Pcs::ProverParam,
         polys: &[DenseMultilinearExtension<E>],
         transcript: &mut impl Transcript<E>,
-    ) -> Vec<Pcs::CommitmentWithData> {
+    ) -> Vec<Pcs::CommitmentWithWitness> {
         polys
             .iter()
             .map(|poly| Pcs::commit_and_write(pp, poly, transcript).unwrap())

From b58ef82592b8968e2de3cb0fa1b2d3bd80944c2f Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matthias=20G=C3=B6rgens?= <matthias.goergens@gmail.com>
Date: Fri, 13 Dec 2024 12:29:11 +0800
Subject: [PATCH 2/5] Implement the TODOs in the Harvard Architecture PR (#711)

In the [Harvard Architecture
PR](https://github.com/scroll-tech/ceno/pull/688) we left a few TODOs.
Here we make good on them.

There's a few things happening in this PR:

- We remove the fake instruction `EANY`. `EANY` was only ever an
artifact of Risc0's incredible 'interesting' approach to decoding. We
use the real instructions `ECALL` and `EBREAK` instead.

- Remove `AUIPC` and `LUI`. Both of them are now implemented as
pseudo-instructions, that translate to `ADDI` during decoding.

- Use the same library as SP1 for RiscV decoding, instead of
copy-and-pasting-and-editing Risc0's 'interesting' decoder. That
simplifies our code, and comes with a lot more tests than we ever had.
Both because of explicit tests in the library, and because of the usage
in SP1 and other projects. This gets rid of much error prone bit
manipulating code.

- Use `struct Instruction` throughout the code when handling and testing
instructions, instead of `u32`. That makes specifying tests a lot
simpler and more readable. No more
`0b_000000001010_00000_000_00001_0010011, // addi x1, x0, 10` in the
code.

- Remove the notion of executable vs non-executable ROM. This is only
necessary for a von-Neumann architecture: everything that's in our
instruction-cache is meant to be executable already. (We can
re-implement this restriction later by controlling what is allowed to
make it into the instruction cache when we eg decode the ELF. But it's
unnecessary: we already honour the executable flag for memory sections
in the ELF.)
---
 Cargo.lock                                    |  75 ++
 ceno_emul/Cargo.toml                          |   1 +
 ceno_emul/src/disassemble/mod.rs              | 378 ++++++++
 ceno_emul/src/elf.rs                          |   8 +-
 ceno_emul/src/lib.rs                          |   7 +-
 ceno_emul/src/platform.rs                     |   5 -
 ceno_emul/src/rv32im.rs                       | 895 ++++++------------
 ceno_emul/src/rv32im_encode.rs                | 116 ---
 ceno_emul/src/tracer.rs                       |  46 +-
 ceno_emul/src/vm_state.rs                     |  20 +-
 ceno_emul/tests/test_elf.rs                   |   2 +-
 ceno_emul/tests/test_vm_trace.rs              |  61 +-
 ceno_zkvm/examples/riscv_opcodes.rs           |  57 +-
 ceno_zkvm/src/e2e.rs                          |   9 +-
 ceno_zkvm/src/instructions/riscv.rs           |   5 +-
 ceno_zkvm/src/instructions/riscv/arith_imm.rs |   7 +-
 .../src/instructions/riscv/branch/test.rs     |  16 +-
 .../instructions/riscv/dummy/dummy_circuit.rs |  18 +-
 .../src/instructions/riscv/dummy/test.rs      |   2 +-
 ceno_zkvm/src/instructions/riscv/ecall.rs     |   2 +-
 .../src/instructions/riscv/ecall_insn.rs      |   4 +-
 ceno_zkvm/src/instructions/riscv/jump.rs      |   4 -
 .../src/instructions/riscv/jump/auipc.rs      |  94 --
 ceno_zkvm/src/instructions/riscv/jump/lui.rs  |  64 --
 ceno_zkvm/src/instructions/riscv/jump/test.rs |  78 +-
 .../riscv/logic_imm/logic_imm_circuit.rs      |   4 +-
 .../src/instructions/riscv/memory/test.rs     |  53 +-
 ceno_zkvm/src/instructions/riscv/rv32im.rs    |  40 +-
 ceno_zkvm/src/instructions/riscv/shift_imm.rs |   8 +-
 ceno_zkvm/src/instructions/riscv/slti.rs      |   2 +-
 .../src/instructions/riscv/test_utils.rs      |  23 -
 ceno_zkvm/src/instructions/riscv/u_insn.rs    |  65 --
 ceno_zkvm/src/scheme/mock_prover.rs           |  35 +-
 ceno_zkvm/src/scheme/tests.rs                 |  43 +-
 ceno_zkvm/src/tables/program.rs               |  65 +-
 clippy.toml                                   |   7 +-
 36 files changed, 959 insertions(+), 1360 deletions(-)
 create mode 100644 ceno_emul/src/disassemble/mod.rs
 delete mode 100644 ceno_emul/src/rv32im_encode.rs
 delete mode 100644 ceno_zkvm/src/instructions/riscv/jump/auipc.rs
 delete mode 100644 ceno_zkvm/src/instructions/riscv/jump/lui.rs
 delete mode 100644 ceno_zkvm/src/instructions/riscv/test_utils.rs
 delete mode 100644 ceno_zkvm/src/instructions/riscv/u_insn.rs

diff --git a/Cargo.lock b/Cargo.lock
index d84685eb4..865b783f5 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -238,6 +238,7 @@ dependencies = [
  "itertools 0.13.0",
  "num-derive",
  "num-traits",
+ "rrs-succinct",
  "strum",
  "strum_macros",
  "tracing",
@@ -583,6 +584,12 @@ dependencies = [
  "winapi",
 ]
 
+[[package]]
+name = "downcast-rs"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "75b325c5dbd37f80359721ad39aca5a29fb04c89279657cffdda8736d0c0b9d2"
+
 [[package]]
 name = "either"
 version = "1.13.0"
@@ -1194,6 +1201,27 @@ dependencies = [
  "autocfg",
 ]
 
+[[package]]
+name = "num_enum"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f646caf906c20226733ed5b1374287eb97e3c2a5c227ce668c1f2ce20ae57c9"
+dependencies = [
+ "num_enum_derive",
+]
+
+[[package]]
+name = "num_enum_derive"
+version = "0.5.11"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "dcbff9bc912032c62bf65ef1d5aea88983b420f4f839db1e9b0c281a25c9c799"
+dependencies = [
+ "proc-macro-crate",
+ "proc-macro2",
+ "quote",
+ "syn 1.0.109",
+]
+
 [[package]]
 name = "object"
 version = "0.36.5"
@@ -1424,6 +1452,16 @@ dependencies = [
  "uint",
 ]
 
+[[package]]
+name = "proc-macro-crate"
+version = "1.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7f4c021e1093a56626774e81216a4ce732a735e5bad4868a03f3ed65ca0c3919"
+dependencies = [
+ "once_cell",
+ "toml_edit",
+]
+
 [[package]]
 name = "proc-macro2"
 version = "1.0.92"
@@ -1611,6 +1649,17 @@ version = "0.2.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
 
+[[package]]
+name = "rrs-succinct"
+version = "0.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3372685893a9f67d18e98e792d690017287fd17379a83d798d958e517d380fa9"
+dependencies = [
+ "downcast-rs",
+ "num_enum",
+ "paste",
+]
+
 [[package]]
 name = "rustc-demangle"
 version = "0.1.24"
@@ -1927,6 +1976,23 @@ dependencies = [
  "serde_json",
 ]
 
+[[package]]
+name = "toml_datetime"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0dd7358ecb8fc2f8d014bf86f6f638ce72ba252a2c3a2572f2a795f1d23efb41"
+
+[[package]]
+name = "toml_edit"
+version = "0.19.15"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1b5bb770da30e5cbfde35a2d7b9b8a2c4b8ef89548a7a6aeab5c9a576e3e7421"
+dependencies = [
+ "indexmap",
+ "toml_datetime",
+ "winnow",
+]
+
 [[package]]
 name = "tracing"
 version = "0.1.41"
@@ -2345,6 +2411,15 @@ version = "0.52.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
 
+[[package]]
+name = "winnow"
+version = "0.5.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f593a95398737aeed53e489c785df13f3618e41dbcd6718c6addbf1395aa6876"
+dependencies = [
+ "memchr",
+]
+
 [[package]]
 name = "wyz"
 version = "0.5.1"
diff --git a/ceno_emul/Cargo.toml b/ceno_emul/Cargo.toml
index 38f0a8bfd..74562142a 100644
--- a/ceno_emul/Cargo.toml
+++ b/ceno_emul/Cargo.toml
@@ -15,6 +15,7 @@ elf = "0.7"
 itertools.workspace = true
 num-derive.workspace = true
 num-traits.workspace = true
+rrs_lib = { package = "rrs-succinct", version = "0.1.0" }
 strum.workspace = true
 strum_macros.workspace = true
 tracing.workspace = true
diff --git a/ceno_emul/src/disassemble/mod.rs b/ceno_emul/src/disassemble/mod.rs
new file mode 100644
index 000000000..ca6161000
--- /dev/null
+++ b/ceno_emul/src/disassemble/mod.rs
@@ -0,0 +1,378 @@
+use crate::rv32im::{InsnKind, Instruction};
+use itertools::izip;
+use rrs_lib::{
+    InstructionProcessor,
+    instruction_formats::{BType, IType, ITypeCSR, ITypeShamt, JType, RType, SType, UType},
+    process_instruction,
+};
+
+/// A transpiler that converts the 32-bit encoded instructions into instructions.
+pub(crate) struct InstructionTranspiler {
+    pc: u32,
+    word: u32,
+}
+
+impl Instruction {
+    /// Create a new [`Instruction`] from an R-type instruction.
+    #[must_use]
+    pub const fn from_r_type(kind: InsnKind, dec_insn: &RType, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: dec_insn.rd,
+            rs1: dec_insn.rs1,
+            rs2: dec_insn.rs2,
+            imm: 0,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] from an I-type instruction.
+    #[must_use]
+    pub const fn from_i_type(kind: InsnKind, dec_insn: &IType, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: dec_insn.rd,
+            rs1: dec_insn.rs1,
+            imm: dec_insn.imm,
+            rs2: 0,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] from an I-type instruction with a shamt.
+    #[must_use]
+    pub const fn from_i_type_shamt(kind: InsnKind, dec_insn: &ITypeShamt, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: dec_insn.rd,
+            rs1: dec_insn.rs1,
+            imm: dec_insn.shamt as i32,
+            rs2: 0,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] from an S-type instruction.
+    #[must_use]
+    pub const fn from_s_type(kind: InsnKind, dec_insn: &SType, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: 0,
+            rs1: dec_insn.rs1,
+            rs2: dec_insn.rs2,
+            imm: dec_insn.imm,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] from a B-type instruction.
+    #[must_use]
+    pub const fn from_b_type(kind: InsnKind, dec_insn: &BType, raw: u32) -> Self {
+        Self {
+            kind,
+            rd: 0,
+            rs1: dec_insn.rs1,
+            rs2: dec_insn.rs2,
+            imm: dec_insn.imm,
+            raw,
+        }
+    }
+
+    /// Create a new [`Instruction`] that is not implemented.
+    #[must_use]
+    pub const fn unimp(raw: u32) -> Self {
+        Self {
+            kind: InsnKind::INVALID,
+            rd: 0,
+            rs1: 0,
+            rs2: 0,
+            imm: 0,
+            raw,
+        }
+    }
+}
+
+impl InstructionProcessor for InstructionTranspiler {
+    type InstructionResult = Instruction;
+
+    fn process_add(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::ADD, &dec_insn, self.word)
+    }
+
+    fn process_addi(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::ADDI, &dec_insn, self.word)
+    }
+
+    fn process_sub(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SUB, &dec_insn, self.word)
+    }
+
+    fn process_xor(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::XOR, &dec_insn, self.word)
+    }
+
+    fn process_xori(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::XORI, &dec_insn, self.word)
+    }
+
+    fn process_or(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::OR, &dec_insn, self.word)
+    }
+
+    fn process_ori(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::ORI, &dec_insn, self.word)
+    }
+
+    fn process_and(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::AND, &dec_insn, self.word)
+    }
+
+    fn process_andi(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::ANDI, &dec_insn, self.word)
+    }
+
+    fn process_sll(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SLL, &dec_insn, self.word)
+    }
+
+    fn process_slli(&mut self, dec_insn: ITypeShamt) -> Self::InstructionResult {
+        Instruction::from_i_type_shamt(InsnKind::SLLI, &dec_insn, self.word)
+    }
+
+    fn process_srl(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SRL, &dec_insn, self.word)
+    }
+
+    fn process_srli(&mut self, dec_insn: ITypeShamt) -> Self::InstructionResult {
+        Instruction::from_i_type_shamt(InsnKind::SRLI, &dec_insn, self.word)
+    }
+
+    fn process_sra(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SRA, &dec_insn, self.word)
+    }
+
+    fn process_srai(&mut self, dec_insn: ITypeShamt) -> Self::InstructionResult {
+        Instruction::from_i_type_shamt(InsnKind::SRAI, &dec_insn, self.word)
+    }
+
+    fn process_slt(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SLT, &dec_insn, self.word)
+    }
+
+    fn process_slti(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::SLTI, &dec_insn, self.word)
+    }
+
+    fn process_sltu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::SLTU, &dec_insn, self.word)
+    }
+
+    fn process_sltui(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::SLTIU, &dec_insn, self.word)
+    }
+
+    fn process_lb(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LB, &dec_insn, self.word)
+    }
+
+    fn process_lh(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LH, &dec_insn, self.word)
+    }
+
+    fn process_lw(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LW, &dec_insn, self.word)
+    }
+
+    fn process_lbu(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LBU, &dec_insn, self.word)
+    }
+
+    fn process_lhu(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction::from_i_type(InsnKind::LHU, &dec_insn, self.word)
+    }
+
+    fn process_sb(&mut self, dec_insn: SType) -> Self::InstructionResult {
+        Instruction::from_s_type(InsnKind::SB, &dec_insn, self.word)
+    }
+
+    fn process_sh(&mut self, dec_insn: SType) -> Self::InstructionResult {
+        Instruction::from_s_type(InsnKind::SH, &dec_insn, self.word)
+    }
+
+    fn process_sw(&mut self, dec_insn: SType) -> Self::InstructionResult {
+        Instruction::from_s_type(InsnKind::SW, &dec_insn, self.word)
+    }
+
+    fn process_beq(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BEQ, &dec_insn, self.word)
+    }
+
+    fn process_bne(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BNE, &dec_insn, self.word)
+    }
+
+    fn process_blt(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BLT, &dec_insn, self.word)
+    }
+
+    fn process_bge(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BGE, &dec_insn, self.word)
+    }
+
+    fn process_bltu(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BLTU, &dec_insn, self.word)
+    }
+
+    fn process_bgeu(&mut self, dec_insn: BType) -> Self::InstructionResult {
+        Instruction::from_b_type(InsnKind::BGEU, &dec_insn, self.word)
+    }
+
+    fn process_jal(&mut self, dec_insn: JType) -> Self::InstructionResult {
+        Instruction {
+            kind: InsnKind::JAL,
+            rd: dec_insn.rd,
+            rs1: 0,
+            rs2: 0,
+            imm: dec_insn.imm,
+            raw: self.word,
+        }
+    }
+
+    fn process_jalr(&mut self, dec_insn: IType) -> Self::InstructionResult {
+        Instruction {
+            kind: InsnKind::JALR,
+            rd: dec_insn.rd,
+            rs1: dec_insn.rs1,
+            rs2: 0,
+            imm: dec_insn.imm,
+            raw: self.word,
+        }
+    }
+
+    /// Convert LUI to ADDI.
+    fn process_lui(&mut self, dec_insn: UType) -> Self::InstructionResult {
+        // Verify assumption that the immediate is already shifted left by 12 bits.
+        assert_eq!(dec_insn.imm & 0xfff, 0);
+        Instruction {
+            kind: InsnKind::ADDI,
+            rd: dec_insn.rd,
+            rs1: 0,
+            rs2: 0,
+            imm: dec_insn.imm,
+            raw: self.word,
+        }
+    }
+
+    /// Convert AUIPC to ADDI.
+    fn process_auipc(&mut self, dec_insn: UType) -> Self::InstructionResult {
+        let pc = self.pc;
+        // Verify our assumption that the immediate is already shifted left by 12 bits.
+        assert_eq!(dec_insn.imm & 0xfff, 0);
+        Instruction {
+            kind: InsnKind::ADDI,
+            rd: dec_insn.rd,
+            rs1: 0,
+            rs2: 0,
+            imm: dec_insn.imm.wrapping_add(pc as i32),
+            raw: self.word,
+        }
+    }
+
+    fn process_ecall(&mut self) -> Self::InstructionResult {
+        Instruction {
+            kind: InsnKind::ECALL,
+            rd: 0,
+            rs1: 0,
+            rs2: 0,
+            imm: 0,
+            raw: self.word,
+        }
+    }
+
+    fn process_ebreak(&mut self) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_mul(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::MUL, &dec_insn, self.word)
+    }
+
+    fn process_mulh(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::MULH, &dec_insn, self.word)
+    }
+
+    fn process_mulhu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::MULHU, &dec_insn, self.word)
+    }
+
+    fn process_mulhsu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::MULHSU, &dec_insn, self.word)
+    }
+
+    fn process_div(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::DIV, &dec_insn, self.word)
+    }
+
+    fn process_divu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::DIVU, &dec_insn, self.word)
+    }
+
+    fn process_rem(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::REM, &dec_insn, self.word)
+    }
+
+    fn process_remu(&mut self, dec_insn: RType) -> Self::InstructionResult {
+        Instruction::from_r_type(InsnKind::REMU, &dec_insn, self.word)
+    }
+
+    fn process_csrrc(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrci(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrs(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrsi(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrw(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_csrrwi(&mut self, _: ITypeCSR) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_fence(&mut self, _: IType) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_mret(&mut self) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+
+    fn process_wfi(&mut self) -> Self::InstructionResult {
+        Instruction::unimp(self.word)
+    }
+}
+
+/// Transpile the [`Instruction`]s from the 32-bit encoded instructions.
+#[must_use]
+pub fn transpile(base: u32, instructions_u32: &[u32]) -> Vec<Instruction> {
+    izip!(enumerate(base, 4), instructions_u32)
+        .map(|(pc, &word)| {
+            process_instruction(&mut InstructionTranspiler { pc, word }, word)
+                .unwrap_or(Instruction::unimp(word))
+        })
+        .collect()
+}
+
+fn enumerate(start: u32, step: u32) -> impl Iterator<Item = u32> {
+    std::iter::successors(Some(start), move |&i| Some(i + step))
+}
diff --git a/ceno_emul/src/elf.rs b/ceno_emul/src/elf.rs
index 82697c619..ee59d3de3 100644
--- a/ceno_emul/src/elf.rs
+++ b/ceno_emul/src/elf.rs
@@ -18,7 +18,7 @@ extern crate alloc;
 
 use alloc::collections::BTreeMap;
 
-use crate::addr::WORD_SIZE;
+use crate::{addr::WORD_SIZE, disassemble::transpile, rv32im::Instruction};
 use anyhow::{Context, Result, anyhow, bail};
 use elf::{
     ElfBytes,
@@ -35,7 +35,7 @@ pub struct Program {
     /// This is the lowest address of the program's executable code
     pub base_address: u32,
     /// The instructions of the program
-    pub instructions: Vec<u32>,
+    pub instructions: Vec<Instruction>,
     /// The initial memory image
     pub image: BTreeMap<u32, u32>,
 }
@@ -45,7 +45,7 @@ impl Program {
     pub fn new(
         entry: u32,
         base_address: u32,
-        instructions: Vec<u32>,
+        instructions: Vec<Instruction>,
         image: BTreeMap<u32, u32>,
     ) -> Program {
         Self {
@@ -162,6 +162,8 @@ impl Program {
         assert!(entry >= base_address);
         assert!((entry - base_address) as usize <= instructions.len() * WORD_SIZE);
 
+        let instructions = transpile(base_address, &instructions);
+
         Ok(Program {
             entry,
             base_address,
diff --git a/ceno_emul/src/lib.rs b/ceno_emul/src/lib.rs
index c734b1794..1a855006e 100644
--- a/ceno_emul/src/lib.rs
+++ b/ceno_emul/src/lib.rs
@@ -12,10 +12,11 @@ mod vm_state;
 pub use vm_state::VMState;
 
 mod rv32im;
-pub use rv32im::{DecodedInstruction, EmuContext, InsnCategory, InsnCodes, InsnFormat, InsnKind};
+pub use rv32im::{
+    EmuContext, InsnCategory, InsnFormat, InsnKind, Instruction, encode_rv32, encode_rv32u,
+};
 
 mod elf;
 pub use elf::Program;
 
-mod rv32im_encode;
-pub use rv32im_encode::encode_rv32;
+pub mod disassemble;
diff --git a/ceno_emul/src/platform.rs b/ceno_emul/src/platform.rs
index c28bcda40..fec7e0181 100644
--- a/ceno_emul/src/platform.rs
+++ b/ceno_emul/src/platform.rs
@@ -73,10 +73,6 @@ impl Platform {
         self.is_ram(addr)
     }
 
-    pub fn can_execute(&self, addr: Addr) -> bool {
-        self.is_rom(addr)
-    }
-
     // Environment calls.
 
     /// Register containing the ecall function code. (x5, t0)
@@ -113,7 +109,6 @@ mod tests {
     #[test]
     fn test_no_overlap() {
         let p = CENO_PLATFORM;
-        assert!(p.can_execute(p.pc_base()));
         // ROM and RAM do not overlap.
         assert!(!p.is_rom(p.ram.start));
         assert!(!p.is_rom(p.ram.end - WORD_SIZE as Addr));
diff --git a/ceno_emul/src/rv32im.rs b/ceno_emul/src/rv32im.rs
index 52fac1508..e63fa761b 100644
--- a/ceno_emul/src/rv32im.rs
+++ b/ceno_emul/src/rv32im.rs
@@ -15,13 +15,39 @@
 // limitations under the License.
 
 use anyhow::{Result, anyhow};
-use itertools::enumerate;
 use num_derive::ToPrimitive;
-use std::sync::OnceLock;
 use strum_macros::{Display, EnumIter};
 
 use super::addr::{ByteAddr, RegIdx, WORD_SIZE, Word, WordAddr};
 
+/// Convenience function to create an `Instruction` with the given fields.
+///
+/// Pass 0 for unused fields.
+pub const fn encode_rv32(kind: InsnKind, rs1: u32, rs2: u32, rd: u32, imm: i32) -> Instruction {
+    Instruction {
+        kind,
+        rs1: rs1 as usize,
+        rs2: rs2 as usize,
+        rd: rd as usize,
+        imm,
+        raw: 0,
+    }
+}
+
+/// Convenience function to create an `Instruction` with the given fields.
+///
+/// Pass 0 for unused fields.
+pub const fn encode_rv32u(kind: InsnKind, rs1: u32, rs2: u32, rd: u32, imm: u32) -> Instruction {
+    Instruction {
+        kind,
+        rs1: rs1 as usize,
+        rs2: rs2 as usize,
+        rd: rd as usize,
+        imm: imm as i32,
+        raw: 0,
+    }
+}
+
 pub trait EmuContext {
     // Handle environment call
     fn ecall(&mut self) -> Result<bool>;
@@ -29,11 +55,8 @@ pub trait EmuContext {
     // Handle a trap
     fn trap(&self, cause: TrapCause) -> Result<bool>;
 
-    // Callback when instructions are decoded
-    fn on_insn_decoded(&mut self, _decoded: &DecodedInstruction) {}
-
     // Callback when instructions end normally
-    fn on_normal_end(&mut self, _decoded: &DecodedInstruction) {}
+    fn on_normal_end(&mut self, _decoded: &Instruction) {}
 
     // Get the program counter
     fn get_pc(&self) -> ByteAddr;
@@ -60,14 +83,7 @@ pub trait EmuContext {
     fn peek_memory(&self, addr: WordAddr) -> Word;
 
     /// Load from instruction cache
-    // TODO(Matthias): this should really return `Result<DecodedInstruction>`
-    // because the instruction cache should contain instructions, not just words.
-    fn fetch(&mut self, pc: WordAddr) -> Option<Word>;
-
-    // Check access for instruction load
-    fn check_insn_load(&self, _addr: ByteAddr) -> bool {
-        true
-    }
+    fn fetch(&mut self, pc: WordAddr) -> Option<Instruction>;
 
     // Check access for data load
     fn check_data_load(&self, _addr: ByteAddr) -> bool {
@@ -80,11 +96,6 @@ pub trait EmuContext {
     }
 }
 
-/// An implementation of the basic ISA (RV32IM), that is instruction decoding and functional units.
-pub struct Emulator {
-    table: &'static FastDecodeTable,
-}
-
 #[derive(Debug)]
 pub enum TrapCause {
     InstructionAddressMisaligned,
@@ -98,17 +109,18 @@ pub enum TrapCause {
     EcallError,
 }
 
-#[derive(Clone, Debug, Default)]
-pub struct DecodedInstruction {
-    insn: u32,
-    top_bit: u32,
-    // The bit fields of the instruction encoding, regardless of the instruction format.
-    func7: u32,
-    rs2: u32,
-    rs1: u32,
-    func3: u32,
-    rd: u32,
-    opcode: u32,
+#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord)]
+pub struct Instruction {
+    pub kind: InsnKind,
+    pub rs1: RegIdx,
+    pub rs2: RegIdx,
+    pub rd: RegIdx,
+    pub imm: i32,
+    /// `raw` is there only to produce better logging and error messages.
+    ///
+    /// Set to 0, if you are creating an instruction directly,
+    /// instead of decoding it from a raw 32-bit `Word`.
+    pub raw: Word,
 }
 
 #[derive(Clone, Copy, Debug)]
@@ -133,9 +145,12 @@ pub enum InsnFormat {
 }
 use InsnFormat::*;
 
-#[derive(Clone, Copy, Display, Debug, PartialEq, Eq, PartialOrd, Ord, EnumIter, ToPrimitive)]
+#[derive(
+    Clone, Copy, Display, Debug, PartialEq, Eq, PartialOrd, Ord, EnumIter, ToPrimitive, Default,
+)]
 #[allow(clippy::upper_case_acronyms)]
 pub enum InsnKind {
+    #[default]
     INVALID,
     ADD,
     SUB,
@@ -164,8 +179,6 @@ pub enum InsnKind {
     BGEU,
     JAL,
     JALR,
-    LUI,
-    AUIPC,
     MUL,
     MULH,
     MULHSU,
@@ -182,611 +195,331 @@ pub enum InsnKind {
     SB,
     SH,
     SW,
-    /// ECALL and EBREAK etc.
-    EANY,
+    ECALL,
 }
 use InsnKind::*;
 
-impl InsnKind {
-    pub const fn codes(self) -> InsnCodes {
-        RV32IM_ISA[self as usize]
+impl From<InsnKind> for InsnCategory {
+    fn from(kind: InsnKind) -> Self {
+        match kind {
+            INVALID => Invalid,
+            ADD | SUB | XOR | OR | AND | SLL | SRL | SRA | SLT | SLTU | MUL | MULH | MULHSU
+            | MULHU | DIV | DIVU | REM | REMU => Compute,
+            ADDI | XORI | ORI | ANDI | SLLI | SRLI | SRAI | SLTI | SLTIU => Compute,
+            BEQ | BNE | BLT | BGE | BLTU | BGEU => Branch,
+            JAL | JALR => Compute,
+            LB | LH | LW | LBU | LHU => Load,
+            SB | SH | SW => Store,
+            ECALL => System,
+        }
     }
 }
 
-#[derive(Clone, Copy, Debug)]
-pub struct InsnCodes {
-    pub format: InsnFormat,
-    pub kind: InsnKind,
-    pub category: InsnCategory,
-    pub(crate) opcode: u32,
-    pub(crate) func3: u32,
-    pub(crate) func7: u32,
-}
-
-impl DecodedInstruction {
-    /// A virtual register which absorbs the writes to x0.
-    pub const RD_NULL: u32 = 32;
-
-    pub fn new(insn: u32) -> Self {
-        Self {
-            insn,
-            top_bit: (insn & 0x80000000) >> 31,
-            func7: (insn & 0xfe000000) >> 25,
-            rs2: (insn & 0x01f00000) >> 20,
-            rs1: (insn & 0x000f8000) >> 15,
-            func3: (insn & 0x00007000) >> 12,
-            rd: (insn & 0x00000f80) >> 7,
-            opcode: insn & 0x0000007f,
+// For encoding, which is useful for tests.
+impl From<InsnKind> for InsnFormat {
+    fn from(kind: InsnKind) -> Self {
+        match kind {
+            ADD | SUB | XOR | OR | AND | SLL | SRL | SRA | SLT | SLTU | MUL | MULH | MULHSU
+            | MULHU | DIV | DIVU | REM | REMU => R,
+            ADDI | XORI | ORI | ANDI | SLLI | SRLI | SRAI | SLTI | SLTIU => I,
+            BEQ | BNE | BLT | BGE | BLTU | BGEU => B,
+            JAL => J,
+            JALR => I,
+            LB | LH | LW | LBU | LHU => I,
+            SB | SH | SW => S,
+            ECALL => I,
+            INVALID => I,
         }
     }
+}
 
-    pub fn encoded(&self) -> u32 {
-        self.insn
-    }
-
-    pub fn opcode(&self) -> u32 {
-        self.opcode
-    }
-
-    /// The internal register destination. It is either the regular rd, or an internal RD_NULL if
-    /// the instruction does not write to a register or writes to x0.
+impl Instruction {
+    pub const RD_NULL: u32 = 32;
     pub fn rd_internal(&self) -> u32 {
-        match self.codes().format {
-            R | I | U | J if self.rd != 0 => self.rd,
+        match InsnFormat::from(self.kind) {
+            R | I | U | J if self.rd != 0 => self.rd as u32,
             _ => Self::RD_NULL,
         }
     }
-
-    /// Get the rs1 field, regardless of the instruction format.
-    pub fn rs1(&self) -> u32 {
-        self.rs1
-    }
-
     /// Get the register source 1, or zero if the instruction does not use rs1.
     pub fn rs1_or_zero(&self) -> u32 {
-        match self.codes().format {
-            R | I | S | B => self.rs1,
+        match InsnFormat::from(self.kind) {
+            R | I | S | B => self.rs1 as u32,
             _ => 0,
         }
     }
-
-    /// Get the rs2 field, regardless of the instruction format.
-    pub fn rs2(&self) -> u32 {
-        self.rs2
-    }
-
     /// Get the register source 2, or zero if the instruction does not use rs2.
     pub fn rs2_or_zero(&self) -> u32 {
-        match self.codes().format {
-            R | S | B => self.rs2,
+        match InsnFormat::from(self.kind) {
+            R | S | B => self.rs2 as u32,
             _ => 0,
         }
     }
-
-    pub fn immediate(&self) -> u32 {
-        match self.codes().format {
-            R => 0,
-            I => self.imm_i(),
-            S => self.imm_s(),
-            B => self.imm_b(),
-            U => self.imm_u(),
-            J => self.imm_j(),
-        }
-    }
-
-    pub fn codes(&self) -> InsnCodes {
-        FastDecodeTable::get().lookup(self)
-    }
-
-    fn imm_b(&self) -> u32 {
-        (self.top_bit * 0xfffff000)
-            | ((self.rd & 1) << 11)
-            | ((self.func7 & 0x3f) << 5)
-            | (self.rd & 0x1e)
-    }
-
-    fn imm_i(&self) -> u32 {
-        (self.top_bit * 0xffff_f000) | (self.func7 << 5) | self.rs2
-    }
-
-    fn imm_s(&self) -> u32 {
-        (self.top_bit * 0xfffff000) | (self.func7 << 5) | self.rd
-    }
-
-    fn imm_j(&self) -> u32 {
-        (self.top_bit * 0xfff00000)
-            | (self.rs1 << 15)
-            | (self.func3 << 12)
-            | ((self.rs2 & 1) << 11)
-            | ((self.func7 & 0x3f) << 5)
-            | (self.rs2 & 0x1e)
-    }
-
-    fn imm_u(&self) -> u32 {
-        self.insn & 0xfffff000
-    }
 }
 
-const fn insn(
-    format: InsnFormat,
-    kind: InsnKind,
-    category: InsnCategory,
-    opcode: u32,
-    func3: i32,
-    func7: i32,
-) -> InsnCodes {
-    InsnCodes {
-        format,
-        kind,
-        category,
-        opcode,
-        func3: func3 as u32,
-        func7: func7 as u32,
-    }
-}
-
-type InstructionTable = [InsnCodes; 47];
-type FastInstructionTable = [u8; 1 << 10];
-
-const RV32IM_ISA: InstructionTable = [
-    insn(R, INVALID, Invalid, 0x00, 0x0, 0x00),
-    insn(R, ADD, Compute, 0x33, 0x0, 0x00),
-    insn(R, SUB, Compute, 0x33, 0x0, 0x20),
-    insn(R, XOR, Compute, 0x33, 0x4, 0x00),
-    insn(R, OR, Compute, 0x33, 0x6, 0x00),
-    insn(R, AND, Compute, 0x33, 0x7, 0x00),
-    insn(R, SLL, Compute, 0x33, 0x1, 0x00),
-    insn(R, SRL, Compute, 0x33, 0x5, 0x00),
-    insn(R, SRA, Compute, 0x33, 0x5, 0x20),
-    insn(R, SLT, Compute, 0x33, 0x2, 0x00),
-    insn(R, SLTU, Compute, 0x33, 0x3, 0x00),
-    insn(I, ADDI, Compute, 0x13, 0x0, -1),
-    insn(I, XORI, Compute, 0x13, 0x4, -1),
-    insn(I, ORI, Compute, 0x13, 0x6, -1),
-    insn(I, ANDI, Compute, 0x13, 0x7, -1),
-    insn(I, SLLI, Compute, 0x13, 0x1, 0x00),
-    insn(I, SRLI, Compute, 0x13, 0x5, 0x00),
-    insn(I, SRAI, Compute, 0x13, 0x5, 0x20),
-    insn(I, SLTI, Compute, 0x13, 0x2, -1),
-    insn(I, SLTIU, Compute, 0x13, 0x3, -1),
-    insn(B, BEQ, Branch, 0x63, 0x0, -1),
-    insn(B, BNE, Branch, 0x63, 0x1, -1),
-    insn(B, BLT, Branch, 0x63, 0x4, -1),
-    insn(B, BGE, Branch, 0x63, 0x5, -1),
-    insn(B, BLTU, Branch, 0x63, 0x6, -1),
-    insn(B, BGEU, Branch, 0x63, 0x7, -1),
-    insn(J, JAL, Compute, 0x6f, -1, -1),
-    insn(I, JALR, Compute, 0x67, 0x0, -1),
-    insn(U, LUI, Compute, 0x37, -1, -1),
-    insn(U, AUIPC, Compute, 0x17, -1, -1),
-    insn(R, MUL, Compute, 0x33, 0x0, 0x01),
-    insn(R, MULH, Compute, 0x33, 0x1, 0x01),
-    insn(R, MULHSU, Compute, 0x33, 0x2, 0x01),
-    insn(R, MULHU, Compute, 0x33, 0x3, 0x01),
-    insn(R, DIV, Compute, 0x33, 0x4, 0x01),
-    insn(R, DIVU, Compute, 0x33, 0x5, 0x01),
-    insn(R, REM, Compute, 0x33, 0x6, 0x01),
-    insn(R, REMU, Compute, 0x33, 0x7, 0x01),
-    insn(I, LB, Load, 0x03, 0x0, -1),
-    insn(I, LH, Load, 0x03, 0x1, -1),
-    insn(I, LW, Load, 0x03, 0x2, -1),
-    insn(I, LBU, Load, 0x03, 0x4, -1),
-    insn(I, LHU, Load, 0x03, 0x5, -1),
-    insn(S, SB, Store, 0x23, 0x0, -1),
-    insn(S, SH, Store, 0x23, 0x1, -1),
-    insn(S, SW, Store, 0x23, 0x2, -1),
-    insn(I, EANY, System, 0x73, 0x0, 0x00),
-];
-
-#[cfg(test)]
-#[test]
-fn test_isa_table() {
-    use strum::IntoEnumIterator;
-    for kind in InsnKind::iter() {
-        assert_eq!(kind.codes().kind, kind);
-    }
-}
-
-// RISC-V instruction are determined by 3 parts:
-// - Opcode: 7 bits
-// - Func3: 3 bits
-// - Func7: 7 bits
-// In many cases, func7 and/or func3 is ignored.  A standard trick is to decode
-// via a table, but a 17 bit lookup table destroys L1 cache.  Luckily for us,
-// in practice the low 2 bits of opcode are always 11, so we can drop them, and
-// also func7 is always either 0, 1, 0x20 or don't care, so we can reduce func7
-// to 2 bits, which gets us to 10 bits, which is only 1k.
-struct FastDecodeTable {
-    table: FastInstructionTable,
-}
-
-impl FastDecodeTable {
-    fn new() -> Self {
-        let mut table: FastInstructionTable = [0; 1 << 10];
-        for (isa_idx, insn) in enumerate(&RV32IM_ISA) {
-            Self::add_insn(&mut table, insn, isa_idx);
-        }
-        Self { table }
-    }
-
-    fn get() -> &'static Self {
-        FAST_DECODE_TABLE.get_or_init(Self::new)
-    }
-
-    // Map to 10 bit format
-    fn map10(opcode: u32, func3: u32, func7: u32) -> usize {
-        let op_high = opcode >> 2;
-        // Map 0 -> 0, 1 -> 1, 0x20 -> 2, everything else to 3
-        let func72bits = if func7 <= 1 {
-            func7
-        } else if func7 == 0x20 {
-            2
-        } else {
-            3
-        };
-        ((op_high << 5) | (func72bits << 3) | func3) as usize
-    }
-
-    fn add_insn(table: &mut FastInstructionTable, insn: &InsnCodes, isa_idx: usize) {
-        let op_high = insn.opcode >> 2;
-        if (insn.func3 as i32) < 0 {
-            for f3 in 0..8 {
-                for f7b in 0..4 {
-                    let idx = (op_high << 5) | (f7b << 3) | f3;
-                    table[idx as usize] = isa_idx as u8;
-                }
-            }
-        } else if (insn.func7 as i32) < 0 {
-            for f7b in 0..4 {
-                let idx = (op_high << 5) | (f7b << 3) | insn.func3;
-                table[idx as usize] = isa_idx as u8;
-            }
-        } else {
-            table[Self::map10(insn.opcode, insn.func3, insn.func7)] = isa_idx as u8;
-        }
-    }
-
-    fn lookup(&self, decoded: &DecodedInstruction) -> InsnCodes {
-        let isa_idx = self.table[Self::map10(decoded.opcode, decoded.func3, decoded.func7)];
-        RV32IM_ISA[isa_idx as usize]
-    }
+pub fn step<C: EmuContext>(ctx: &mut C) -> Result<()> {
+    let pc = ctx.get_pc();
+
+    let Some(insn) = ctx.fetch(pc.waddr()) else {
+        ctx.trap(TrapCause::InstructionAccessFault)?;
+        return Err(anyhow!(
+            "Fatal: could not fetch instruction at pc={pc:?}, ELF does not have instructions there."
+        ));
+    };
+
+    tracing::trace!("pc: {:x}, kind: {:?}", pc.0, insn.kind);
+
+    if match InsnCategory::from(insn.kind) {
+        InsnCategory::Compute => step_compute(ctx, insn.kind, &insn)?,
+        InsnCategory::Branch => step_branch(ctx, insn.kind, &insn)?,
+        InsnCategory::Load => step_load(ctx, insn.kind, &insn)?,
+        InsnCategory::Store => step_store(ctx, insn.kind, &insn)?,
+        InsnCategory::System => step_system(ctx, insn.kind, &insn)?,
+        InsnCategory::Invalid => ctx.trap(TrapCause::IllegalInstruction(insn.raw))?,
+    } {
+        ctx.on_normal_end(&insn);
+    };
+
+    Ok(())
 }
 
-static FAST_DECODE_TABLE: OnceLock<FastDecodeTable> = OnceLock::new();
-
-impl Emulator {
-    pub fn new() -> Self {
-        Self {
-            table: FastDecodeTable::get(),
+fn step_compute<M: EmuContext>(ctx: &mut M, kind: InsnKind, insn: &Instruction) -> Result<bool> {
+    use super::InsnKind::*;
+
+    let pc = ctx.get_pc();
+    let mut new_pc = pc + WORD_SIZE;
+    let imm_i = insn.imm as u32;
+    let out = match kind {
+        // Instructions that do not read rs1 nor rs2.
+        JAL => {
+            new_pc = pc.wrapping_add(insn.imm as u32);
+            (pc + WORD_SIZE).0
         }
-    }
-
-    pub fn step<C: EmuContext>(&self, ctx: &mut C) -> Result<()> {
-        let pc = ctx.get_pc();
-
-        // TODO(Matthias): `check_insn_load` should be unnecessary: we can statically
-        // check in `fn new_from_elf` that the program only has instructions where
-        // our platform accepts them.
-        if !ctx.check_insn_load(pc) {
-            ctx.trap(TrapCause::InstructionAccessFault)?;
-            return Err(anyhow!("Fatal: could not fetch instruction at pc={:?}", pc));
-        }
-        let Some(word) = ctx.fetch(pc.waddr()) else {
-            ctx.trap(TrapCause::InstructionAccessFault)?;
-            return Err(anyhow!("Fatal: could not fetch instruction at pc={:?}", pc));
-        };
-
-        // TODO(Matthias): our `Program` that we are fetching from should really store
-        // already decoded instructions, instead of doing this weird, partial checking
-        // for `0x03` here.
-        //
-        // Note how we can fail here with an IllegalInstruction, and again further down
-        // when we match against the decoded instruction. We should centralise that. And
-        // our `step` function here shouldn't need to know anything about how instruction
-        // are encoded as numbers.
-        //
-        // One way to centralise is to do the check once when loading the program from the
-        // ELF.
-        if word & 0x03 != 0x03 {
-            // Opcode must end in 0b11 in RV32IM.
-            ctx.trap(TrapCause::IllegalInstruction(word))?;
-            return Err(anyhow!(
-                "Fatal: illegal instruction at pc={:?}: 0x{:08x}",
-                pc,
-                word
-            ));
-        }
-
-        let decoded = DecodedInstruction::new(word);
-        let insn = self.table.lookup(&decoded);
-        ctx.on_insn_decoded(&decoded);
-        tracing::trace!("pc: {:x}, kind: {:?}", pc.0, insn.kind);
-
-        if match insn.category {
-            InsnCategory::Compute => self.step_compute(ctx, insn.kind, &decoded)?,
-            InsnCategory::Branch => self.step_branch(ctx, insn.kind, &decoded)?,
-            InsnCategory::Load => self.step_load(ctx, insn.kind, &decoded)?,
-            InsnCategory::Store => self.step_store(ctx, insn.kind, &decoded)?,
-            InsnCategory::System => self.step_system(ctx, insn.kind, &decoded)?,
-            InsnCategory::Invalid => ctx.trap(TrapCause::IllegalInstruction(word))?,
-        } {
-            ctx.on_normal_end(&decoded);
-        };
-
-        Ok(())
-    }
-
-    fn step_compute<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        use InsnKind::*;
-
-        let pc = ctx.get_pc();
-        let mut new_pc = pc + WORD_SIZE;
-        let imm_i = decoded.imm_i();
-        let out = match kind {
-            // Instructions that do not read rs1 nor rs2.
-            JAL => {
-                new_pc = pc.wrapping_add(decoded.imm_j());
-                (pc + WORD_SIZE).0
-            }
-            LUI => decoded.imm_u(),
-            AUIPC => (pc.wrapping_add(decoded.imm_u())).0,
-
-            _ => {
-                // Instructions that read rs1 but not rs2.
-                let rs1 = ctx.load_register(decoded.rs1 as usize)?;
-
-                match kind {
-                    ADDI => rs1.wrapping_add(imm_i),
-                    XORI => rs1 ^ imm_i,
-                    ORI => rs1 | imm_i,
-                    ANDI => rs1 & imm_i,
-                    SLLI => rs1 << (imm_i & 0x1f),
-                    SRLI => rs1 >> (imm_i & 0x1f),
-                    SRAI => ((rs1 as i32) >> (imm_i & 0x1f)) as u32,
-                    SLTI => {
-                        if (rs1 as i32) < (imm_i as i32) {
-                            1
-                        } else {
-                            0
-                        }
+        _ => {
+            // Instructions that read rs1 but not rs2.
+            let rs1 = ctx.load_register(insn.rs1)?;
+
+            match kind {
+                ADDI => rs1.wrapping_add(imm_i),
+                XORI => rs1 ^ imm_i,
+                ORI => rs1 | imm_i,
+                ANDI => rs1 & imm_i,
+                SLLI => rs1 << (imm_i & 0x1f),
+                SRLI => rs1 >> (imm_i & 0x1f),
+                SRAI => ((rs1 as i32) >> (imm_i & 0x1f)) as u32,
+                SLTI => {
+                    if (rs1 as i32) < (imm_i as i32) {
+                        1
+                    } else {
+                        0
                     }
-                    SLTIU => {
-                        if rs1 < imm_i {
-                            1
-                        } else {
-                            0
-                        }
-                    }
-                    JALR => {
-                        new_pc = ByteAddr(rs1.wrapping_add(imm_i) & 0xfffffffe);
-                        (pc + WORD_SIZE).0
+                }
+                SLTIU => {
+                    if rs1 < imm_i {
+                        1
+                    } else {
+                        0
                     }
+                }
+                JALR => {
+                    new_pc = ByteAddr(rs1.wrapping_add(imm_i) & !1);
+                    (pc + WORD_SIZE).0
+                }
 
-                    _ => {
-                        // Instructions that use rs1 and rs2.
-                        let rs2 = ctx.load_register(decoded.rs2 as usize)?;
-
-                        match kind {
-                            ADD => rs1.wrapping_add(rs2),
-                            SUB => rs1.wrapping_sub(rs2),
-                            XOR => rs1 ^ rs2,
-                            OR => rs1 | rs2,
-                            AND => rs1 & rs2,
-                            SLL => rs1 << (rs2 & 0x1f),
-                            SRL => rs1 >> (rs2 & 0x1f),
-                            SRA => ((rs1 as i32) >> (rs2 & 0x1f)) as u32,
-                            SLT => {
-                                if (rs1 as i32) < (rs2 as i32) {
-                                    1
-                                } else {
-                                    0
-                                }
-                            }
-                            SLTU => {
-                                if rs1 < rs2 {
-                                    1
-                                } else {
-                                    0
-                                }
+                _ => {
+                    // Instructions that use rs1 and rs2.
+                    let rs2 = ctx.load_register(insn.rs2)?;
+
+                    match kind {
+                        ADD => rs1.wrapping_add(rs2),
+                        SUB => rs1.wrapping_sub(rs2),
+                        XOR => rs1 ^ rs2,
+                        OR => rs1 | rs2,
+                        AND => rs1 & rs2,
+                        SLL => rs1 << (rs2 & 0x1f),
+                        SRL => rs1 >> (rs2 & 0x1f),
+                        SRA => ((rs1 as i32) >> (rs2 & 0x1f)) as u32,
+                        SLT => {
+                            if (rs1 as i32) < (rs2 as i32) {
+                                1
+                            } else {
+                                0
                             }
-                            MUL => rs1.wrapping_mul(rs2),
-                            MULH => {
-                                (sign_extend_u32(rs1).wrapping_mul(sign_extend_u32(rs2)) >> 32)
-                                    as u32
+                        }
+                        SLTU => {
+                            if rs1 < rs2 {
+                                1
+                            } else {
+                                0
                             }
-                            MULHSU => (sign_extend_u32(rs1).wrapping_mul(rs2 as i64) >> 32) as u32,
-                            MULHU => (((rs1 as u64).wrapping_mul(rs2 as u64)) >> 32) as u32,
-                            DIV => {
-                                if rs2 == 0 {
-                                    u32::MAX
-                                } else {
-                                    ((rs1 as i32).wrapping_div(rs2 as i32)) as u32
-                                }
+                        }
+                        MUL => rs1.wrapping_mul(rs2),
+                        MULH => {
+                            (sign_extend_u32(rs1).wrapping_mul(sign_extend_u32(rs2)) >> 32) as u32
+                        }
+                        MULHSU => (sign_extend_u32(rs1).wrapping_mul(rs2 as i64) >> 32) as u32,
+                        MULHU => (((rs1 as u64).wrapping_mul(rs2 as u64)) >> 32) as u32,
+                        DIV => {
+                            if rs2 == 0 {
+                                u32::MAX
+                            } else {
+                                ((rs1 as i32).wrapping_div(rs2 as i32)) as u32
                             }
-                            DIVU => {
-                                if rs2 == 0 {
-                                    u32::MAX
-                                } else {
-                                    rs1 / rs2
-                                }
+                        }
+                        DIVU => {
+                            if rs2 == 0 {
+                                u32::MAX
+                            } else {
+                                rs1 / rs2
                             }
-                            REM => {
-                                if rs2 == 0 {
-                                    rs1
-                                } else {
-                                    ((rs1 as i32).wrapping_rem(rs2 as i32)) as u32
-                                }
+                        }
+                        REM => {
+                            if rs2 == 0 {
+                                rs1
+                            } else {
+                                ((rs1 as i32).wrapping_rem(rs2 as i32)) as u32
                             }
-                            REMU => {
-                                if rs2 == 0 {
-                                    rs1
-                                } else {
-                                    rs1 % rs2
-                                }
+                        }
+                        REMU => {
+                            if rs2 == 0 {
+                                rs1
+                            } else {
+                                rs1 % rs2
                             }
-
-                            _ => unreachable!("Illegal compute instruction: {:?}", kind),
                         }
+
+                        _ => unreachable!("Illegal compute instruction: {:?}", kind),
                     }
                 }
             }
-        };
-        if !new_pc.is_aligned() {
-            return ctx.trap(TrapCause::InstructionAddressMisaligned);
         }
-        ctx.store_register(decoded.rd_internal() as usize, out)?;
-        ctx.set_pc(new_pc);
-        Ok(true)
+    };
+    if !new_pc.is_aligned() {
+        return ctx.trap(TrapCause::InstructionAddressMisaligned);
     }
+    ctx.store_register(insn.rd_internal() as usize, out)?;
+    ctx.set_pc(new_pc);
+    Ok(true)
+}
 
-    fn step_branch<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        use InsnKind::*;
-
-        let pc = ctx.get_pc();
-        let rs1 = ctx.load_register(decoded.rs1 as RegIdx)?;
-        let rs2 = ctx.load_register(decoded.rs2 as RegIdx)?;
-
-        let taken = match kind {
-            BEQ => rs1 == rs2,
-            BNE => rs1 != rs2,
-            BLT => (rs1 as i32) < (rs2 as i32),
-            BGE => (rs1 as i32) >= (rs2 as i32),
-            BLTU => rs1 < rs2,
-            BGEU => rs1 >= rs2,
-            _ => unreachable!("Illegal branch instruction: {:?}", kind),
-        };
-
-        let new_pc = if taken {
-            pc.wrapping_add(decoded.imm_b())
-        } else {
-            pc + WORD_SIZE
-        };
-
-        if !new_pc.is_aligned() {
-            return ctx.trap(TrapCause::InstructionAddressMisaligned);
-        }
-        ctx.set_pc(new_pc);
-        Ok(true)
-    }
+fn step_branch<M: EmuContext>(ctx: &mut M, kind: InsnKind, decoded: &Instruction) -> Result<bool> {
+    use super::InsnKind::*;
+
+    let pc = ctx.get_pc();
+    let rs1 = ctx.load_register(decoded.rs1 as RegIdx)?;
+    let rs2 = ctx.load_register(decoded.rs2 as RegIdx)?;
+
+    let taken = match kind {
+        BEQ => rs1 == rs2,
+        BNE => rs1 != rs2,
+        BLT => (rs1 as i32) < (rs2 as i32),
+        BGE => (rs1 as i32) >= (rs2 as i32),
+        BLTU => rs1 < rs2,
+        BGEU => rs1 >= rs2,
+        _ => unreachable!("Illegal branch instruction: {:?}", kind),
+    };
+
+    let new_pc = if taken {
+        pc.wrapping_add(decoded.imm as u32)
+    } else {
+        pc + WORD_SIZE
+    };
+
+    if !new_pc.is_aligned() {
+        return ctx.trap(TrapCause::InstructionAddressMisaligned);
+    }
+    ctx.set_pc(new_pc);
+    Ok(true)
+}
 
-    fn step_load<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        let rs1 = ctx.load_register(decoded.rs1 as usize)?;
-        // LOAD instructions do not read rs2.
-        let addr = ByteAddr(rs1.wrapping_add(decoded.imm_i()));
-        if !ctx.check_data_load(addr) {
-            return ctx.trap(TrapCause::LoadAccessFault(addr));
+fn step_load<M: EmuContext>(ctx: &mut M, kind: InsnKind, decoded: &Instruction) -> Result<bool> {
+    let rs1 = ctx.load_register(decoded.rs1)?;
+    // LOAD instructions do not read rs2.
+    let addr = ByteAddr(rs1.wrapping_add_signed(decoded.imm));
+    if !ctx.check_data_load(addr) {
+        return ctx.trap(TrapCause::LoadAccessFault(addr));
+    }
+    let data = ctx.load_memory(addr.waddr())?;
+    let shift = 8 * (addr.0 & 3);
+    let out = match kind {
+        InsnKind::LB => {
+            let mut out = (data >> shift) & 0xff;
+            if out & 0x80 != 0 {
+                out |= 0xffffff00;
+            }
+            out
         }
-        let data = ctx.load_memory(addr.waddr())?;
-        let shift = 8 * (addr.0 & 3);
-        let out = match kind {
-            InsnKind::LB => {
-                let mut out = (data >> shift) & 0xff;
-                if out & 0x80 != 0 {
-                    out |= 0xffffff00;
-                }
-                out
+        InsnKind::LH => {
+            if addr.0 & 0x01 != 0 {
+                return ctx.trap(TrapCause::LoadAddressMisaligned);
             }
-            InsnKind::LH => {
-                if addr.0 & 0x01 != 0 {
-                    return ctx.trap(TrapCause::LoadAddressMisaligned);
-                }
-                let mut out = (data >> shift) & 0xffff;
-                if out & 0x8000 != 0 {
-                    out |= 0xffff0000;
-                }
-                out
+            let mut out = (data >> shift) & 0xffff;
+            if out & 0x8000 != 0 {
+                out |= 0xffff0000;
             }
-            InsnKind::LW => {
-                if addr.0 & 0x03 != 0 {
-                    return ctx.trap(TrapCause::LoadAddressMisaligned);
-                }
-                data
+            out
+        }
+        InsnKind::LW => {
+            if addr.0 & 0x03 != 0 {
+                return ctx.trap(TrapCause::LoadAddressMisaligned);
             }
-            InsnKind::LBU => (data >> shift) & 0xff,
-            InsnKind::LHU => {
-                if addr.0 & 0x01 != 0 {
-                    return ctx.trap(TrapCause::LoadAddressMisaligned);
-                }
-                (data >> shift) & 0xffff
+            data
+        }
+        InsnKind::LBU => (data >> shift) & 0xff,
+        InsnKind::LHU => {
+            if addr.0 & 0x01 != 0 {
+                return ctx.trap(TrapCause::LoadAddressMisaligned);
             }
-            _ => unreachable!(),
-        };
-        ctx.store_register(decoded.rd_internal() as usize, out)?;
-        ctx.set_pc(ctx.get_pc() + WORD_SIZE);
-        Ok(true)
-    }
+            (data >> shift) & 0xffff
+        }
+        _ => unreachable!(),
+    };
+    ctx.store_register(decoded.rd_internal() as usize, out)?;
+    ctx.set_pc(ctx.get_pc() + WORD_SIZE);
+    Ok(true)
+}
 
-    fn step_store<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        let rs1 = ctx.load_register(decoded.rs1 as usize)?;
-        let rs2 = ctx.load_register(decoded.rs2 as usize)?;
-        let addr = ByteAddr(rs1.wrapping_add(decoded.imm_s()));
-        let shift = 8 * (addr.0 & 3);
-        if !ctx.check_data_store(addr) {
-            tracing::error!("mstore: addr={:x?},rs1={:x}", addr, rs1);
-            return ctx.trap(TrapCause::StoreAccessFault);
+fn step_store<M: EmuContext>(ctx: &mut M, kind: InsnKind, decoded: &Instruction) -> Result<bool> {
+    let rs1 = ctx.load_register(decoded.rs1)?;
+    let rs2 = ctx.load_register(decoded.rs2)?;
+    let addr = ByteAddr(rs1.wrapping_add(decoded.imm as u32));
+    let shift = 8 * (addr.0 & 3);
+    if !ctx.check_data_store(addr) {
+        tracing::error!("mstore: addr={:x?},rs1={:x}", addr, rs1);
+        return ctx.trap(TrapCause::StoreAccessFault);
+    }
+    let mut data = ctx.peek_memory(addr.waddr());
+    match kind {
+        InsnKind::SB => {
+            data ^= data & (0xff << shift);
+            data |= (rs2 & 0xff) << shift;
         }
-        let mut data = ctx.peek_memory(addr.waddr());
-        match kind {
-            InsnKind::SB => {
-                data ^= data & (0xff << shift);
-                data |= (rs2 & 0xff) << shift;
+        InsnKind::SH => {
+            if addr.0 & 0x01 != 0 {
+                tracing::debug!("Misaligned SH");
+                return ctx.trap(TrapCause::StoreAddressMisaligned(addr));
             }
-            InsnKind::SH => {
-                if addr.0 & 0x01 != 0 {
-                    tracing::debug!("Misaligned SH");
-                    return ctx.trap(TrapCause::StoreAddressMisaligned(addr));
-                }
-                data ^= data & (0xffff << shift);
-                data |= (rs2 & 0xffff) << shift;
-            }
-            InsnKind::SW => {
-                if addr.0 & 0x03 != 0 {
-                    tracing::debug!("Misaligned SW");
-                    return ctx.trap(TrapCause::StoreAddressMisaligned(addr));
-                }
-                data = rs2;
+            data ^= data & (0xffff << shift);
+            data |= (rs2 & 0xffff) << shift;
+        }
+        InsnKind::SW => {
+            if addr.0 & 0x03 != 0 {
+                tracing::debug!("Misaligned SW");
+                return ctx.trap(TrapCause::StoreAddressMisaligned(addr));
             }
-            _ => unreachable!(),
+            data = rs2;
         }
-        ctx.store_memory(addr.waddr(), data)?;
-        ctx.set_pc(ctx.get_pc() + WORD_SIZE);
-        Ok(true)
+        _ => unreachable!(),
     }
+    ctx.store_memory(addr.waddr(), data)?;
+    ctx.set_pc(ctx.get_pc() + WORD_SIZE);
+    Ok(true)
+}
 
-    fn step_system<M: EmuContext>(
-        &self,
-        ctx: &mut M,
-        kind: InsnKind,
-        decoded: &DecodedInstruction,
-    ) -> Result<bool> {
-        match kind {
-            InsnKind::EANY => match decoded.rs2 {
-                0 => ctx.ecall(),
-                1 => ctx.trap(TrapCause::Breakpoint),
-                _ => ctx.trap(TrapCause::IllegalInstruction(decoded.insn)),
-            },
-            _ => unreachable!(),
-        }
+fn step_system<M: EmuContext>(ctx: &mut M, kind: InsnKind, decoded: &Instruction) -> Result<bool> {
+    match kind {
+        InsnKind::ECALL => ctx.ecall(),
+        _ => ctx.trap(TrapCause::IllegalInstruction(decoded.raw)),
     }
 }
 
diff --git a/ceno_emul/src/rv32im_encode.rs b/ceno_emul/src/rv32im_encode.rs
deleted file mode 100644
index b6a80f32b..000000000
--- a/ceno_emul/src/rv32im_encode.rs
+++ /dev/null
@@ -1,116 +0,0 @@
-use crate::{InsnKind, rv32im::InsnFormat};
-
-const MASK_4_BITS: u32 = 0xF;
-const MASK_5_BITS: u32 = 0x1F;
-const MASK_6_BITS: u32 = 0x3F;
-const MASK_7_BITS: u32 = 0x7F;
-const MASK_8_BITS: u32 = 0xFF;
-const MASK_10_BITS: u32 = 0x3FF;
-const MASK_12_BITS: u32 = 0xFFF;
-
-/// Generate bit encoding of a RISC-V instruction.
-///
-/// Values `rs1`, `rs2` and `rd1` are 5-bit register indices, and `imm` is of
-/// bit length depending on the requirements of the instruction format type.
-///
-/// Fields not required by the instruction's format type are ignored, so one can
-/// safely pass an arbitrary value for these, say 0.
-pub const fn encode_rv32(kind: InsnKind, rs1: u32, rs2: u32, rd: u32, imm: u32) -> u32 {
-    match kind.codes().format {
-        InsnFormat::R => encode_r(kind, rs1, rs2, rd),
-        InsnFormat::I => encode_i(kind, rs1, rd, imm),
-        InsnFormat::S => encode_s(kind, rs1, rs2, imm),
-        InsnFormat::B => encode_b(kind, rs1, rs2, imm),
-        InsnFormat::U => encode_u(kind, rd, imm),
-        InsnFormat::J => encode_j(kind, rd, imm),
-    }
-}
-
-// R-Type
-//        25    20    15       12   7       0
-// +------+-----+-----+--------+----+-------+
-// funct7 | rs2 | rs1 | funct3 | rd | opcode
-const fn encode_r(kind: InsnKind, rs1: u32, rs2: u32, rd: u32) -> u32 {
-    let rs2 = rs2 & MASK_5_BITS; // 5-bits mask
-    let rs1 = rs1 & MASK_5_BITS;
-    let rd = rd & MASK_5_BITS;
-    let func7 = kind.codes().func7;
-    let func3 = kind.codes().func3;
-    let opcode = kind.codes().opcode;
-    func7 << 25 | rs2 << 20 | rs1 << 15 | func3 << 12 | rd << 7 | opcode
-}
-
-// I-Type
-//           20    15       12   7       0
-// +---------+-----+--------+----+-------+
-// imm[0:11] | rs1 | funct3 | rd | opcode
-const fn encode_i(kind: InsnKind, rs1: u32, rd: u32, imm: u32) -> u32 {
-    let rs1 = rs1 & MASK_5_BITS;
-    let rd = rd & MASK_5_BITS;
-    let func3 = kind.codes().func3;
-    let opcode = kind.codes().opcode;
-    // SRLI/SRAI use a specialization of the I-type format with the shift type in imm[10].
-    let is_arithmetic_right_shift = (matches!(kind, InsnKind::SRAI) as u32) << 10;
-    let imm = imm & MASK_12_BITS | is_arithmetic_right_shift;
-    imm << 20 | rs1 << 15 | func3 << 12 | rd << 7 | opcode
-}
-
-// S-Type
-//           25    20    15       12         7       0
-// +---------+-----+-----+--------+----------+-------+
-// imm[5:11] | rs2 | rs1 | funct3 | imm[0:4] | opcode
-const fn encode_s(kind: InsnKind, rs1: u32, rs2: u32, imm: u32) -> u32 {
-    let rs2 = rs2 & MASK_5_BITS;
-    let rs1 = rs1 & MASK_5_BITS;
-    let func3 = kind.codes().func3;
-    let opcode = kind.codes().opcode;
-    let imm_lo = imm & MASK_5_BITS;
-    let imm_hi = (imm >> 5) & MASK_7_BITS; // 7-bits mask
-    imm_hi << 25 | rs2 << 20 | rs1 << 15 | func3 << 12 | imm_lo << 7 | opcode
-}
-
-// B-Type
-//         31          25    20    15       12         8         7       0
-// +-------+-----------+-----+-----+--------+----------+---------+-------+
-// imm[12] | imm[5:10] | rs2 | rs1 | funct3 | imm[1:4] | imm[11] | opcode
-const fn encode_b(kind: InsnKind, rs1: u32, rs2: u32, imm: u32) -> u32 {
-    let rs2 = rs2 & MASK_5_BITS;
-    let rs1 = rs1 & MASK_5_BITS;
-    let func3 = kind.codes().func3;
-    let opcode = kind.codes().opcode;
-    let imm_1_4 = (imm >> 1) & MASK_4_BITS; // skip imm[0]
-    let imm_5_10 = (imm >> 5) & MASK_6_BITS;
-    ((imm >> 12) & 1) << 31
-        | imm_5_10 << 25
-        | rs2 << 20
-        | rs1 << 15
-        | func3 << 12
-        | imm_1_4 << 8
-        | ((imm >> 11) & 1) << 7
-        | opcode
-}
-
-// J-Type
-//         31          21        20           12   7       0
-// +-------+-----------+---------+------------+----+-------+
-// imm[20] | imm[1:10] | imm[11] | imm[12:19] | rd | opcode
-const fn encode_j(kind: InsnKind, rd: u32, imm: u32) -> u32 {
-    let rd = rd & MASK_5_BITS;
-    let opcode = kind.codes().opcode;
-    let imm_1_10 = (imm >> 1) & MASK_10_BITS; // skip imm[0]
-    let imm_12_19 = (imm >> 12) & MASK_8_BITS;
-    ((imm >> 20) & 1) << 31
-        | imm_1_10 << 21
-        | ((imm >> 11) & 1) << 20
-        | imm_12_19 << 12
-        | rd << 7
-        | opcode
-}
-
-// U-Type
-//            12   7        0
-// +----------+----+--------+
-// imm[12:31] | rd | opcode
-const fn encode_u(kind: InsnKind, rd: u32, imm: u32) -> u32 {
-    (imm >> 12) << 12 | (rd & MASK_5_BITS) << 7 | kind.codes().opcode
-}
diff --git a/ceno_emul/src/tracer.rs b/ceno_emul/src/tracer.rs
index a1df363e2..01ad33e01 100644
--- a/ceno_emul/src/tracer.rs
+++ b/ceno_emul/src/tracer.rs
@@ -1,10 +1,9 @@
 use std::{collections::HashMap, fmt, mem};
 
 use crate::{
-    CENO_PLATFORM, InsnKind, PC_STEP_SIZE, Platform,
+    CENO_PLATFORM, InsnKind, Instruction, PC_STEP_SIZE, Platform,
     addr::{ByteAddr, Cycle, RegIdx, Word, WordAddr},
     encode_rv32,
-    rv32im::DecodedInstruction,
 };
 
 /// An instruction and its context in an execution trace. That is concrete values of registers and memory.
@@ -22,7 +21,7 @@ use crate::{
 pub struct StepRecord {
     cycle: Cycle,
     pc: Change<ByteAddr>,
-    insn_code: Word,
+    pub insn: Instruction,
 
     rs1: Option<ReadOp>,
     rs2: Option<ReadOp>,
@@ -57,7 +56,7 @@ impl StepRecord {
     pub fn new_r_instruction(
         cycle: Cycle,
         pc: ByteAddr,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rs2_read: Word,
         rd: Change<Word>,
@@ -79,7 +78,7 @@ impl StepRecord {
     pub fn new_b_instruction(
         cycle: Cycle,
         pc: Change<ByteAddr>,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rs2_read: Word,
         prev_cycle: Cycle,
@@ -99,7 +98,7 @@ impl StepRecord {
     pub fn new_i_instruction(
         cycle: Cycle,
         pc: Change<ByteAddr>,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rd: Change<Word>,
         prev_cycle: Cycle,
@@ -119,7 +118,7 @@ impl StepRecord {
     pub fn new_im_instruction(
         cycle: Cycle,
         pc: ByteAddr,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rd: Change<Word>,
         mem_op: ReadOp,
@@ -148,7 +147,7 @@ impl StepRecord {
     pub fn new_u_instruction(
         cycle: Cycle,
         pc: ByteAddr,
-        insn_code: u32,
+        insn_code: Instruction,
         rd: Change<Word>,
         prev_cycle: Cycle,
     ) -> StepRecord {
@@ -159,7 +158,7 @@ impl StepRecord {
     pub fn new_j_instruction(
         cycle: Cycle,
         pc: Change<ByteAddr>,
-        insn_code: u32,
+        insn_code: Instruction,
         rd: Change<Word>,
         prev_cycle: Cycle,
     ) -> StepRecord {
@@ -169,7 +168,7 @@ impl StepRecord {
     pub fn new_s_instruction(
         cycle: Cycle,
         pc: ByteAddr,
-        insn_code: u32,
+        insn_code: Instruction,
         rs1_read: Word,
         rs2_read: Word,
         memory_op: WriteOp,
@@ -194,7 +193,7 @@ impl StepRecord {
         Self::new_insn(
             cycle,
             Change::new(pc, pc + PC_STEP_SIZE),
-            encode_rv32(InsnKind::EANY, 0, 0, 0, 0),
+            encode_rv32(InsnKind::ECALL, 0, 0, 0, 0),
             Some(value),
             Some(value),
             Some(Change::new(value, value)),
@@ -214,25 +213,23 @@ impl StepRecord {
     fn new_insn(
         cycle: Cycle,
         pc: Change<ByteAddr>,
-        insn_code: u32,
+        insn: Instruction,
         rs1_read: Option<Word>,
         rs2_read: Option<Word>,
         rd: Option<Change<Word>>,
         memory_op: Option<WriteOp>,
         previous_cycle: Cycle,
     ) -> StepRecord {
-        let insn = DecodedInstruction::new(insn_code);
         StepRecord {
             cycle,
             pc,
-            insn_code,
             rs1: rs1_read.map(|rs1| ReadOp {
-                addr: Platform::register_vma(insn.rs1() as RegIdx).into(),
+                addr: Platform::register_vma(insn.rs1).into(),
                 value: rs1,
                 previous_cycle,
             }),
             rs2: rs2_read.map(|rs2| ReadOp {
-                addr: Platform::register_vma(insn.rs2() as RegIdx).into(),
+                addr: Platform::register_vma(insn.rs2).into(),
                 value: rs2,
                 previous_cycle,
             }),
@@ -241,6 +238,7 @@ impl StepRecord {
                 value: rd,
                 previous_cycle,
             }),
+            insn,
             memory_op,
         }
     }
@@ -253,14 +251,9 @@ impl StepRecord {
         self.pc
     }
 
-    /// The instruction as a raw code.
-    pub fn insn_code(&self) -> Word {
-        self.insn_code
-    }
-
     /// The instruction as a decoded structure.
-    pub fn insn(&self) -> DecodedInstruction {
-        DecodedInstruction::new(self.insn_code)
+    pub fn insn(&self) -> Instruction {
+        self.insn
     }
 
     pub fn rs1(&self) -> Option<ReadOp> {
@@ -327,12 +320,9 @@ impl Tracer {
         self.record.pc.after = pc;
     }
 
-    // TODO(Matthias): this should perhaps record `DecodedInstruction`s instead
-    // of raw codes, or perhaps only the pc, because we can always look up the
-    // instruction in the program.
-    pub fn fetch(&mut self, pc: WordAddr, value: Word) {
+    pub fn fetch(&mut self, pc: WordAddr, value: Instruction) {
         self.record.pc.before = pc.baddr();
-        self.record.insn_code = value;
+        self.record.insn = value;
     }
 
     pub fn load_register(&mut self, idx: RegIdx, value: Word) {
diff --git a/ceno_emul/src/vm_state.rs b/ceno_emul/src/vm_state.rs
index e071cb000..838779979 100644
--- a/ceno_emul/src/vm_state.rs
+++ b/ceno_emul/src/vm_state.rs
@@ -5,7 +5,7 @@ use crate::{
     PC_STEP_SIZE, Program, WORD_SIZE,
     addr::{ByteAddr, RegIdx, Word, WordAddr},
     platform::Platform,
-    rv32im::{DecodedInstruction, Emulator, TrapCause},
+    rv32im::{Instruction, TrapCause},
     tracer::{Change, StepRecord, Tracer},
 };
 use anyhow::{Result, anyhow};
@@ -77,18 +77,17 @@ impl VMState {
     }
 
     pub fn iter_until_halt(&mut self) -> impl Iterator<Item = Result<StepRecord>> + '_ {
-        let emu = Emulator::new();
         from_fn(move || {
             if self.halted() {
                 None
             } else {
-                Some(self.step(&emu))
+                Some(self.step())
             }
         })
     }
 
-    fn step(&mut self, emu: &Emulator) -> Result<StepRecord> {
-        emu.step(self)?;
+    fn step(&mut self) -> Result<StepRecord> {
+        crate::rv32im::step(self)?;
         let step = self.tracer.advance();
         if step.is_busy_loop() && !self.halted() {
             Err(anyhow!("Stuck in loop {}", "{}"))
@@ -121,7 +120,7 @@ impl EmuContext for VMState {
             // Treat unknown ecalls as all powerful instructions:
             // Read two registers, write one register, write one memory word, and branch.
             tracing::warn!("ecall ignored: syscall_id={}", function);
-            self.store_register(DecodedInstruction::RD_NULL as RegIdx, 0)?;
+            self.store_register(Instruction::RD_NULL as RegIdx, 0)?;
             // Example ecall effect - any writable address will do.
             let addr = (self.platform.stack_top - WORD_SIZE as u32).into();
             self.store_memory(addr, self.peek_memory(addr))?;
@@ -136,7 +135,7 @@ impl EmuContext for VMState {
         Err(anyhow!("Trap {:?}", cause)) // Crash.
     }
 
-    fn on_normal_end(&mut self, _decoded: &DecodedInstruction) {
+    fn on_normal_end(&mut self, _decoded: &Instruction) {
         self.tracer.store_pc(ByteAddr(self.pc));
     }
 
@@ -189,8 +188,7 @@ impl EmuContext for VMState {
         *self.memory.get(&addr).unwrap_or(&0)
     }
 
-    // TODO(Matthias): this should really return `Result<DecodedInstruction>`
-    fn fetch(&mut self, pc: WordAddr) -> Option<Word> {
+    fn fetch(&mut self, pc: WordAddr) -> Option<Instruction> {
         let byte_pc: ByteAddr = pc.into();
         let relative_pc = byte_pc.0.wrapping_sub(self.program.base_address);
         let idx = (relative_pc / WORD_SIZE as u32) as usize;
@@ -206,8 +204,4 @@ impl EmuContext for VMState {
     fn check_data_store(&self, addr: ByteAddr) -> bool {
         self.platform.can_write(addr.0)
     }
-
-    fn check_insn_load(&self, addr: ByteAddr) -> bool {
-        self.platform.can_execute(addr.0)
-    }
 }
diff --git a/ceno_emul/tests/test_elf.rs b/ceno_emul/tests/test_elf.rs
index ca7a14c1c..7448d4508 100644
--- a/ceno_emul/tests/test_elf.rs
+++ b/ceno_emul/tests/test_elf.rs
@@ -15,7 +15,7 @@ fn test_ceno_rt_panic() -> Result<()> {
     let mut state = VMState::new_from_elf(CENO_PLATFORM, program_elf)?;
     let steps = run(&mut state)?;
     let last = steps.last().unwrap();
-    assert_eq!(last.insn().codes().kind, InsnKind::EANY);
+    assert_eq!(last.insn().kind, InsnKind::ECALL);
     assert_eq!(last.rs1().unwrap().value, Platform::ecall_halt());
     assert_eq!(last.rs2().unwrap().value, 1); // panic / halt(1)
     Ok(())
diff --git a/ceno_emul/tests/test_vm_trace.rs b/ceno_emul/tests/test_vm_trace.rs
index 2aa5f0da2..ef3b8820e 100644
--- a/ceno_emul/tests/test_vm_trace.rs
+++ b/ceno_emul/tests/test_vm_trace.rs
@@ -6,8 +6,8 @@ use std::{
 };
 
 use ceno_emul::{
-    CENO_PLATFORM, Cycle, EmuContext, InsnKind, Platform, Program, StepRecord, Tracer, VMState,
-    WORD_SIZE, WordAddr,
+    CENO_PLATFORM, Cycle, EmuContext, InsnKind, Instruction, Platform, Program, StepRecord, Tracer,
+    VMState, WordAddr, encode_rv32,
 };
 
 #[test]
@@ -15,17 +15,8 @@ fn test_vm_trace() -> Result<()> {
     let program = Program::new(
         CENO_PLATFORM.pc_base(),
         CENO_PLATFORM.pc_base(),
-        PROGRAM_FIBONACCI_20.to_vec(),
-        PROGRAM_FIBONACCI_20
-            .iter()
-            .enumerate()
-            .map(|(insn_idx, &insn)| {
-                (
-                    CENO_PLATFORM.pc_base() + (WORD_SIZE * insn_idx) as u32,
-                    insn,
-                )
-            })
-            .collect(),
+        program_fibonacci_20(),
+        Default::default(),
     );
     let mut ctx = VMState::new(CENO_PLATFORM, Arc::new(program));
 
@@ -36,7 +27,7 @@ fn test_vm_trace() -> Result<()> {
     assert_eq!(ctx.peek_register(2), x2);
     assert_eq!(ctx.peek_register(3), x3);
 
-    let ops: Vec<InsnKind> = steps.iter().map(|step| step.insn().codes().kind).collect();
+    let ops: Vec<InsnKind> = steps.iter().map(|step| step.insn().kind).collect();
     assert_eq!(ops, expected_ops_fibonacci_20());
 
     assert_eq!(
@@ -66,27 +57,25 @@ fn run(state: &mut VMState) -> Result<Vec<StepRecord>> {
 }
 
 /// Example in RISC-V bytecode and assembly.
-const PROGRAM_FIBONACCI_20: [u32; 7] = [
-    // x1 = 10;
-    // x3 = 1;
-    // immediate    rs1  f3   rd   opcode
-    0b_000000001010_00000_000_00001_0010011, // addi x1, x0, 10
-    0b_000000000001_00000_000_00011_0010011, // addi x3, x0, 1
-    // loop {
-    //     x1 -= 1;
-    // immediate    rs1  f3   rd   opcode
-    0b_111111111111_00001_000_00001_0010011, // addi x1, x1, -1
-    //     x2 += x3;
-    //     x3 += x2;
-    // zeros   rs2   rs1   f3  rd    opcode
-    0b_0000000_00011_00010_000_00010_0110011, // add x2, x2, x3
-    0b_0000000_00011_00010_000_00011_0110011, // add x3, x2, x3
-    //     if x1 == 0 { break }
-    // imm      rs2   rs1   f3  imm    opcode
-    0b_1_111111_00000_00001_001_1010_1_1100011, // bne x1, x0, -12
-    // ecall HALT, SUCCESS
-    0b_000000000000_00000_000_00000_1110011,
-];
+pub fn program_fibonacci_20() -> Vec<Instruction> {
+    vec![
+        // x1 = 10;
+        // x3 = 1;
+        encode_rv32(InsnKind::ADDI, 0, 0, 1, 10),
+        encode_rv32(InsnKind::ADDI, 0, 0, 3, 1),
+        // loop {
+        //     x1 -= 1;
+        encode_rv32(InsnKind::ADDI, 1, 0, 1, -1),
+        //     x2 += x3;
+        //     x3 += x2;
+        encode_rv32(InsnKind::ADD, 2, 3, 2, 0),
+        encode_rv32(InsnKind::ADD, 2, 3, 3, 0),
+        //     if x1 == 0 { break }
+        encode_rv32(InsnKind::BNE, 1, 0, 0, -12),
+        // ecall HALT, SUCCESS
+        encode_rv32(InsnKind::ECALL, 0, 0, 0, 0),
+    ]
+}
 
 /// Rust version of the example. Reconstruct the output.
 fn expected_fibonacci_20() -> (u32, u32, u32) {
@@ -115,7 +104,7 @@ fn expected_ops_fibonacci_20() -> Vec<InsnKind> {
     for _ in 0..10 {
         ops.extend(&[ADDI, ADD, ADD, BNE]);
     }
-    ops.push(EANY);
+    ops.push(ECALL);
     ops
 }
 
diff --git a/ceno_zkvm/examples/riscv_opcodes.rs b/ceno_zkvm/examples/riscv_opcodes.rs
index fe119bb25..81d1f6eb3 100644
--- a/ceno_zkvm/examples/riscv_opcodes.rs
+++ b/ceno_zkvm/examples/riscv_opcodes.rs
@@ -1,7 +1,6 @@
 use std::{panic, sync::Arc, time::Instant};
 
 use ceno_zkvm::{
-    declare_program,
     instructions::riscv::{MemPadder, MmuConfig, Rv32imConfig, constants::EXIT_PC},
     scheme::{mock_prover::MockProver, prover::ZKVMProver},
     state::GlobalState,
@@ -13,8 +12,9 @@ use clap::Parser;
 
 use ceno_emul::{
     CENO_PLATFORM, EmuContext,
-    InsnKind::{ADD, BLTU, EANY, LUI, LW},
-    PC_WORD_SIZE, Platform, Program, StepRecord, Tracer, VMState, Word, WordAddr, encode_rv32,
+    InsnKind::{ADD, ADDI, BLTU, ECALL, LW},
+    Instruction, Platform, Program, StepRecord, Tracer, VMState, Word, WordAddr, encode_rv32,
+    encode_rv32u,
 };
 use ceno_zkvm::{
     scheme::{PublicValues, constants::MAX_NUM_VARIABLES, verifier::ZKVMVerifier},
@@ -28,33 +28,26 @@ use mpcs::{Basefold, BasefoldRSParams, PolynomialCommitmentScheme};
 use sumcheck::macros::{entered_span, exit_span};
 use tracing_subscriber::{EnvFilter, Registry, fmt, fmt::format::FmtSpan, layer::SubscriberExt};
 use transcript::BasicTranscript as Transcript;
-const PROGRAM_SIZE: usize = 16;
 // For now, we assume registers
 //  - x0 is not touched,
 //  - x1 is initialized to 1,
 //  - x2 is initialized to -1,
 //  - x3 is initialized to loop bound.
 // we use x4 to hold the acc_sum.
-#[allow(clippy::unusual_byte_groupings)]
-const ECALL_HALT: u32 = 0b_000000000000_00000_000_00000_1110011;
-#[allow(clippy::unusual_byte_groupings)]
-const PROGRAM_CODE: [u32; PROGRAM_SIZE] = {
-    let mut program: [u32; PROGRAM_SIZE] = [ECALL_HALT; PROGRAM_SIZE];
-    declare_program!(
-        program,
-        encode_rv32(LUI, 0, 0, 10, CENO_PLATFORM.public_io.start), // lui x10, public_io
-        encode_rv32(LW, 10, 0, 1, 0),                              // lw x1, 0(x10)
-        encode_rv32(LW, 10, 0, 2, 4),                              // lw x2, 4(x10)
-        encode_rv32(LW, 10, 0, 3, 8),                              // lw x3, 8(x10)
+fn program_code() -> Vec<Instruction> {
+    vec![
+        encode_rv32u(ADDI, 0, 0, 10, CENO_PLATFORM.public_io.start), // lui x10, public_io
+        encode_rv32(LW, 10, 0, 1, 0),                                // lw x1, 0(x10)
+        encode_rv32(LW, 10, 0, 2, 4),                                // lw x2, 4(x10)
+        encode_rv32(LW, 10, 0, 3, 8),                                // lw x3, 8(x10)
         // Main loop.
-        encode_rv32(ADD, 1, 4, 4, 0),              // add x4, x1, x4
-        encode_rv32(ADD, 2, 3, 3, 0),              // add x3, x2, x3
-        encode_rv32(BLTU, 0, 3, 0, -8_i32 as u32), // bltu x0, x3, -8
+        encode_rv32(ADD, 1, 4, 4, 0),       // add x4, x1, x4
+        encode_rv32(ADD, 2, 3, 3, 0),       // add x3, x2, x3
+        encode_rv32(BLTU, 0, 3, 0, -8_i32), // bltu x0, x3, -8
         // End.
-        ECALL_HALT, // ecall halt
-    );
-    program
-};
+        encode_rv32(ECALL, 0, 0, 0, 0), // ecall (halt)
+    ]
+}
 type ExampleProgramTableCircuit<E> = ProgramTableCircuit<E>;
 
 /// Simple program to greet a person
@@ -74,21 +67,14 @@ fn main() {
     let args = Args::parse();
     type E = GoldilocksExt2;
     type Pcs = Basefold<GoldilocksExt2, BasefoldRSParams>;
+    let program_code = program_code();
+    let program_size = program_code.len();
 
     let program = Program::new(
         CENO_PLATFORM.pc_base(),
         CENO_PLATFORM.pc_base(),
-        PROGRAM_CODE.to_vec(),
-        PROGRAM_CODE
-            .iter()
-            .enumerate()
-            .map(|(insn_idx, &insn)| {
-                (
-                    (insn_idx * PC_WORD_SIZE) as u32 + CENO_PLATFORM.pc_base(),
-                    insn,
-                )
-            })
-            .collect(),
+        program_code,
+        Default::default(),
     );
     let mem_addresses = CENO_PLATFORM.ram.clone();
     let io_addresses = CENO_PLATFORM.public_io.clone();
@@ -117,7 +103,7 @@ fn main() {
     let pcs_param = Pcs::setup(1 << MAX_NUM_VARIABLES).expect("Basefold PCS setup");
     let (pp, vp) = Pcs::trim(pcs_param, 1 << MAX_NUM_VARIABLES).expect("Basefold trim");
     let program_params = ProgramParams {
-        program_size: PROGRAM_SIZE,
+        program_size,
         ..Default::default()
     };
     let mut zkvm_cs = ZKVMConstraintSystem::new_with_platform(program_params);
@@ -193,8 +179,7 @@ fn main() {
             .iter()
             .rev()
             .find(|record| {
-                record.insn().codes().kind == EANY
-                    && record.rs1().unwrap().value == Platform::ecall_halt()
+                record.insn().kind == ECALL && record.rs1().unwrap().value == Platform::ecall_halt()
             })
             .expect("halt record not found");
 
diff --git a/ceno_zkvm/src/e2e.rs b/ceno_zkvm/src/e2e.rs
index b1d5b4d77..95bcb73be 100644
--- a/ceno_zkvm/src/e2e.rs
+++ b/ceno_zkvm/src/e2e.rs
@@ -11,8 +11,8 @@ use crate::{
     tables::{MemFinalRecord, MemInitRecord, ProgramTableCircuit, ProgramTableConfig},
 };
 use ceno_emul::{
-    ByteAddr, EmuContext, InsnKind::EANY, IterAddresses, Platform, Program, StepRecord, Tracer,
-    VMState, WORD_SIZE, WordAddr,
+    ByteAddr, EmuContext, InsnKind, IterAddresses, Platform, Program, StepRecord, Tracer, VMState,
+    WORD_SIZE, WordAddr,
 };
 use ff_ext::ExtensionField;
 use itertools::{Itertools, MinMaxResult, chain};
@@ -73,7 +73,7 @@ fn emulate_program(
         .iter()
         .rev()
         .find(|record| {
-            record.insn().codes().kind == EANY
+            record.insn().kind == InsnKind::ECALL
                 && record.rs1().unwrap().value == Platform::ecall_halt()
         })
         .and_then(|halt_record| halt_record.rs2())
@@ -537,9 +537,8 @@ fn format_segments(
 
 fn format_segment(platform: &Platform, addr: u32) -> String {
     format!(
-        "{}{}{}",
+        "{}{}",
         if platform.can_read(addr) { "R" } else { "-" },
         if platform.can_write(addr) { "W" } else { "-" },
-        if platform.can_execute(addr) { "X" } else { "-" },
     )
 }
diff --git a/ceno_zkvm/src/instructions/riscv.rs b/ceno_zkvm/src/instructions/riscv.rs
index de0dfc771..cc5e12b12 100644
--- a/ceno_zkvm/src/instructions/riscv.rs
+++ b/ceno_zkvm/src/instructions/riscv.rs
@@ -28,7 +28,6 @@ mod i_insn;
 mod insn_base;
 mod j_insn;
 mod r_insn;
-mod u_insn;
 
 mod ecall_insn;
 
@@ -37,15 +36,13 @@ mod memory;
 mod s_insn;
 #[cfg(test)]
 mod test;
-#[cfg(test)]
-mod test_utils;
 
 pub trait RIVInstruction {
     const INST_KIND: InsnKind;
 }
 
 pub use arith::{AddInstruction, SubInstruction};
-pub use jump::{AuipcInstruction, JalInstruction, JalrInstruction, LuiInstruction};
+pub use jump::{JalInstruction, JalrInstruction};
 pub use memory::{
     LbInstruction, LbuInstruction, LhInstruction, LhuInstruction, LwInstruction, SbInstruction,
     ShInstruction, SwInstruction,
diff --git a/ceno_zkvm/src/instructions/riscv/arith_imm.rs b/ceno_zkvm/src/instructions/riscv/arith_imm.rs
index c4c09629b..b7f50e898 100644
--- a/ceno_zkvm/src/instructions/riscv/arith_imm.rs
+++ b/ceno_zkvm/src/instructions/riscv/arith_imm.rs
@@ -89,7 +89,7 @@ mod test {
 
     use crate::{
         circuit_builder::{CircuitBuilder, ConstraintSystem},
-        instructions::{Instruction, riscv::test_utils::imm_i},
+        instructions::Instruction,
         scheme::mock_prover::{MOCK_PC_START, MockProver},
     };
 
@@ -110,7 +110,7 @@ mod test {
             .unwrap()
             .unwrap();
 
-        let insn_code = encode_rv32(InsnKind::ADDI, 2, 0, 4, imm_i(3));
+        let insn_code = encode_rv32(InsnKind::ADDI, 2, 0, 4, 3);
         let (raw_witin, lkm) = AddiInstruction::<GoldilocksExt2>::assign_instances(
             &config,
             cb.cs.num_witin as usize,
@@ -143,7 +143,8 @@ mod test {
             .unwrap()
             .unwrap();
 
-        let insn_code = encode_rv32(InsnKind::ADDI, 2, 0, 4, imm_i(-3));
+        let insn_code = encode_rv32(InsnKind::ADDI, 2, 0, 4, -3);
+
         let (raw_witin, lkm) = AddiInstruction::<GoldilocksExt2>::assign_instances(
             &config,
             cb.cs.num_witin as usize,
diff --git a/ceno_zkvm/src/instructions/riscv/branch/test.rs b/ceno_zkvm/src/instructions/riscv/branch/test.rs
index 9847c66d5..5b53ec904 100644
--- a/ceno_zkvm/src/instructions/riscv/branch/test.rs
+++ b/ceno_zkvm/src/instructions/riscv/branch/test.rs
@@ -5,7 +5,7 @@ use super::*;
 use crate::{
     circuit_builder::{CircuitBuilder, ConstraintSystem},
     error::ZKVMError,
-    instructions::{Instruction, riscv::test_utils::imm_b},
+    instructions::Instruction,
     scheme::mock_prover::{MOCK_PC_START, MockProver},
 };
 
@@ -32,7 +32,7 @@ fn impl_opcode_beq(equal: bool) {
         .unwrap()
         .unwrap();
 
-    let insn_code = encode_rv32(InsnKind::BEQ, 2, 3, 0, imm_b(8));
+    let insn_code = encode_rv32(InsnKind::BEQ, 2, 3, 0, 8);
     let pc_offset = if equal { 8 } else { PC_STEP_SIZE };
     let (raw_witin, lkm) =
         BeqInstruction::assign_instances(&config, cb.cs.num_witin as usize, vec![
@@ -70,7 +70,7 @@ fn impl_opcode_bne(equal: bool) {
         .unwrap()
         .unwrap();
 
-    let insn_code = encode_rv32(InsnKind::BNE, 2, 3, 0, imm_b(8));
+    let insn_code = encode_rv32(InsnKind::BNE, 2, 3, 0, 8);
     let pc_offset = if equal { PC_STEP_SIZE } else { 8 };
     let (raw_witin, lkm) =
         BneInstruction::assign_instances(&config, cb.cs.num_witin as usize, vec![
@@ -111,8 +111,8 @@ fn impl_bltu_circuit(taken: bool, a: u32, b: u32) -> Result<(), ZKVMError> {
         MOCK_PC_START + PC_STEP_SIZE
     };
 
-    let insn_code = encode_rv32(InsnKind::BLTU, 2, 3, 0, imm_b(-8));
-    println!("{:#b}", insn_code);
+    let insn_code = encode_rv32(InsnKind::BLTU, 2, 3, 0, -8);
+    println!("{:?}", insn_code);
     let (raw_witin, lkm) =
         BltuInstruction::assign_instances(&config, circuit_builder.cs.num_witin as usize, vec![
             StepRecord::new_b_instruction(
@@ -153,7 +153,7 @@ fn impl_bgeu_circuit(taken: bool, a: u32, b: u32) -> Result<(), ZKVMError> {
         MOCK_PC_START + PC_STEP_SIZE
     };
 
-    let insn_code = encode_rv32(InsnKind::BGEU, 2, 3, 0, imm_b(-8));
+    let insn_code = encode_rv32(InsnKind::BGEU, 2, 3, 0, -8);
     let (raw_witin, lkm) =
         BgeuInstruction::assign_instances(&config, circuit_builder.cs.num_witin as usize, vec![
             StepRecord::new_b_instruction(
@@ -195,7 +195,7 @@ fn impl_blt_circuit(taken: bool, a: i32, b: i32) -> Result<(), ZKVMError> {
         MOCK_PC_START + PC_STEP_SIZE
     };
 
-    let insn_code = encode_rv32(InsnKind::BLT, 2, 3, 0, imm_b(-8));
+    let insn_code = encode_rv32(InsnKind::BLT, 2, 3, 0, -8);
     let (raw_witin, lkm) =
         BltInstruction::assign_instances(&config, circuit_builder.cs.num_witin as usize, vec![
             StepRecord::new_b_instruction(
@@ -237,7 +237,7 @@ fn impl_bge_circuit(taken: bool, a: i32, b: i32) -> Result<(), ZKVMError> {
         MOCK_PC_START + PC_STEP_SIZE
     };
 
-    let insn_code = encode_rv32(InsnKind::BGE, 2, 3, 0, imm_b(-8));
+    let insn_code = encode_rv32(InsnKind::BGE, 2, 3, 0, -8);
     let (raw_witin, lkm) =
         BgeInstruction::assign_instances(&config, circuit_builder.cs.num_witin as usize, vec![
             StepRecord::new_b_instruction(
diff --git a/ceno_zkvm/src/instructions/riscv/dummy/dummy_circuit.rs b/ceno_zkvm/src/instructions/riscv/dummy/dummy_circuit.rs
index 1d98d1271..b73c932a1 100644
--- a/ceno_zkvm/src/instructions/riscv/dummy/dummy_circuit.rs
+++ b/ceno_zkvm/src/instructions/riscv/dummy/dummy_circuit.rs
@@ -33,13 +33,15 @@ impl<E: ExtensionField, I: RIVInstruction> Instruction<E> for DummyInstruction<E
     fn construct_circuit(
         circuit_builder: &mut CircuitBuilder<E>,
     ) -> Result<Self::InstructionConfig, ZKVMError> {
-        let codes = I::INST_KIND.codes();
+        let kind = I::INST_KIND;
+        let format = InsnFormat::from(kind);
+        let category = InsnCategory::from(kind);
 
         // ECALL can do everything.
-        let is_ecall = matches!(codes.kind, InsnKind::EANY);
+        let is_ecall = matches!(kind, InsnKind::ECALL);
 
         // Regular instructions do what is implied by their format.
-        let (with_rs1, with_rs2, with_rd) = match codes.format {
+        let (with_rs1, with_rs2, with_rd) = match format {
             _ if is_ecall => (true, true, true),
             InsnFormat::R => (true, true, true),
             InsnFormat::I => (true, false, true),
@@ -48,10 +50,10 @@ impl<E: ExtensionField, I: RIVInstruction> Instruction<E> for DummyInstruction<E
             InsnFormat::U => (false, false, true),
             InsnFormat::J => (false, false, true),
         };
-        let with_mem_write = matches!(codes.category, InsnCategory::Store) || is_ecall;
-        let with_mem_read = matches!(codes.category, InsnCategory::Load);
-        let branching = matches!(codes.category, InsnCategory::Branch)
-            || matches!(codes.kind, InsnKind::JAL | InsnKind::JALR)
+        let with_mem_write = matches!(category, InsnCategory::Store) || is_ecall;
+        let with_mem_read = matches!(category, InsnCategory::Load);
+        let branching = matches!(category, InsnCategory::Branch)
+            || matches!(kind, InsnKind::JAL | InsnKind::JALR)
             || is_ecall;
 
         DummyConfig::construct_circuit(
@@ -174,7 +176,7 @@ impl<E: ExtensionField> DummyConfig<E> {
         // Fetch instruction
 
         // The register IDs of ECALL is fixed, not encoded.
-        let is_ecall = matches!(kind, InsnKind::EANY);
+        let is_ecall = matches!(kind, InsnKind::ECALL);
         let rs1_id = match &rs1 {
             Some((r, _)) if !is_ecall => r.id.expr(),
             _ => 0.into(),
diff --git a/ceno_zkvm/src/instructions/riscv/dummy/test.rs b/ceno_zkvm/src/instructions/riscv/dummy/test.rs
index df1eb0572..5f3a03bb9 100644
--- a/ceno_zkvm/src/instructions/riscv/dummy/test.rs
+++ b/ceno_zkvm/src/instructions/riscv/dummy/test.rs
@@ -30,7 +30,7 @@ fn test_dummy_ecall() {
         .unwrap();
 
     let step = StepRecord::new_ecall_any(4, MOCK_PC_START);
-    let insn_code = step.insn_code();
+    let insn_code = step.insn();
     let (raw_witin, lkm) =
         EcallDummy::assign_instances(&config, cb.cs.num_witin as usize, vec![step]).unwrap();
 
diff --git a/ceno_zkvm/src/instructions/riscv/ecall.rs b/ceno_zkvm/src/instructions/riscv/ecall.rs
index 0d7a3315a..261772638 100644
--- a/ceno_zkvm/src/instructions/riscv/ecall.rs
+++ b/ceno_zkvm/src/instructions/riscv/ecall.rs
@@ -7,7 +7,7 @@ use super::{RIVInstruction, dummy::DummyInstruction};
 
 pub struct EcallOp;
 impl RIVInstruction for EcallOp {
-    const INST_KIND: InsnKind = InsnKind::EANY;
+    const INST_KIND: InsnKind = InsnKind::ECALL;
 }
 /// Unsafe. A dummy ecall circuit that ignores unimplemented functions.
 pub type EcallDummy<E> = DummyInstruction<E, EcallOp>;
diff --git a/ceno_zkvm/src/instructions/riscv/ecall_insn.rs b/ceno_zkvm/src/instructions/riscv/ecall_insn.rs
index 078223617..8de7e02c8 100644
--- a/ceno_zkvm/src/instructions/riscv/ecall_insn.rs
+++ b/ceno_zkvm/src/instructions/riscv/ecall_insn.rs
@@ -10,7 +10,7 @@ use crate::{
     tables::InsnRecord,
     witness::LkMultiplicity,
 };
-use ceno_emul::{InsnKind::EANY, PC_STEP_SIZE, Platform, StepRecord, Tracer};
+use ceno_emul::{InsnKind::ECALL, PC_STEP_SIZE, Platform, StepRecord, Tracer};
 use ff_ext::ExtensionField;
 
 pub struct EcallInstructionConfig {
@@ -38,7 +38,7 @@ impl EcallInstructionConfig {
 
         cb.lk_fetch(&InsnRecord::new(
             pc.expr(),
-            EANY.into(),
+            ECALL.into(),
             None,
             0.into(),
             0.into(),
diff --git a/ceno_zkvm/src/instructions/riscv/jump.rs b/ceno_zkvm/src/instructions/riscv/jump.rs
index 50708d07a..b57aadbbb 100644
--- a/ceno_zkvm/src/instructions/riscv/jump.rs
+++ b/ceno_zkvm/src/instructions/riscv/jump.rs
@@ -1,12 +1,8 @@
-mod auipc;
 mod jal;
 mod jalr;
-mod lui;
 
-pub use auipc::AuipcInstruction;
 pub use jal::JalInstruction;
 pub use jalr::JalrInstruction;
-pub use lui::LuiInstruction;
 
 #[cfg(test)]
 mod test;
diff --git a/ceno_zkvm/src/instructions/riscv/jump/auipc.rs b/ceno_zkvm/src/instructions/riscv/jump/auipc.rs
deleted file mode 100644
index ed1f07d1f..000000000
--- a/ceno_zkvm/src/instructions/riscv/jump/auipc.rs
+++ /dev/null
@@ -1,94 +0,0 @@
-use std::marker::PhantomData;
-
-use ceno_emul::InsnKind;
-use ff_ext::ExtensionField;
-
-use crate::{
-    Value,
-    circuit_builder::CircuitBuilder,
-    error::ZKVMError,
-    expression::{ToExpr, WitIn},
-    instructions::{
-        Instruction,
-        riscv::{constants::UInt, u_insn::UInstructionConfig},
-    },
-    set_val,
-    tables::InsnRecord,
-    utils::i64_to_base,
-    witness::LkMultiplicity,
-};
-
-pub struct AuipcConfig<E: ExtensionField> {
-    pub u_insn: UInstructionConfig<E>,
-    pub imm: WitIn,
-    pub overflow_bit: WitIn,
-    pub rd_written: UInt<E>,
-}
-
-pub struct AuipcInstruction<E>(PhantomData<E>);
-
-/// AUIPC instruction circuit
-impl<E: ExtensionField> Instruction<E> for AuipcInstruction<E> {
-    type InstructionConfig = AuipcConfig<E>;
-
-    fn name() -> String {
-        format!("{:?}", InsnKind::AUIPC)
-    }
-
-    fn construct_circuit(
-        circuit_builder: &mut CircuitBuilder<E>,
-    ) -> Result<AuipcConfig<E>, ZKVMError> {
-        let imm = circuit_builder.create_witin(|| "imm");
-        let rd_written = UInt::new(|| "rd_written", circuit_builder)?;
-
-        let u_insn = UInstructionConfig::construct_circuit(
-            circuit_builder,
-            InsnKind::AUIPC,
-            &imm.expr(),
-            rd_written.register_expr(),
-        )?;
-
-        let overflow_bit = circuit_builder.create_witin(|| "overflow_bit");
-        circuit_builder.assert_bit(|| "is_bit", overflow_bit.expr())?;
-
-        // assert: imm + pc = rd_written + overflow_bit * 2^32
-        // valid formulation of mod 2^32 arithmetic because:
-        // - imm and pc are constrained to 4 bytes by instruction table lookup
-        // - rd_written is constrained to 4 bytes by UInt checked limbs
-        circuit_builder.require_equal(
-            || "imm+pc = rd_written+2^32*overflow",
-            imm.expr() + u_insn.vm_state.pc.expr(),
-            rd_written.value() + overflow_bit.expr() * (1u64 << 32),
-        )?;
-
-        Ok(AuipcConfig {
-            u_insn,
-            imm,
-            overflow_bit,
-            rd_written,
-        })
-    }
-
-    fn assign_instance(
-        config: &Self::InstructionConfig,
-        instance: &mut [E::BaseField],
-        lk_multiplicity: &mut LkMultiplicity,
-        step: &ceno_emul::StepRecord,
-    ) -> Result<(), ZKVMError> {
-        let pc: u32 = step.pc().before.0;
-        let imm = InsnRecord::imm_internal(&step.insn());
-        let (sum, overflow) = pc.overflowing_add(imm as u32);
-
-        set_val!(instance, config.imm, i64_to_base::<E::BaseField>(imm));
-        set_val!(instance, config.overflow_bit, overflow as u64);
-
-        let sum_limbs = Value::new(sum, lk_multiplicity);
-        config.rd_written.assign_value(instance, sum_limbs);
-
-        config
-            .u_insn
-            .assign_instance(instance, lk_multiplicity, step)?;
-
-        Ok(())
-    }
-}
diff --git a/ceno_zkvm/src/instructions/riscv/jump/lui.rs b/ceno_zkvm/src/instructions/riscv/jump/lui.rs
deleted file mode 100644
index 8ad9d497e..000000000
--- a/ceno_zkvm/src/instructions/riscv/jump/lui.rs
+++ /dev/null
@@ -1,64 +0,0 @@
-use std::marker::PhantomData;
-
-use ceno_emul::InsnKind;
-use ff_ext::ExtensionField;
-
-use crate::{
-    Value,
-    circuit_builder::CircuitBuilder,
-    error::ZKVMError,
-    instructions::{
-        Instruction,
-        riscv::{constants::UInt, u_insn::UInstructionConfig},
-    },
-    witness::LkMultiplicity,
-};
-
-pub struct LuiConfig<E: ExtensionField> {
-    pub u_insn: UInstructionConfig<E>,
-    pub rd_written: UInt<E>,
-}
-
-pub struct LuiInstruction<E>(PhantomData<E>);
-
-/// LUI instruction circuit
-impl<E: ExtensionField> Instruction<E> for LuiInstruction<E> {
-    type InstructionConfig = LuiConfig<E>;
-
-    fn name() -> String {
-        format!("{:?}", InsnKind::LUI)
-    }
-
-    fn construct_circuit(
-        circuit_builder: &mut CircuitBuilder<E>,
-    ) -> Result<LuiConfig<E>, ZKVMError> {
-        let rd_written = UInt::new(|| "rd_limbs", circuit_builder)?;
-
-        // rd_written = imm, so just enforce that U-type immediate from program
-        // table is equal to rd_written value
-        let u_insn = UInstructionConfig::construct_circuit(
-            circuit_builder,
-            InsnKind::LUI,
-            &rd_written.value(), // instruction immediate for program table lookup
-            rd_written.register_expr(),
-        )?;
-
-        Ok(LuiConfig { u_insn, rd_written })
-    }
-
-    fn assign_instance(
-        config: &Self::InstructionConfig,
-        instance: &mut [E::BaseField],
-        lk_multiplicity: &mut LkMultiplicity,
-        step: &ceno_emul::StepRecord,
-    ) -> Result<(), ZKVMError> {
-        config
-            .u_insn
-            .assign_instance(instance, lk_multiplicity, step)?;
-
-        let rd = Value::new(step.rd().unwrap().value.after, lk_multiplicity);
-        config.rd_written.assign_limbs(instance, rd.as_u16_limbs());
-
-        Ok(())
-    }
-}
diff --git a/ceno_zkvm/src/instructions/riscv/jump/test.rs b/ceno_zkvm/src/instructions/riscv/jump/test.rs
index 4453f4c3a..648b17f66 100644
--- a/ceno_zkvm/src/instructions/riscv/jump/test.rs
+++ b/ceno_zkvm/src/instructions/riscv/jump/test.rs
@@ -3,14 +3,11 @@ use goldilocks::GoldilocksExt2;
 
 use crate::{
     circuit_builder::{CircuitBuilder, ConstraintSystem},
-    instructions::{
-        Instruction,
-        riscv::test_utils::{imm_j, imm_u},
-    },
+    instructions::Instruction,
     scheme::mock_prover::{MOCK_PC_START, MockProver},
 };
 
-use super::{AuipcInstruction, JalInstruction, JalrInstruction, LuiInstruction};
+use super::{JalInstruction, JalrInstruction};
 
 #[test]
 fn test_opcode_jal() {
@@ -29,7 +26,7 @@ fn test_opcode_jal() {
 
     let pc_offset: i32 = -8i32;
     let new_pc: ByteAddr = ByteAddr(MOCK_PC_START.0.wrapping_add_signed(pc_offset));
-    let insn_code = encode_rv32(InsnKind::JAL, 0, 0, 4, imm_j(pc_offset));
+    let insn_code = encode_rv32(InsnKind::JAL, 0, 0, 4, pc_offset);
     let (raw_witin, lkm) = JalInstruction::<GoldilocksExt2>::assign_instances(
         &config,
         cb.cs.num_witin as usize,
@@ -64,7 +61,7 @@ fn test_opcode_jalr() {
     let imm = -15i32;
     let rs1_read: Word = 100u32;
     let new_pc: ByteAddr = ByteAddr(rs1_read.wrapping_add_signed(imm) & (!1));
-    let insn_code = encode_rv32(InsnKind::JALR, 2, 0, 4, imm as u32);
+    let insn_code = encode_rv32(InsnKind::JALR, 2, 0, 4, imm);
 
     let (raw_witin, lkm) = JalrInstruction::<GoldilocksExt2>::assign_instances(
         &config,
@@ -79,72 +76,5 @@ fn test_opcode_jalr() {
         )],
     )
     .unwrap();
-
-    MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
-}
-
-#[test]
-fn test_opcode_lui() {
-    let mut cs = ConstraintSystem::<GoldilocksExt2>::new(|| "riscv");
-    let mut cb = CircuitBuilder::new(&mut cs);
-    let config = cb
-        .namespace(
-            || "lui",
-            |cb| {
-                let config = LuiInstruction::<GoldilocksExt2>::construct_circuit(cb);
-                Ok(config)
-            },
-        )
-        .unwrap()
-        .unwrap();
-
-    let imm_value = imm_u(0x90005);
-    let insn_code = encode_rv32(InsnKind::LUI, 0, 0, 4, imm_value);
-    let (raw_witin, lkm) = LuiInstruction::<GoldilocksExt2>::assign_instances(
-        &config,
-        cb.cs.num_witin as usize,
-        vec![StepRecord::new_u_instruction(
-            4,
-            MOCK_PC_START,
-            insn_code,
-            Change::new(0, imm_value),
-            0,
-        )],
-    )
-    .unwrap();
-
-    MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
-}
-
-#[test]
-fn test_opcode_auipc() {
-    let mut cs = ConstraintSystem::<GoldilocksExt2>::new(|| "riscv");
-    let mut cb = CircuitBuilder::new(&mut cs);
-    let config = cb
-        .namespace(
-            || "auipc",
-            |cb| {
-                let config = AuipcInstruction::<GoldilocksExt2>::construct_circuit(cb);
-                Ok(config)
-            },
-        )
-        .unwrap()
-        .unwrap();
-
-    let imm_value = imm_u(0x90005);
-    let insn_code = encode_rv32(InsnKind::AUIPC, 0, 0, 4, imm_value);
-    let (raw_witin, lkm) = AuipcInstruction::<GoldilocksExt2>::assign_instances(
-        &config,
-        cb.cs.num_witin as usize,
-        vec![StepRecord::new_u_instruction(
-            4,
-            MOCK_PC_START,
-            insn_code,
-            Change::new(0, MOCK_PC_START.0.wrapping_add(imm_value)),
-            0,
-        )],
-    )
-    .unwrap();
-
     MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
 }
diff --git a/ceno_zkvm/src/instructions/riscv/logic_imm/logic_imm_circuit.rs b/ceno_zkvm/src/instructions/riscv/logic_imm/logic_imm_circuit.rs
index 9328e27a7..4e9bd5faa 100644
--- a/ceno_zkvm/src/instructions/riscv/logic_imm/logic_imm_circuit.rs
+++ b/ceno_zkvm/src/instructions/riscv/logic_imm/logic_imm_circuit.rs
@@ -123,7 +123,7 @@ impl<E: ExtensionField> LogicConfig<E> {
 
 #[cfg(test)]
 mod test {
-    use ceno_emul::{Change, InsnKind, PC_STEP_SIZE, StepRecord, encode_rv32};
+    use ceno_emul::{Change, InsnKind, PC_STEP_SIZE, StepRecord, encode_rv32u};
     use goldilocks::GoldilocksExt2;
 
     use crate::{
@@ -190,7 +190,7 @@ mod test {
             .unwrap()
             .unwrap();
 
-        let insn_code = encode_rv32(I::INST_KIND, 2, 0, 4, imm);
+        let insn_code = encode_rv32u(I::INST_KIND, 2, 0, 4, imm);
         let (raw_witin, lkm) = LogicInstruction::<GoldilocksExt2, I>::assign_instances(
             &config,
             cb.cs.num_witin as usize,
diff --git a/ceno_zkvm/src/instructions/riscv/memory/test.rs b/ceno_zkvm/src/instructions/riscv/memory/test.rs
index 6243f197b..4785994d3 100644
--- a/ceno_zkvm/src/instructions/riscv/memory/test.rs
+++ b/ceno_zkvm/src/instructions/riscv/memory/test.rs
@@ -74,7 +74,7 @@ fn load(mem_value: Word, insn: InsnKind, shift: u32) -> Word {
     }
 }
 
-fn impl_opcode_store<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruction<E>>(imm: u32) {
+fn impl_opcode_store<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruction<E>>(imm: i32) {
     let mut cs = ConstraintSystem::<E>::new(|| "riscv");
     let mut cb = CircuitBuilder::new(&mut cs);
     let config = cb
@@ -92,7 +92,7 @@ fn impl_opcode_store<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruct
     let prev_mem_value = 0x40302010;
     let rs2_word = Word::from(0x12345678_u32);
     let rs1_word = Word::from(0x4000000_u32);
-    let unaligned_addr = ByteAddr::from(rs1_word.wrapping_add(imm));
+    let unaligned_addr = ByteAddr::from(rs1_word.wrapping_add_signed(imm));
     let new_mem_value = match I::INST_KIND {
         InsnKind::SB => sb(prev_mem_value, rs2_word, unaligned_addr.shift()),
         InsnKind::SH => sh(prev_mem_value, rs2_word, unaligned_addr.shift()),
@@ -122,7 +122,7 @@ fn impl_opcode_store<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruct
     MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
 }
 
-fn impl_opcode_load<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruction<E>>(imm: u32) {
+fn impl_opcode_load<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instruction<E>>(imm: i32) {
     let mut cs = ConstraintSystem::<E>::new(|| "riscv");
     let mut cb = CircuitBuilder::new(&mut cs);
     let config = cb
@@ -140,7 +140,7 @@ fn impl_opcode_load<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instructi
     let mem_value = 0x40302010;
     let rs1_word = Word::from(0x4000000_u32);
     let prev_rd_word = Word::from(0x12345678_u32);
-    let unaligned_addr = ByteAddr::from(rs1_word.wrapping_add(imm));
+    let unaligned_addr = ByteAddr::from(rs1_word.wrapping_add_signed(imm));
     let new_rd_word = load(mem_value, I::INST_KIND, unaligned_addr.shift());
     let rd_change = Change {
         before: prev_rd_word,
@@ -166,16 +166,16 @@ fn impl_opcode_load<E: ExtensionField + Hash, I: RIVInstruction, Inst: Instructi
     MockProver::assert_satisfied_raw(&cb, raw_witin, &[insn_code], None, Some(lkm));
 }
 
-fn impl_opcode_sb(imm: u32) {
+fn impl_opcode_sb(imm: i32) {
     impl_opcode_store::<GoldilocksExt2, SBOp, SbInstruction<GoldilocksExt2>>(imm)
 }
 
-fn impl_opcode_sh(imm: u32) {
+fn impl_opcode_sh(imm: i32) {
     assert_eq!(imm & 0x01, 0);
     impl_opcode_store::<GoldilocksExt2, SHOp, ShInstruction<GoldilocksExt2>>(imm)
 }
 
-fn impl_opcode_sw(imm: u32) {
+fn impl_opcode_sw(imm: i32) {
     assert_eq!(imm & 0x03, 0);
     impl_opcode_store::<GoldilocksExt2, SWOp, SwInstruction<GoldilocksExt2>>(imm)
 }
@@ -187,9 +187,8 @@ fn test_sb() {
     impl_opcode_sb(10);
     impl_opcode_sb(15);
 
-    let neg_one = u32::MAX;
-    for i in 0..4 {
-        impl_opcode_sb(neg_one - i);
+    for i in -4..0 {
+        impl_opcode_sb(i);
     }
 }
 
@@ -198,9 +197,8 @@ fn test_sh() {
     impl_opcode_sh(0);
     impl_opcode_sh(2);
 
-    let neg_two = u32::MAX - 1;
-    for i in [0, 2] {
-        impl_opcode_sh(neg_two - i)
+    for i in [-4, -2] {
+        impl_opcode_sh(i)
     }
 }
 
@@ -209,8 +207,7 @@ fn test_sw() {
     impl_opcode_sw(0);
     impl_opcode_sw(4);
 
-    let neg_four = u32::MAX - 3;
-    impl_opcode_sw(neg_four);
+    impl_opcode_sw(-4);
 }
 
 #[test]
@@ -220,10 +217,8 @@ fn test_lb() {
     impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(2);
     impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(3);
 
-    let neg_one = u32::MAX;
-    // imm = -1, -2, -3
-    for i in 0..3 {
-        impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(neg_one - i);
+    for i in -3..0 {
+        impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(i);
     }
 }
 
@@ -234,10 +229,8 @@ fn test_lbu() {
     impl_opcode_load::<GoldilocksExt2, LbuOp, LbuInstruction<GoldilocksExt2>>(2);
     impl_opcode_load::<GoldilocksExt2, LbuOp, LbuInstruction<GoldilocksExt2>>(3);
 
-    let neg_one = u32::MAX;
-    // imm = -1, -2, -3
-    for i in 0..3 {
-        impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(neg_one - i);
+    for i in -3..0 {
+        impl_opcode_load::<GoldilocksExt2, LbOp, LbInstruction<GoldilocksExt2>>(i);
     }
 }
 
@@ -247,10 +240,8 @@ fn test_lh() {
     impl_opcode_load::<GoldilocksExt2, LhOp, LhInstruction<GoldilocksExt2>>(2);
     impl_opcode_load::<GoldilocksExt2, LhOp, LhInstruction<GoldilocksExt2>>(4);
 
-    let neg_two = u32::MAX - 1;
-    // imm = -2, -4
-    for i in [0, 2] {
-        impl_opcode_load::<GoldilocksExt2, LhOp, LhInstruction<GoldilocksExt2>>(neg_two - i);
+    for i in [-4, -2] {
+        impl_opcode_load::<GoldilocksExt2, LhOp, LhInstruction<GoldilocksExt2>>(i);
     }
 }
 
@@ -260,10 +251,8 @@ fn test_lhu() {
     impl_opcode_load::<GoldilocksExt2, LhuOp, LhuInstruction<GoldilocksExt2>>(2);
     impl_opcode_load::<GoldilocksExt2, LhuOp, LhuInstruction<GoldilocksExt2>>(4);
 
-    let neg_two = u32::MAX - 1;
-    // imm = -2, -4
-    for i in [0, 2] {
-        impl_opcode_load::<GoldilocksExt2, LhuOp, LhuInstruction<GoldilocksExt2>>(neg_two - i);
+    for i in [-4, -2] {
+        impl_opcode_load::<GoldilocksExt2, LhuOp, LhuInstruction<GoldilocksExt2>>(i);
     }
 }
 
@@ -271,5 +260,5 @@ fn test_lhu() {
 fn test_lw() {
     impl_opcode_load::<GoldilocksExt2, LwOp, LwInstruction<GoldilocksExt2>>(0);
     impl_opcode_load::<GoldilocksExt2, LwOp, LwInstruction<GoldilocksExt2>>(4);
-    impl_opcode_load::<GoldilocksExt2, LwOp, LwInstruction<GoldilocksExt2>>(u32::MAX - 3); // imm = -4
+    impl_opcode_load::<GoldilocksExt2, LwOp, LwInstruction<GoldilocksExt2>>(-4);
 }
diff --git a/ceno_zkvm/src/instructions/riscv/rv32im.rs b/ceno_zkvm/src/instructions/riscv/rv32im.rs
index d404cd073..a00d2c7e4 100644
--- a/ceno_zkvm/src/instructions/riscv/rv32im.rs
+++ b/ceno_zkvm/src/instructions/riscv/rv32im.rs
@@ -39,10 +39,7 @@ use std::collections::{BTreeMap, BTreeSet};
 use strum::IntoEnumIterator;
 
 use super::{
-    arith::AddInstruction,
-    branch::BltuInstruction,
-    ecall::HaltInstruction,
-    jump::{JalInstruction, LuiInstruction},
+    arith::AddInstruction, branch::BltuInstruction, ecall::HaltInstruction, jump::JalInstruction,
     memory::LwInstruction,
 };
 
@@ -88,8 +85,6 @@ pub struct Rv32imConfig<E: ExtensionField> {
     // Jump Opcodes
     pub jal_config: <JalInstruction<E> as Instruction<E>>::InstructionConfig,
     pub jalr_config: <JalrInstruction<E> as Instruction<E>>::InstructionConfig,
-    pub auipc_config: <AuipcInstruction<E> as Instruction<E>>::InstructionConfig,
-    pub lui_config: <LuiInstruction<E> as Instruction<E>>::InstructionConfig,
 
     // Memory Opcodes
     pub lw_config: <LwInstruction<E> as Instruction<E>>::InstructionConfig,
@@ -155,10 +150,8 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         let bgeu_config = cs.register_opcode_circuit::<BgeuInstruction<E>>();
 
         // jump opcodes
-        let lui_config = cs.register_opcode_circuit::<LuiInstruction<E>>();
         let jal_config = cs.register_opcode_circuit::<JalInstruction<E>>();
         let jalr_config = cs.register_opcode_circuit::<JalrInstruction<E>>();
-        let auipc_config = cs.register_opcode_circuit::<AuipcInstruction<E>>();
 
         // memory opcodes
         let lw_config = cs.register_opcode_circuit::<LwInstruction<E>>();
@@ -218,10 +211,8 @@ impl<E: ExtensionField> Rv32imConfig<E> {
             bge_config,
             bgeu_config,
             // jump opcodes
-            lui_config,
             jal_config,
             jalr_config,
-            auipc_config,
             // memory opcodes
             sw_config,
             sh_config,
@@ -287,8 +278,6 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         // jump
         fixed.register_opcode_circuit::<JalInstruction<E>>(cs);
         fixed.register_opcode_circuit::<JalrInstruction<E>>(cs);
-        fixed.register_opcode_circuit::<AuipcInstruction<E>>(cs);
-        fixed.register_opcode_circuit::<LuiInstruction<E>>(cs);
         // memory
         fixed.register_opcode_circuit::<SwInstruction<E>>(cs);
         fixed.register_opcode_circuit::<ShInstruction<E>>(cs);
@@ -318,20 +307,19 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         witness: &mut ZKVMWitnesses<E>,
         steps: Vec<StepRecord>,
     ) -> Result<GroupedSteps, ZKVMError> {
-        let mut all_records: BTreeMap<usize, Vec<StepRecord>> = InsnKind::iter()
-            .map(|insn_kind| ((insn_kind as usize), Vec::new()))
+        let mut all_records: BTreeMap<InsnKind, Vec<StepRecord>> = InsnKind::iter()
+            .map(|insn_kind| (insn_kind, Vec::new()))
             .collect();
         let mut halt_records = Vec::new();
         steps.into_iter().for_each(|record| {
-            let insn_kind = record.insn().codes().kind;
+            let insn_kind = record.insn.kind;
             match insn_kind {
                 // ecall / halt
-                EANY if record.rs1().unwrap().value == Platform::ecall_halt() => {
+                InsnKind::ECALL if record.rs1().unwrap().value == Platform::ecall_halt() => {
                     halt_records.push(record);
                 }
                 // other type of ecalls are handled by dummy ecall instruction
                 _ => {
-                    let insn_kind = insn_kind as usize;
                     // it's safe to unwrap as all_records are initialized with Vec::new()
                     all_records.get_mut(&insn_kind).unwrap().push(record);
                 }
@@ -353,7 +341,7 @@ impl<E: ExtensionField> Rv32imConfig<E> {
                 witness.assign_opcode_circuit::<$instruction>(
                     cs,
                     &self.$config,
-                    all_records.remove(&($insn_kind as usize)).unwrap(),
+                    all_records.remove(&($insn_kind)).unwrap(),
                 )?;
             };
         }
@@ -393,8 +381,6 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         // jump
         assign_opcode!(JAL, JalInstruction<E>, jal_config);
         assign_opcode!(JALR, JalrInstruction<E>, jalr_config);
-        assign_opcode!(AUIPC, AuipcInstruction<E>, auipc_config);
-        assign_opcode!(LUI, LuiInstruction<E>, lui_config);
         // memory
         assign_opcode!(LW, LwInstruction<E>, lw_config);
         assign_opcode!(LB, LbInstruction<E>, lb_config);
@@ -411,9 +397,8 @@ impl<E: ExtensionField> Rv32imConfig<E> {
         assert_eq!(
             all_records.keys().cloned().collect::<BTreeSet<_>>(),
             // these are opcodes that haven't been implemented
-            [INVALID, DIV, REM, REMU, EANY]
+            [INVALID, DIV, REM, REMU, ECALL]
                 .into_iter()
-                .map(|insn_kind| insn_kind as usize)
                 .collect::<BTreeSet<_>>(),
         );
         Ok(GroupedSteps(all_records))
@@ -439,7 +424,7 @@ impl<E: ExtensionField> Rv32imConfig<E> {
 }
 
 /// Opaque type to pass unimplemented instructions from Rv32imConfig to DummyExtraConfig.
-pub struct GroupedSteps(BTreeMap<usize, Vec<StepRecord>>);
+pub struct GroupedSteps(BTreeMap<InsnKind, Vec<StepRecord>>);
 
 /// Fake version of what is missing in Rv32imConfig, for some tests.
 pub struct DummyExtraConfig<E: ExtensionField> {
@@ -487,7 +472,7 @@ impl<E: ExtensionField> DummyExtraConfig<E> {
                 witness.assign_opcode_circuit::<$instruction>(
                     cs,
                     &self.$config,
-                    steps.remove(&($insn_kind as usize)).unwrap(),
+                    steps.remove(&($insn_kind)).unwrap(),
                 )?;
             };
         }
@@ -495,10 +480,11 @@ impl<E: ExtensionField> DummyExtraConfig<E> {
         assign_opcode!(DIV, DivDummy<E>, div_config);
         assign_opcode!(REM, RemDummy<E>, rem_config);
         assign_opcode!(REMU, RemuDummy<E>, remu_config);
-        assign_opcode!(EANY, EcallDummy<E>, ecall_config);
+        assign_opcode!(ECALL, EcallDummy<E>, ecall_config);
 
-        let _ = steps.remove(&(INVALID as usize));
-        assert!(steps.is_empty());
+        let _ = steps.remove(&INVALID);
+        let keys: Vec<&InsnKind> = steps.keys().collect::<Vec<_>>();
+        assert!(steps.is_empty(), "unimplemented opcodes: {:?}", keys);
         Ok(())
     }
 }
diff --git a/ceno_zkvm/src/instructions/riscv/shift_imm.rs b/ceno_zkvm/src/instructions/riscv/shift_imm.rs
index 50ebe4ff0..b16dd5bf2 100644
--- a/ceno_zkvm/src/instructions/riscv/shift_imm.rs
+++ b/ceno_zkvm/src/instructions/riscv/shift_imm.rs
@@ -188,7 +188,7 @@ impl<E: ExtensionField, I: RIVInstruction> Instruction<E> for ShiftImmInstructio
 
 #[cfg(test)]
 mod test {
-    use ceno_emul::{Change, InsnKind, PC_STEP_SIZE, StepRecord, encode_rv32};
+    use ceno_emul::{Change, InsnKind, PC_STEP_SIZE, StepRecord, encode_rv32u};
     use goldilocks::GoldilocksExt2;
 
     use super::{ShiftImmInstruction, SlliOp, SraiOp, SrliOp};
@@ -255,17 +255,17 @@ mod test {
         let (prefix, insn_code, rd_written) = match I::INST_KIND {
             InsnKind::SLLI => (
                 "SLLI",
-                encode_rv32(InsnKind::SLLI, 2, 0, 4, imm),
+                encode_rv32u(InsnKind::SLLI, 2, 0, 4, imm),
                 rs1_read << imm,
             ),
             InsnKind::SRAI => (
                 "SRAI",
-                encode_rv32(InsnKind::SRAI, 2, 0, 4, imm),
+                encode_rv32u(InsnKind::SRAI, 2, 0, 4, imm),
                 (rs1_read as i32 >> imm as i32) as u32,
             ),
             InsnKind::SRLI => (
                 "SRLI",
-                encode_rv32(InsnKind::SRLI, 2, 0, 4, imm),
+                encode_rv32u(InsnKind::SRLI, 2, 0, 4, imm),
                 rs1_read >> imm,
             ),
             _ => unreachable!(),
diff --git a/ceno_zkvm/src/instructions/riscv/slti.rs b/ceno_zkvm/src/instructions/riscv/slti.rs
index a0766b425..cbab2fe16 100644
--- a/ceno_zkvm/src/instructions/riscv/slti.rs
+++ b/ceno_zkvm/src/instructions/riscv/slti.rs
@@ -228,7 +228,7 @@ mod test {
         let mut cs = ConstraintSystem::<GoldilocksExt2>::new(|| "riscv");
         let mut cb = CircuitBuilder::new(&mut cs);
 
-        let insn_code = encode_rv32(I::INST_KIND, 2, 0, 4, imm as u32);
+        let insn_code = encode_rv32(I::INST_KIND, 2, 0, 4, imm);
 
         let config = cb
             .namespace(
diff --git a/ceno_zkvm/src/instructions/riscv/test_utils.rs b/ceno_zkvm/src/instructions/riscv/test_utils.rs
deleted file mode 100644
index 416ce628c..000000000
--- a/ceno_zkvm/src/instructions/riscv/test_utils.rs
+++ /dev/null
@@ -1,23 +0,0 @@
-pub fn imm_b(imm: i32) -> u32 {
-    // imm is 13 bits in B-type
-    imm_with_max_valid_bits(imm, 13)
-}
-
-pub fn imm_i(imm: i32) -> u32 {
-    // imm is 12 bits in I-type
-    imm_with_max_valid_bits(imm, 12)
-}
-
-pub fn imm_j(imm: i32) -> u32 {
-    // imm is 21 bits in J-type
-    imm_with_max_valid_bits(imm, 21)
-}
-
-fn imm_with_max_valid_bits(imm: i32, bits: u32) -> u32 {
-    imm as u32 & !(u32::MAX << bits)
-}
-
-pub fn imm_u(imm: u32) -> u32 {
-    // valid imm is imm[12:31] in U-type
-    imm << 12
-}
diff --git a/ceno_zkvm/src/instructions/riscv/u_insn.rs b/ceno_zkvm/src/instructions/riscv/u_insn.rs
deleted file mode 100644
index 719ee2df1..000000000
--- a/ceno_zkvm/src/instructions/riscv/u_insn.rs
+++ /dev/null
@@ -1,65 +0,0 @@
-use ceno_emul::{InsnKind, StepRecord};
-use ff_ext::ExtensionField;
-
-use crate::{
-    chip_handler::RegisterExpr,
-    circuit_builder::CircuitBuilder,
-    error::ZKVMError,
-    expression::{Expression, ToExpr},
-    instructions::riscv::insn_base::{StateInOut, WriteRD},
-    tables::InsnRecord,
-    witness::LkMultiplicity,
-};
-
-/// This config handles the common part of the U-type instruction:
-/// - PC, cycle, fetch
-/// - Register access
-///
-/// It does not witness the output rd value or instruction immediate
-#[derive(Debug)]
-pub struct UInstructionConfig<E: ExtensionField> {
-    pub vm_state: StateInOut<E>,
-    pub rd: WriteRD<E>,
-}
-
-impl<E: ExtensionField> UInstructionConfig<E> {
-    pub fn construct_circuit(
-        circuit_builder: &mut CircuitBuilder<E>,
-        insn_kind: InsnKind,
-        imm: &Expression<E>,
-        rd_written: RegisterExpr<E>,
-    ) -> Result<Self, ZKVMError> {
-        // State in and out
-        let vm_state = StateInOut::construct_circuit(circuit_builder, false)?;
-
-        // Registers
-        let rd = WriteRD::construct_circuit(circuit_builder, rd_written, vm_state.ts)?;
-
-        // Fetch instruction
-        circuit_builder.lk_fetch(&InsnRecord::new(
-            vm_state.pc.expr(),
-            insn_kind.into(),
-            Some(rd.id.expr()),
-            0.into(),
-            0.into(),
-            imm.clone(),
-        ))?;
-
-        Ok(UInstructionConfig { vm_state, rd })
-    }
-
-    pub fn assign_instance(
-        &self,
-        instance: &mut [<E as ExtensionField>::BaseField],
-        lk_multiplicity: &mut LkMultiplicity,
-        step: &StepRecord,
-    ) -> Result<(), ZKVMError> {
-        self.vm_state.assign_instance(instance, step)?;
-        self.rd.assign_instance(instance, lk_multiplicity, step)?;
-
-        // Fetch the instruction.
-        lk_multiplicity.fetch(step.pc().before.0);
-
-        Ok(())
-    }
-}
diff --git a/ceno_zkvm/src/scheme/mock_prover.rs b/ceno_zkvm/src/scheme/mock_prover.rs
index 21b6d4c05..d2ce320ee 100644
--- a/ceno_zkvm/src/scheme/mock_prover.rs
+++ b/ceno_zkvm/src/scheme/mock_prover.rs
@@ -17,7 +17,7 @@ use crate::{
 };
 use ark_std::test_rng;
 use base64::{Engine, engine::general_purpose::STANDARD_NO_PAD};
-use ceno_emul::{ByteAddr, CENO_PLATFORM, PC_WORD_SIZE, Program};
+use ceno_emul::{ByteAddr, CENO_PLATFORM, Program};
 use ff::Field;
 use ff_ext::ExtensionField;
 use generic_static::StaticTypeMap;
@@ -26,7 +26,7 @@ use itertools::{Itertools, enumerate, izip};
 use multilinear_extensions::{mle::IntoMLEs, virtual_poly_v2::ArcMultilinearExtension};
 use rand::thread_rng;
 use std::{
-    collections::{BTreeMap, HashMap, HashSet},
+    collections::{HashMap, HashSet},
     fs::File,
     hash::Hash,
     io::{BufReader, ErrorKind},
@@ -400,7 +400,7 @@ impl<'a, E: ExtensionField + Hash> MockProver<E> {
     pub fn run(
         cb: &CircuitBuilder<E>,
         wits_in: &[ArcMultilinearExtension<'a, E>],
-        programs: &[u32],
+        programs: &[ceno_emul::Instruction],
         lkm: Option<LkMultiplicity>,
     ) -> Result<(), Vec<MockProverError<E>>> {
         Self::run_maybe_challenge(cb, wits_in, programs, &[], None, lkm)
@@ -409,33 +409,16 @@ impl<'a, E: ExtensionField + Hash> MockProver<E> {
     fn run_maybe_challenge(
         cb: &CircuitBuilder<E>,
         wits_in: &[ArcMultilinearExtension<'a, E>],
-        input_programs: &[u32],
+        input_programs: &[ceno_emul::Instruction],
         pi: &[ArcMultilinearExtension<'a, E>],
         challenge: Option<[E; 2]>,
         lkm: Option<LkMultiplicity>,
     ) -> Result<(), Vec<MockProverError<E>>> {
-        // fix the program table
-        let instructions = input_programs
-            .iter()
-            .cloned()
-            .chain(std::iter::repeat(0))
-            .take(MOCK_PROGRAM_SIZE)
-            .collect_vec();
-        let image = instructions
-            .iter()
-            .enumerate()
-            .map(|(insn_idx, &insn)| {
-                (
-                    CENO_PLATFORM.pc_base() + (insn_idx * PC_WORD_SIZE) as u32,
-                    insn,
-                )
-            })
-            .collect::<BTreeMap<u32, u32>>();
         let program = Program::new(
             CENO_PLATFORM.pc_base(),
             CENO_PLATFORM.pc_base(),
-            instructions,
-            image,
+            input_programs.to_vec(),
+            Default::default(),
         );
 
         // load tables
@@ -667,7 +650,7 @@ impl<'a, E: ExtensionField + Hash> MockProver<E> {
     pub fn assert_with_expected_errors(
         cb: &CircuitBuilder<E>,
         wits_in: &[ArcMultilinearExtension<'a, E>],
-        programs: &[u32],
+        programs: &[ceno_emul::Instruction],
         constraint_names: &[&str],
         challenge: Option<[E; 2]>,
         lkm: Option<LkMultiplicity>,
@@ -719,7 +702,7 @@ Hints:
     pub fn assert_satisfied_raw(
         cb: &CircuitBuilder<E>,
         raw_witin: RowMajorMatrix<E::BaseField>,
-        programs: &[u32],
+        programs: &[ceno_emul::Instruction],
         challenge: Option<[E; 2]>,
         lkm: Option<LkMultiplicity>,
     ) {
@@ -734,7 +717,7 @@ Hints:
     pub fn assert_satisfied(
         cb: &CircuitBuilder<E>,
         wits_in: &[ArcMultilinearExtension<'a, E>],
-        programs: &[u32],
+        programs: &[ceno_emul::Instruction],
         challenge: Option<[E; 2]>,
         lkm: Option<LkMultiplicity>,
     ) {
diff --git a/ceno_zkvm/src/scheme/tests.rs b/ceno_zkvm/src/scheme/tests.rs
index 945e49730..8a97ff437 100644
--- a/ceno_zkvm/src/scheme/tests.rs
+++ b/ceno_zkvm/src/scheme/tests.rs
@@ -3,8 +3,8 @@ use std::marker::PhantomData;
 use ark_std::test_rng;
 use ceno_emul::{
     CENO_PLATFORM,
-    InsnKind::{ADD, EANY},
-    PC_WORD_SIZE, Platform, Program, StepRecord, VMState,
+    InsnKind::{ADD, ECALL},
+    Platform, Program, StepRecord, VMState, encode_rv32,
 };
 use ff::Field;
 use ff_ext::ExtensionField;
@@ -18,7 +18,6 @@ use transcript::{BasicTranscript, Transcript};
 
 use crate::{
     circuit_builder::CircuitBuilder,
-    declare_program,
     error::ZKVMError,
     expression::{ToExpr, WitIn},
     instructions::{
@@ -187,23 +186,12 @@ fn test_rw_lk_expression_combination() {
     test_rw_lk_expression_combination_inner::<17, 61>();
 }
 
-const PROGRAM_SIZE: usize = 4;
-#[allow(clippy::unusual_byte_groupings)]
-const ECALL_HALT: u32 = 0b_000000000000_00000_000_00000_1110011;
-#[allow(clippy::unusual_byte_groupings)]
-const PROGRAM_CODE: [u32; PROGRAM_SIZE] = {
-    let mut program: [u32; PROGRAM_SIZE] = [ECALL_HALT; PROGRAM_SIZE];
-
-    declare_program!(
-        program,
-        // func7   rs2   rs1   f3  rd    opcode
-        0b_0000000_00100_00001_000_00100_0110011, // add x4, x4, x1 <=> addi x4, x4, 1
-        ECALL_HALT,                               // ecall halt
-        ECALL_HALT,                               // ecall halt
-        ECALL_HALT,                               // ecall halt
-    );
-    program
-};
+const PROGRAM_CODE: [ceno_emul::Instruction; 4] = [
+    encode_rv32(ADD, 4, 1, 4, 0),
+    encode_rv32(ECALL, 0, 0, 0, 0),
+    encode_rv32(ECALL, 0, 0, 0, 0),
+    encode_rv32(ECALL, 0, 0, 0, 0),
+];
 
 #[ignore = "this case is already tested in riscv_example as ecall_halt has only one instance"]
 #[test]
@@ -216,16 +204,7 @@ fn test_single_add_instance_e2e() {
         CENO_PLATFORM.pc_base(),
         CENO_PLATFORM.pc_base(),
         PROGRAM_CODE.to_vec(),
-        PROGRAM_CODE
-            .iter()
-            .enumerate()
-            .map(|(insn_idx, &insn)| {
-                (
-                    (insn_idx * PC_WORD_SIZE) as u32 + CENO_PLATFORM.pc_base(),
-                    insn,
-                )
-            })
-            .collect(),
+        Default::default(),
     );
 
     let pcs_param = Pcs::setup(1 << MAX_NUM_VARIABLES).expect("Basefold PCS setup");
@@ -271,10 +250,10 @@ fn test_single_add_instance_e2e() {
     let mut add_records = vec![];
     let mut halt_records = vec![];
     all_records.into_iter().for_each(|record| {
-        let kind = record.insn().codes().kind;
+        let kind = record.insn().kind;
         match kind {
             ADD => add_records.push(record),
-            EANY => {
+            ECALL => {
                 if record.rs1().unwrap().value == Platform::ecall_halt() {
                     halt_records.push(record);
                 }
diff --git a/ceno_zkvm/src/tables/program.rs b/ceno_zkvm/src/tables/program.rs
index a5ae2ad7f..5695ee247 100644
--- a/ceno_zkvm/src/tables/program.rs
+++ b/ceno_zkvm/src/tables/program.rs
@@ -13,27 +13,13 @@ use crate::{
     witness::RowMajorMatrix,
 };
 use ceno_emul::{
-    DecodedInstruction, InsnCodes, InsnFormat::*, InsnKind::*, PC_STEP_SIZE, Program, WORD_SIZE,
+    InsnFormat, InsnFormat::*, InsnKind::*, Instruction, PC_STEP_SIZE, Program, WORD_SIZE,
 };
 use ff_ext::ExtensionField;
 use goldilocks::SmallField;
 use itertools::Itertools;
 use rayon::iter::{IndexedParallelIterator, IntoParallelIterator, ParallelIterator};
 
-#[macro_export]
-macro_rules! declare_program {
-    ($program:ident, $($instr:expr),* $(,)?) => {
-
-        {
-            let mut _i = 0;
-            $(
-                $program[_i] = $instr;
-                _i += 1;
-            )*
-        }
-    };
-}
-
 /// This structure establishes the order of the fields in instruction records, common to the program table and circuit fetches.
 #[derive(Clone, Debug)]
 pub struct InsnRecord<T>([T; 6]);
@@ -43,7 +29,7 @@ impl<T> InsnRecord<T> {
     where
         T: From<u32>,
     {
-        let rd = rd.unwrap_or_else(|| T::from(DecodedInstruction::RD_NULL));
+        let rd = rd.unwrap_or_else(|| T::from(Instruction::RD_NULL));
         InsnRecord([pc, kind, rd, rs1, rs2, imm_internal])
     }
 
@@ -53,10 +39,10 @@ impl<T> InsnRecord<T> {
 }
 
 impl<F: SmallField> InsnRecord<F> {
-    fn from_decoded(pc: u32, insn: &DecodedInstruction) -> Self {
+    fn from_decoded(pc: u32, insn: &Instruction) -> Self {
         InsnRecord([
             (pc as u64).into(),
-            (insn.codes().kind as u64).into(),
+            (insn.kind as u64).into(),
             (insn.rd_internal() as u64).into(),
             (insn.rs1_or_zero() as u64).into(),
             (insn.rs2_or_zero() as u64).into(),
@@ -73,25 +59,17 @@ impl InsnRecord<()> {
     /// - `as u32` and `as i32` as usual.
     /// - `i64_to_base(imm)` gives the field element going into the program table.
     /// - `as u64` in unsigned cases.
-    pub fn imm_internal(insn: &DecodedInstruction) -> i64 {
-        let imm: u32 = insn.immediate();
-        match insn.codes() {
+    pub fn imm_internal(insn: &Instruction) -> i64 {
+        match (insn.kind, InsnFormat::from(insn.kind)) {
             // Prepare the immediate for ShiftImmInstruction.
             // The shift is implemented as a multiplication/division by 1 << immediate.
-            InsnCodes {
-                kind: SLLI | SRLI | SRAI,
-                ..
-            } => 1 << (imm & 0x1F),
+            (SLLI | SRLI | SRAI, _) => 1 << insn.imm,
             // Unsigned view.
             // For example, u32::MAX is `u32::MAX mod p` in the finite field.
-            InsnCodes { format: R | U, .. }
-            | InsnCodes {
-                kind: ADDI | SLTIU | ANDI | XORI | ORI,
-                ..
-            } => imm as u64 as i64,
+            (_, R | U) | (ADDI | SLTIU | ANDI | XORI | ORI, _) => insn.imm as u32 as i64,
             // Signed view.
             // For example, u32::MAX is `-1 mod p` in the finite field.
-            _ => imm as i32 as i64,
+            _ => insn.imm as i64,
         }
     }
 }
@@ -171,7 +149,7 @@ impl<E: ExtensionField> TableCircuit<E> for ProgramTableCircuit<E> {
             .zip((0..num_instructions).into_par_iter())
             .for_each(|(row, i)| {
                 let pc = pc_base + (i * PC_STEP_SIZE) as u32;
-                let insn = DecodedInstruction::new(program.instructions[i]);
+                let insn = program.instructions[i];
                 let values: InsnRecord<_> = InsnRecord::from_decoded(pc, &insn);
 
                 // Copy all the fields.
@@ -222,29 +200,6 @@ mod tests {
     use ff::Field;
     use goldilocks::{Goldilocks as F, GoldilocksExt2 as E};
 
-    #[test]
-    #[allow(clippy::identity_op)]
-    fn test_decode_imm() {
-        for (i, expected) in [
-            // Example of I-type: ADDI.
-            // imm    | rs1     | funct3      | rd     | opcode
-            (89 << 20 | 1 << 15 | 0b000 << 12 | 1 << 7 | 0x13, 89),
-            // Shifts get a precomputed power of 2: SLLI, SRLI, SRAI.
-            (31 << 20 | 1 << 15 | 0b001 << 12 | 1 << 7 | 0x13, 1 << 31),
-            (31 << 20 | 1 << 15 | 0b101 << 12 | 1 << 7 | 0x13, 1 << 31),
-            (
-                1 << 30 | 31 << 20 | 1 << 15 | 0b101 << 12 | 1 << 7 | 0x13,
-                1 << 31,
-            ),
-            // Example of R-type with funct7: SUB.
-            // funct7     | rs2    | rs1     | funct3      | rd     | opcode
-            (0x20 << 25 | 1 << 20 | 1 << 15 | 0 << 12 | 1 << 7 | 0x33, 0),
-        ] {
-            let imm = InsnRecord::imm_internal(&DecodedInstruction::new(i));
-            assert_eq!(imm, expected);
-        }
-    }
-
     #[test]
     fn test_program_padding() {
         let mut cs = ConstraintSystem::<E>::new(|| "riscv");
diff --git a/clippy.toml b/clippy.toml
index 6e64e6f38..87fda2227 100644
--- a/clippy.toml
+++ b/clippy.toml
@@ -2,9 +2,10 @@
 # Eg removing syn is blocked by ark-ff-asm cutting a new release
 # (https://github.com/arkworks-rs/algebra/issues/813) amongst other things.
 allowed-duplicate-crates = [
-  "syn",
-  "windows-sys",
+  "hashbrown",
+  "itertools",
   "regex-automata",
   "regex-syntax",
-  "itertools",
+  "syn",
+  "windows-sys",
 ]

From af836bfaf945479a57ec14f6385d9067c55adaa5 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matthias=20G=C3=B6rgens?= <matthias.goergens@gmail.com>
Date: Fri, 13 Dec 2024 13:22:42 +0800
Subject: [PATCH 3/5] Make `cargo build` work (#719)

Right now a simple `cargo build` fails.  So let's fix that.

This PR also disentangles `ceno_rt`, so that we can work on building
guest programs much easier.

Update: I extracted the remove of the target matrix into
https://github.com/scroll-tech/ceno/pull/725 for ease of review.

This PR also enables debug assertions for our tests.
---
 .github/workflows/integration.yml             |  3 +-
 .github/workflows/lints.yml                   |  8 +-
 .github/workflows/tests.yml                   |  2 +-
 Cargo.lock                                    | 87 ++++---------------
 Cargo.toml                                    | 10 ++-
 Makefile.toml                                 |  7 --
 README.md                                     |  3 +-
 ceno_zkvm/Makefile.toml                       |  4 +-
 examples-builder/build.rs                     |  8 +-
 examples/.cargo/config.toml                   |  1 -
 examples/Cargo.toml                           |  9 --
 examples/rust-toolchain.toml                  |  1 -
 {ceno_rt => guest}/.cargo/config.toml         |  0
 {examples => guest}/Cargo.lock                | 12 +--
 guest/Cargo.toml                              | 15 ++++
 {ceno_rt => guest/ceno_rt}/Cargo.toml         |  0
 {ceno_rt => guest/ceno_rt}/README.md          |  0
 {ceno_rt => guest/ceno_rt}/build.rs           |  0
 {ceno_rt => guest/ceno_rt}/ceno_link.x        |  0
 {ceno_rt => guest/ceno_rt}/memory.x           |  0
 {ceno_rt => guest/ceno_rt}/src/allocator.rs   |  0
 {ceno_rt => guest/ceno_rt}/src/io.rs          |  0
 {ceno_rt => guest/ceno_rt}/src/lib.rs         |  0
 {ceno_rt => guest/ceno_rt}/src/params.rs      |  0
 guest/examples/Cargo.toml                     | 13 +++
 .../examples}/examples/ceno_rt_alloc.rs       |  0
 .../examples}/examples/ceno_rt_io.rs          |  0
 .../examples}/examples/ceno_rt_mem.rs         |  0
 .../examples}/examples/ceno_rt_mini.rs        |  0
 .../examples}/examples/ceno_rt_panic.rs       |  0
 mpcs/src/basefold.rs                          |  2 -
 31 files changed, 73 insertions(+), 112 deletions(-)
 delete mode 120000 examples/.cargo/config.toml
 delete mode 100644 examples/Cargo.toml
 delete mode 120000 examples/rust-toolchain.toml
 rename {ceno_rt => guest}/.cargo/config.toml (100%)
 rename {examples => guest}/Cargo.lock (88%)
 create mode 100644 guest/Cargo.toml
 rename {ceno_rt => guest/ceno_rt}/Cargo.toml (100%)
 rename {ceno_rt => guest/ceno_rt}/README.md (100%)
 rename {ceno_rt => guest/ceno_rt}/build.rs (100%)
 rename {ceno_rt => guest/ceno_rt}/ceno_link.x (100%)
 rename {ceno_rt => guest/ceno_rt}/memory.x (100%)
 rename {ceno_rt => guest/ceno_rt}/src/allocator.rs (100%)
 rename {ceno_rt => guest/ceno_rt}/src/io.rs (100%)
 rename {ceno_rt => guest/ceno_rt}/src/lib.rs (100%)
 rename {ceno_rt => guest/ceno_rt}/src/params.rs (100%)
 create mode 100644 guest/examples/Cargo.toml
 rename {examples => guest/examples}/examples/ceno_rt_alloc.rs (100%)
 rename {examples => guest/examples}/examples/ceno_rt_io.rs (100%)
 rename {examples => guest/examples}/examples/ceno_rt_mem.rs (100%)
 rename {examples => guest/examples}/examples/ceno_rt_mini.rs (100%)
 rename {examples => guest/examples}/examples/ceno_rt_panic.rs (100%)

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index b882ca931..934fe3f3e 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -43,18 +43,17 @@ jobs:
             ~/.cargo/registry/cache/
             ~/.cargo/git/db/
             target/
+            guest/target/
           key: integration-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
       - uses: dtolnay/rust-toolchain@nightly
 
       - name: Run example
         env:
-          RAYON_NUM_THREADS: 2
           RUSTFLAGS: "-C opt-level=3"
         run: cargo run --package ceno_zkvm --example riscv_opcodes -- --start 10 --end 11
 
       - name: Run fibonacci
         env:
-          RAYON_NUM_THREADS: 8
           RUST_LOG: debug
           RUSTFLAGS: "-C opt-level=3"
         run: cargo run --package ceno_zkvm --bin e2e -- --platform=sp1 ceno_zkvm/examples/fibonacci.elf
diff --git a/.github/workflows/lints.yml b/.github/workflows/lints.yml
index fe7367a54..207769aa8 100644
--- a/.github/workflows/lints.yml
+++ b/.github/workflows/lints.yml
@@ -49,7 +49,7 @@ jobs:
             ~/.cargo/registry/cache/
             ~/.cargo/git/db/
             target/
-            examples/target/
+            guest/target/
           key: lint-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Install cargo make
@@ -62,13 +62,13 @@ jobs:
         env:
           RUSTFLAGS: "-Dwarnings"
         run: |
-          cargo check --workspace --all-targets --exclude ceno_rt
+          cargo check --workspace --all-targets
           # We have a lot of code under #[cfg(not(debug_assertions))] and similar,
           # so we need to run cargo check in release mode, too:
-          cargo check --workspace --all-targets --exclude ceno_rt --release
+          cargo check --workspace --all-targets --release
           cargo make clippy
           # Same for clippy:
-          cargo clippy --workspace --all-targets --exclude ceno_rt --release
+          cargo clippy --workspace --all-targets --release
 
       - name: Install taplo
         run: taplo --version || cargo install taplo-cli
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index da0784782..e7d224995 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -47,7 +47,7 @@ jobs:
             ~/.cargo/registry/cache/
             ~/.cargo/git/db/
             target/
-            examples/target/
+            guest/target/
           key: tests-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
 
       - name: Install cargo make
diff --git a/Cargo.lock b/Cargo.lock
index 865b783f5..35ff0505a 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -244,13 +244,6 @@ dependencies = [
  "tracing",
 ]
 
-[[package]]
-name = "ceno_rt"
-version = "0.1.0"
-dependencies = [
- "riscv",
-]
-
 [[package]]
 name = "ceno_zkvm"
 version = "0.1.0"
@@ -468,12 +461,6 @@ dependencies = [
  "itertools 0.10.5",
 ]
 
-[[package]]
-name = "critical-section"
-version = "1.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "790eea4361631c5e7d22598ecd5723ff611904e3344ce8720784c93e3d83d40b"
-
 [[package]]
 name = "crossbeam-channel"
 version = "0.5.13"
@@ -602,12 +589,6 @@ version = "0.7.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "4445909572dbd556c457c849c4ca58623d84b27c8fff1e74b0b4227d8b90d17b"
 
-[[package]]
-name = "embedded-hal"
-version = "1.0.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "361a90feb7004eca4019fb28352a9465666b24f840f5c3cddf0ff13920590b89"
-
 [[package]]
 name = "encode_unicode"
 version = "1.0.0"
@@ -655,9 +636,9 @@ dependencies = [
 
 [[package]]
 name = "fastrand"
-version = "2.2.0"
+version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "486f806e73c5707928240ddc295403b1b93c96a02038563881c4a2fd84b81ac4"
+checksum = "37909eebbb50d72f9059c3b6d82c0463f2ff062c9e95845c43a6c9c0355411be"
 
 [[package]]
 name = "ff"
@@ -973,9 +954,9 @@ dependencies = [
 
 [[package]]
 name = "libc"
-version = "0.2.167"
+version = "0.2.168"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "09d6582e104315a817dff97f75133544b2e094ee22447d2acf4a74e189ba06fc"
+checksum = "5aaeb2981e0606ca11d79718f8bb01164f1d6ed75080182d3abf017e6d244b6d"
 
 [[package]]
 name = "libredox"
@@ -1416,7 +1397,7 @@ dependencies = [
  "smallvec",
  "symbolic-demangle",
  "tempfile",
- "thiserror 2.0.4",
+ "thiserror 2.0.6",
 ]
 
 [[package]]
@@ -1548,9 +1529,9 @@ dependencies = [
 
 [[package]]
 name = "redox_syscall"
-version = "0.5.7"
+version = "0.5.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f"
+checksum = "03a862b389f93e68874fbf580b9de08dd02facb9a788ebadaf4a3fd33cf58834"
 dependencies = [
  "bitflags",
 ]
@@ -1619,36 +1600,6 @@ dependencies = [
  "bytemuck",
 ]
 
-[[package]]
-name = "riscv"
-version = "0.12.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "5ea8ff73d3720bdd0a97925f0bf79ad2744b6da8ff36be3840c48ac81191d7a7"
-dependencies = [
- "critical-section",
- "embedded-hal",
- "paste",
- "riscv-macros",
- "riscv-pac",
-]
-
-[[package]]
-name = "riscv-macros"
-version = "0.1.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "f265be5d634272320a7de94cea15c22a3bfdd4eb42eb43edc528415f066a1f25"
-dependencies = [
- "proc-macro2",
- "quote",
- "syn 2.0.90",
-]
-
-[[package]]
-name = "riscv-pac"
-version = "0.2.0"
-source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
-
 [[package]]
 name = "rrs-succinct"
 version = "0.1.0"
@@ -1668,15 +1619,15 @@ checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
 
 [[package]]
 name = "rustix"
-version = "0.38.41"
+version = "0.38.42"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "d7f649912bc1495e167a6edee79151c84b1bad49748cb4f1f1167f459f6224f6"
+checksum = "f93dc38ecbab2eb790ff964bb77fa94faf256fd3e73285fd7ba0903b76bedb85"
 dependencies = [
  "bitflags",
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys 0.52.0",
+ "windows-sys 0.59.0",
 ]
 
 [[package]]
@@ -1708,18 +1659,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
 
 [[package]]
 name = "serde"
-version = "1.0.215"
+version = "1.0.216"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f"
+checksum = "0b9781016e935a97e8beecf0c933758c97a5520d32930e460142b4cd80c6338e"
 dependencies = [
  "serde_derive",
 ]
 
 [[package]]
 name = "serde_derive"
-version = "1.0.215"
+version = "1.0.216"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0"
+checksum = "46f859dbbf73865c6627ed570e78961cd3ac92407a2d117204c49232485da55e"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -1918,11 +1869,11 @@ dependencies = [
 
 [[package]]
 name = "thiserror"
-version = "2.0.4"
+version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "2f49a1853cf82743e3b7950f77e0f4d622ca36cf4317cba00c767838bac8d490"
+checksum = "8fec2a1820ebd077e2b90c4df007bebf344cd394098a13c563957d0afc83ea47"
 dependencies = [
- "thiserror-impl 2.0.4",
+ "thiserror-impl 2.0.6",
 ]
 
 [[package]]
@@ -1938,9 +1889,9 @@ dependencies = [
 
 [[package]]
 name = "thiserror-impl"
-version = "2.0.4"
+version = "2.0.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "8381894bb3efe0c4acac3ded651301ceee58a15d47c2e34885ed1908ad667061"
+checksum = "d65750cab40f4ff1929fb1ba509e9914eb756131cef4210da8d5d700d26f6312"
 dependencies = [
  "proc-macro2",
  "quote",
diff --git a/Cargo.toml b/Cargo.toml
index ead19b880..93cabd8d9 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,9 +1,8 @@
 [workspace]
-exclude = ["examples"]
+exclude = ["guest"]
 members = [
   "ceno_emul",
   "examples-builder",
-  "ceno_rt",
   "mpcs",
   "multilinear_extensions",
   "sumcheck",
@@ -54,6 +53,13 @@ tracing-subscriber = { version = "0.3", features = ["env-filter"] }
 
 [profile.dev]
 lto = "thin"
+# We are running our tests with optimizations turned on to make them faster.
+# Please turn optimizations off, when you want accurate stack traces for debugging.
+opt-level = 2
+
+[profile.dev.package."*"]
+# Set the default for dependencies in Development mode.
+opt-level = 3
 
 [profile.release]
 lto = "thin"
diff --git a/Makefile.toml b/Makefile.toml
index 4a5a93188..199a42528 100644
--- a/Makefile.toml
+++ b/Makefile.toml
@@ -1,7 +1,5 @@
 [env]
 CARGO_MAKE_EXTEND_WORKSPACE_MAKEFILE = true
-CORE = { script = ["nproc"] }
-RAYON_NUM_THREADS = "${CORE}"
 
 [tasks.tests]
 args = [
@@ -11,10 +9,7 @@ args = [
   "--bins",
   "--tests",
   "--examples",
-  "--release",
   "--workspace",
-  "--exclude",
-  "ceno_rt",
 ]
 command = "cargo"
 workspace = false
@@ -34,8 +29,6 @@ args = [
   "clippy",
   "--workspace",
   "--all-targets",
-  "--exclude",
-  "ceno_rt",
   "--",
   "-D",
   "warnings",
diff --git a/README.md b/README.md
index 82f2e8bfe..0c57ac442 100644
--- a/README.md
+++ b/README.md
@@ -33,10 +33,9 @@ Clippy and check work as usual:
 ```sh
 cargo check
 cargo clippy
+cargo build
 ```
 
-Alas, `cargo build` doesn't work. That's a known problem and we're working on it.  Please use `cargo make build` instead for now.
-
 ### Setting up self-hosted CI docker container
 
 To set up docker container for CI, you can run the following command:
diff --git a/ceno_zkvm/Makefile.toml b/ceno_zkvm/Makefile.toml
index a4965ba50..45d27cf51 100644
--- a/ceno_zkvm/Makefile.toml
+++ b/ceno_zkvm/Makefile.toml
@@ -1,9 +1,7 @@
 [env]
 CARGO_MAKE_EXTEND_WORKSPACE_MAKEFILE = true
-CORE = { script = ["nproc"] }
-RAYON_NUM_THREADS = "${CORE}"
 
 [tasks.riscv_opcodes_flamegraph]
 args = ["run", "--package", "ceno_zkvm", "--release", "--example", "riscv_opcodes"]
 command = "cargo"
-env = { "RUST_LOG" = "debug", "RAYON_NUM_THREADS" = "8" }
+env = { "RUST_LOG" = "debug" }
diff --git a/examples-builder/build.rs b/examples-builder/build.rs
index 7ea9b9628..0e8d90441 100644
--- a/examples-builder/build.rs
+++ b/examples-builder/build.rs
@@ -26,7 +26,7 @@ fn build_elfs() {
     // See git history for an attempt to do this.
     let output = Command::new("cargo")
         .args(["build", "--release", "--examples"])
-        .current_dir("../examples")
+        .current_dir("../guest/examples")
         .env_clear()
         .envs(std::env::vars().filter(|x| !x.0.starts_with("CARGO_")))
         .output()
@@ -41,11 +41,11 @@ fn build_elfs() {
             dest,
             r#"#[allow(non_upper_case_globals)]
             pub const {example}: &[u8] =
-                include_bytes!(r"{CARGO_MANIFEST_DIR}/../examples/target/riscv32im-unknown-none-elf/release/examples/{example}");"#
+                include_bytes!(r"{CARGO_MANIFEST_DIR}/../guest/target/riscv32im-unknown-none-elf/release/examples/{example}");"#
         ).expect("failed to write vars.rs");
     }
-    let input_path = "../examples/";
-    let elfs_path = "../examples/target/riscv32im-unknown-none-elf/release/examples/";
+    let input_path = "../guest/";
+    let elfs_path = "../guest/target/riscv32im-unknown-none-elf/release/examples/";
 
     println!("cargo:rerun-if-changed={input_path}");
     println!("cargo:rerun-if-changed={elfs_path}");
diff --git a/examples/.cargo/config.toml b/examples/.cargo/config.toml
deleted file mode 120000
index bade0c189..000000000
--- a/examples/.cargo/config.toml
+++ /dev/null
@@ -1 +0,0 @@
-../../ceno_rt/.cargo/config.toml
\ No newline at end of file
diff --git a/examples/Cargo.toml b/examples/Cargo.toml
deleted file mode 100644
index 85c975ac1..000000000
--- a/examples/Cargo.toml
+++ /dev/null
@@ -1,9 +0,0 @@
-[package]
-edition = "2021"
-name = "examples"
-readme = "README.md"
-resolver = "2"
-version = "0.1.0"
-
-[dependencies]
-ceno_rt = { path = "../ceno_rt" }
diff --git a/examples/rust-toolchain.toml b/examples/rust-toolchain.toml
deleted file mode 120000
index 43fe0ada6..000000000
--- a/examples/rust-toolchain.toml
+++ /dev/null
@@ -1 +0,0 @@
-../rust-toolchain.toml
\ No newline at end of file
diff --git a/ceno_rt/.cargo/config.toml b/guest/.cargo/config.toml
similarity index 100%
rename from ceno_rt/.cargo/config.toml
rename to guest/.cargo/config.toml
diff --git a/examples/Cargo.lock b/guest/Cargo.lock
similarity index 88%
rename from examples/Cargo.lock
rename to guest/Cargo.lock
index 359355930..84c6c3ab6 100644
--- a/examples/Cargo.lock
+++ b/guest/Cargo.lock
@@ -36,9 +36,9 @@ checksum = "57c0d7b74b563b49d38dae00a0c37d4d6de9b432382b2892f0574ddcae73fd0a"
 
 [[package]]
 name = "proc-macro2"
-version = "1.0.88"
+version = "1.0.92"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "7c3a7fc5db1e57d5a779a352c8cdb57b29aa4c40cc69c3a68a7fedc815fbf2f9"
+checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0"
 dependencies = [
  "unicode-ident",
 ]
@@ -84,9 +84,9 @@ checksum = "8188909339ccc0c68cfb5a04648313f09621e8b87dc03095454f1a11f6c5d436"
 
 [[package]]
 name = "syn"
-version = "2.0.82"
+version = "2.0.90"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "83540f837a8afc019423a8edb95b52a8effe46957ee402287f4292fae35be021"
+checksum = "919d3b74a5dd0ccd15aeb8f93e7006bd9e14c295087c9896a110f490752bcf31"
 dependencies = [
  "proc-macro2",
  "quote",
@@ -95,6 +95,6 @@ dependencies = [
 
 [[package]]
 name = "unicode-ident"
-version = "1.0.13"
+version = "1.0.14"
 source = "registry+https://github.com/rust-lang/crates.io-index"
-checksum = "e91b56cd4cadaeb79bbf1a5645f6b4f8dc5bde8834ad5894a8db35fda9efa1fe"
+checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83"
diff --git a/guest/Cargo.toml b/guest/Cargo.toml
new file mode 100644
index 000000000..0aa06be48
--- /dev/null
+++ b/guest/Cargo.toml
@@ -0,0 +1,15 @@
+[workspace]
+members = ["examples", "ceno_rt"]
+resolver = "2"
+
+[workspace.package]
+categories = ["cryptography", "zk", "blockchain", "ceno"]
+edition = "2021"
+keywords = ["cryptography", "zk", "blockchain", "ceno"]
+license = "MIT OR Apache-2.0"
+readme = "../README.md"
+repository = "https://github.com/scroll-tech/ceno"
+version = "0.1.0"
+
+[profile.release]
+lto = "thin"
diff --git a/ceno_rt/Cargo.toml b/guest/ceno_rt/Cargo.toml
similarity index 100%
rename from ceno_rt/Cargo.toml
rename to guest/ceno_rt/Cargo.toml
diff --git a/ceno_rt/README.md b/guest/ceno_rt/README.md
similarity index 100%
rename from ceno_rt/README.md
rename to guest/ceno_rt/README.md
diff --git a/ceno_rt/build.rs b/guest/ceno_rt/build.rs
similarity index 100%
rename from ceno_rt/build.rs
rename to guest/ceno_rt/build.rs
diff --git a/ceno_rt/ceno_link.x b/guest/ceno_rt/ceno_link.x
similarity index 100%
rename from ceno_rt/ceno_link.x
rename to guest/ceno_rt/ceno_link.x
diff --git a/ceno_rt/memory.x b/guest/ceno_rt/memory.x
similarity index 100%
rename from ceno_rt/memory.x
rename to guest/ceno_rt/memory.x
diff --git a/ceno_rt/src/allocator.rs b/guest/ceno_rt/src/allocator.rs
similarity index 100%
rename from ceno_rt/src/allocator.rs
rename to guest/ceno_rt/src/allocator.rs
diff --git a/ceno_rt/src/io.rs b/guest/ceno_rt/src/io.rs
similarity index 100%
rename from ceno_rt/src/io.rs
rename to guest/ceno_rt/src/io.rs
diff --git a/ceno_rt/src/lib.rs b/guest/ceno_rt/src/lib.rs
similarity index 100%
rename from ceno_rt/src/lib.rs
rename to guest/ceno_rt/src/lib.rs
diff --git a/ceno_rt/src/params.rs b/guest/ceno_rt/src/params.rs
similarity index 100%
rename from ceno_rt/src/params.rs
rename to guest/ceno_rt/src/params.rs
diff --git a/guest/examples/Cargo.toml b/guest/examples/Cargo.toml
new file mode 100644
index 000000000..397e743b1
--- /dev/null
+++ b/guest/examples/Cargo.toml
@@ -0,0 +1,13 @@
+[package]
+categories.workspace = true
+description = "Ceno RiscV guest examples"
+edition.workspace = true
+keywords.workspace = true
+license.workspace = true
+name = "examples"
+readme = "README.md"
+repository.workspace = true
+version.workspace = true
+
+[dependencies]
+ceno_rt = { path = "../ceno_rt" }
diff --git a/examples/examples/ceno_rt_alloc.rs b/guest/examples/examples/ceno_rt_alloc.rs
similarity index 100%
rename from examples/examples/ceno_rt_alloc.rs
rename to guest/examples/examples/ceno_rt_alloc.rs
diff --git a/examples/examples/ceno_rt_io.rs b/guest/examples/examples/ceno_rt_io.rs
similarity index 100%
rename from examples/examples/ceno_rt_io.rs
rename to guest/examples/examples/ceno_rt_io.rs
diff --git a/examples/examples/ceno_rt_mem.rs b/guest/examples/examples/ceno_rt_mem.rs
similarity index 100%
rename from examples/examples/ceno_rt_mem.rs
rename to guest/examples/examples/ceno_rt_mem.rs
diff --git a/examples/examples/ceno_rt_mini.rs b/guest/examples/examples/ceno_rt_mini.rs
similarity index 100%
rename from examples/examples/ceno_rt_mini.rs
rename to guest/examples/examples/ceno_rt_mini.rs
diff --git a/examples/examples/ceno_rt_panic.rs b/guest/examples/examples/ceno_rt_panic.rs
similarity index 100%
rename from examples/examples/ceno_rt_panic.rs
rename to guest/examples/examples/ceno_rt_panic.rs
diff --git a/mpcs/src/basefold.rs b/mpcs/src/basefold.rs
index 1aec0fb9d..c7eb18f71 100644
--- a/mpcs/src/basefold.rs
+++ b/mpcs/src/basefold.rs
@@ -947,8 +947,6 @@ where
         transcript: &mut impl Transcript<E>,
     ) -> Result<(), Error> {
         let timer = start_timer!(|| "Basefold::batch_verify");
-        // 	let key = "RAYON_NUM_THREADS";
-        // 	env::set_var(key, "32");
         let comms = comms.iter().collect_vec();
         let num_vars = points.iter().map(|point| point.len()).max().unwrap();
         let num_rounds = num_vars - Spec::get_basecode_msg_size_log();

From 7ea23d47f5b7842616a797dc23052edc170468b0 Mon Sep 17 00:00:00 2001
From: soham <soham@zemse.in>
Date: Fri, 13 Dec 2024 14:23:02 +0530
Subject: [PATCH 4/5] Fix InsnKind in SLT (#750)

Closes #746
---
 ceno_zkvm/src/instructions/riscv/slt.rs | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/ceno_zkvm/src/instructions/riscv/slt.rs b/ceno_zkvm/src/instructions/riscv/slt.rs
index 08ceff8b3..a3e477350 100644
--- a/ceno_zkvm/src/instructions/riscv/slt.rs
+++ b/ceno_zkvm/src/instructions/riscv/slt.rs
@@ -83,7 +83,7 @@ impl<E: ExtensionField, I: RIVInstruction> Instruction<E> for SetLessThanInstruc
 
         let r_insn = RInstructionConfig::<E>::construct_circuit(
             cb,
-            InsnKind::SLT,
+            I::INST_KIND,
             rs1_read.register_expr(),
             rs2_read.register_expr(),
             rd_written.register_expr(),
@@ -159,7 +159,7 @@ mod test {
             .unwrap()
             .unwrap();
 
-        let insn_code = encode_rv32(InsnKind::SLT, 2, 3, 4, 0);
+        let insn_code = encode_rv32(I::INST_KIND, 2, 3, 4, 0);
         let (raw_witin, lkm) = SetLessThanInstruction::<_, I>::assign_instances(
             &config,
             cb.cs.num_witin as usize,

From 0a6437171760190836fd7221181fb2e4f1bb5313 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Matthias=20G=C3=B6rgens?= <matthias.goergens@gmail.com>
Date: Fri, 13 Dec 2024 17:36:14 +0800
Subject: [PATCH 5/5] Speed up CI (#752)

We are spending a lot of time restoring our cache on our self-hosted
runners. So let's try offloading some of the CI work back to GitHub,
specifically hand back lints and tests, but keep integration tests with
our runner.

Also:
- upgrade our actions
- cancel previous runs, when new commits are pushed
---
 .github/workflows/integration.yml |  8 ++++++--
 .github/workflows/lints.yml       | 29 ++++++++---------------------
 .github/workflows/tests.yml       | 28 ++++++++--------------------
 3 files changed, 22 insertions(+), 43 deletions(-)

diff --git a/.github/workflows/integration.yml b/.github/workflows/integration.yml
index 934fe3f3e..8bcc5dfed 100644
--- a/.github/workflows/integration.yml
+++ b/.github/workflows/integration.yml
@@ -8,6 +8,10 @@ on:
     branches:
       - master
 
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
+
 jobs:
   skip_check:
     runs-on: [self-hosted, Linux, X64]
@@ -33,9 +37,9 @@ jobs:
     runs-on: [self-hosted, Linux, X64]
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - name: Cargo cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: |
             ~/.cargo/bin/
diff --git a/.github/workflows/lints.yml b/.github/workflows/lints.yml
index 207769aa8..1f8d41b0b 100644
--- a/.github/workflows/lints.yml
+++ b/.github/workflows/lints.yml
@@ -8,32 +8,18 @@ on:
     branches:
       - master
 
-jobs:
-  skip_check:
-    runs-on: [self-hosted, Linux, X64]
-    outputs:
-      should_skip: ${{ steps.skip_check.outputs.should_skip }}
-    steps:
-      - id: skip_check
-        uses: fkirc/skip-duplicate-actions@v5
-        with:
-          cancel_others: 'true'
-          concurrent_skipping: 'same_content_newer'
-          paths_ignore: '["**/README.md"]'
-          do_not_skip: '["pull_request", "workflow_dispatch", "schedule", "merge_group"]'
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
 
+jobs:
   lints:
-    needs: [skip_check]
-    if: |
-      github.event.pull_request.draft == false &&
-      (github.event.action == 'ready_for_review' || needs.skip_check.outputs.should_skip != 'true')
-
     name: Various lints
     timeout-minutes: 30
-    runs-on: [self-hosted, Linux, X64]
+    runs-on: ubuntu-24.04
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@master
         with:
           components: rustfmt, clippy
@@ -41,7 +27,7 @@ jobs:
           # TODO(Matthias): see whether we can keep this in sync with rust-toolchain.toml automatically?
           toolchain: nightly-2024-12-06
       - name: Cargo cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: |
             ~/.cargo/bin/
@@ -51,6 +37,7 @@ jobs:
             target/
             guest/target/
           key: lint-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: lint-${{ runner.os }}-cargo-
 
       - name: Install cargo make
         run: |
diff --git a/.github/workflows/tests.yml b/.github/workflows/tests.yml
index e7d224995..0f2ea9df3 100644
--- a/.github/workflows/tests.yml
+++ b/.github/workflows/tests.yml
@@ -8,38 +8,25 @@ on:
     branches:
       - master
 
-jobs:
-  skip_check:
-    runs-on: [self-hosted, Linux, X64]
-    outputs:
-      should_skip: ${{ steps.skip_check.outputs.should_skip }}
-    steps:
-      - id: skip_check
-        uses: fkirc/skip-duplicate-actions@v5
-        with:
-          cancel_others: 'true'
-          concurrent_skipping: 'same_content_newer'
-          paths_ignore: '["**/README.md"]'
-          do_not_skip: '["pull_request", "workflow_dispatch", "schedule", "merge_group"]'
+concurrency:
+  group: ${{ github.workflow }}-${{ github.ref || github.run_id }}
+  cancel-in-progress: ${{ github.ref != 'refs/heads/master' }}
 
+jobs:
   tests:
-    needs: [skip_check]
-    if: |
-      github.event.pull_request.draft == false &&
-      (github.event.action == 'ready_for_review' || needs.skip_check.outputs.should_skip != 'true')
     name: Run Tests
     timeout-minutes: 30
-    runs-on: [self-hosted, Linux, X64]
+    runs-on: ubuntu-24.04
 
     steps:
-      - uses: actions/checkout@v2
+      - uses: actions/checkout@v4
       - uses: dtolnay/rust-toolchain@master
         with:
           targets: riscv32im-unknown-none-elf
           # TODO(Matthias): see whether we can keep this in sync with rust-toolchain.toml automatically?
           toolchain: nightly-2024-12-06
       - name: Cargo cache
-        uses: actions/cache@v3
+        uses: actions/cache@v4
         with:
           path: |
             ~/.cargo/bin/
@@ -49,6 +36,7 @@ jobs:
             target/
             guest/target/
           key: tests-${{ runner.os }}-cargo-${{ hashFiles('**/Cargo.lock') }}
+          restore-keys: tests-${{ runner.os }}-cargo-
 
       - name: Install cargo make
         run: |