diff --git a/backend/src/mock/machine.rs b/backend/src/mock/machine.rs
index 1e634224b0..042679958b 100644
--- a/backend/src/mock/machine.rs
+++ b/backend/src/mock/machine.rs
@@ -1,4 +1,4 @@
-use std::collections::BTreeMap;
+use std::{collections::BTreeMap, time::Instant};
 
 use itertools::Itertools;
 use powdr_ast::analyzed::{AlgebraicExpression, AlgebraicReferenceThin, Analyzed, PolyID};
@@ -39,9 +39,14 @@ impl<'a, F: FieldElement> Machine<'a, F> {
         }
 
         for stage in 1..pil.stage_count() {
-            log::debug!("Generating stage-{stage} witness for machine {machine_name}");
+            log::info!("Generating stage-{stage} witness for machine {machine_name}");
+            let start_time = Instant::now();
             witness =
                 witgen_callback.next_stage_witness(pil, &witness, challenges.clone(), stage as u8);
+            log::info!(
+                "Generated stage-{stage} witness for machine {machine_name} in {}s",
+                start_time.elapsed().as_secs_f64()
+            );
         }
 
         let fixed = machine_fixed_columns(fixed, pil);
diff --git a/executor/src/witgen/bus_accumulator.rs b/executor/src/witgen/bus_accumulator.rs
new file mode 100644
index 0000000000..a3f76c27cc
--- /dev/null
+++ b/executor/src/witgen/bus_accumulator.rs
@@ -0,0 +1,182 @@
+use std::collections::BTreeMap;
+
+use powdr_ast::analyzed::Analyzed;
+use powdr_number::FieldElement;
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
+
+/// Returns a copy of `witness_columns`, followed by the mock accumulator columns that
+/// `interaction_columns` computes for each of the 31 simulated connections.
+/// The PIL, the fixed columns and the challenges are not used yet. The witness is cloned
+/// lazily while collecting (no intermediate `to_vec()` copy of the column list).
+pub fn generate_bus_accumulators<T: FieldElement>(
+    _pil: &Analyzed<T>,
+    witness_columns: &[(String, Vec<T>)],
+    _fixed_columns: Vec<(String, &[T])>,
+    _challenges: BTreeMap<u64, T>,
+) -> Vec<(String, Vec<T>)> {
+    let accumulators = (0..31)
+        .into_par_iter()
+        .map(|i| interaction_columns(i, witness_columns))
+        .collect::<Vec<_>>();
+    let new_columns = accumulators.into_iter().flatten();
+    witness_columns.iter().cloned().chain(new_columns).collect()
+}
+
+/// Computes four mock columns for connection `connection_index`: per row, a tuple of
+/// witness cells is folded into a fingerprint (with hard-coded constants) and
+/// `multiplicity / fingerprint` is added to a running sum. Returns the two components of
+/// that sum (`main::acc*`) and of its value in the next row, wrapping (`main::acc_next*`).
+fn interaction_columns<T: FieldElement>(
+    connection_index: usize,
+    witness_columns: &[(String, Vec<T>)],
+) -> Vec<(String, Vec<T>)> {
+    let tuple_size = if connection_index == 0 {
+        // Simulate PC lookup
+        700
+    } else {
+        1 + connection_index / 10
+    };
+
+    // Pick pseudo-random (but deterministic) column indices; panics without witness columns
+    let indices = (0..tuple_size)
+        .map(|j| {
+            ((42usize
+                .wrapping_mul(connection_index * 70 + j)
+                .wrapping_add(123))
+                % 17482394)
+                % witness_columns.len()
+        })
+        .collect::<Vec<_>>();
+
+    let size = witness_columns[0].1.len();
+    let mut acc1 = vec![T::zero(); size];
+    let mut acc2 = vec![T::zero(); size];
+    let mut acc1_next = vec![T::zero(); size];
+    let mut acc2_next = vec![T::zero(); size];
+
+    for i in 0..size {
+        let current_acc = if i == 0 {
+            (T::zero(), T::zero())
+        } else {
+            (acc1[i - 1], acc2[i - 1])
+        };
+
+        let tuple = indices
+            .iter()
+            .map(|&j| witness_columns[j].1[i])
+            .collect::<Vec<_>>();
+
+        let fingerprint = add_ext(
+            fingerprint(&tuple, (T::from(1234), T::from(12345))),
+            (T::from(8764), T::from(876324)),
+        );
+        let multiplicity = T::from(42);
+
+        // acc[i] = acc[i - 1] + multiplicity / fingerprint (in the extension field),
+        // mirroring `add_ext(current_acc, mul_ext(m_ext_next, inv_ext(folded_next)))`.
+        let update = add_ext(
+            current_acc,
+            mul_ext((multiplicity, T::from(0)), inv_ext(fingerprint)),
+        );
+
+        acc1[i] = update.0;
+        acc2[i] = update.1;
+        acc1_next[(i + size - 1) % size] = update.0;
+        acc2_next[(i + size - 1) % size] = update.1;
+    }
+
+    vec![
+        (name("main::acc", connection_index * 2), acc1),
+        (name("main::acc", connection_index * 2 + 1), acc2),
+        (name("main::acc_next", connection_index * 2), acc1_next),
+        (name("main::acc_next", connection_index * 2 + 1), acc2_next),
+    ]
+}
+
+/// Name of column `i` in the family `base`: the plain `base` for index 0,
+/// `{base}_{i}` for every other index.
+fn name(base: &str, i: usize) -> String {
+    let suffix = (i != 0).then(|| format!("_{i}")).unwrap_or_default();
+    format!("{base}{suffix}")
+}
+
+/// Multiplies two extension field elements, each given as the coefficient pair
+/// `(c0, c1)` of `c0 + c1 * x`.
+///
+/// Multiplication is modulo the polynomial `x^2 - 11`. We use the fact that
+/// `x^2 == 11 (mod x^2 - 11)`, so:
+///
+/// ```text
+/// (a0 + a1 * x) * (b0 + b1 * x) = a0 * b0 + 11 * a1 * b1 + (a1 * b0 + a0 * b1) * x
+/// ```
+///
+/// Port of the `mul_ext` function on `Fp2<T>`.
+fn mul_ext<T: FieldElement>(a: (T, T), b: (T, T)) -> (T, T) {
+    let ((a0, a1), (b0, b1)) = (a, b);
+    (a0 * b0 + a1 * b1 * T::from(11), a1 * b0 + a0 * b1)
+}
+
+fn add_ext<T: FieldElement>(a: (T, T), b: (T, T)) -> (T, T) {
+    (a.0 + b.0, a.1 + b.1) // component-wise sum of the two coefficient pairs
+}
+
+fn sub_ext<T: FieldElement>(a: (T, T), b: (T, T)) -> (T, T) {
+    (a.0 - b.0, a.1 - b.1) // component-wise difference; not called anywhere in this file
+}
+
+/// Maps `[x_1, x_2, ..., x_n]` to its Reed-Solomon fingerprint, using a challenge
+/// `alpha`:
+///
+/// ```text
+/// sum_{i=1}^{n} alpha^(n - i) * x_i
+/// ```
+///
+/// The `x_i` are base field elements, embedded into the extension field as
+/// `(x_i, 0)`; `alpha` and the result are extension field elements.
+///
+/// Port of the query function `fingerprint: fe[], Fp2<expr> -> Fp2<fe>`, whose
+/// helper `fingerprint_impl(expr_array, alpha, l)` is defined as:
+///
+/// ```text
+/// l == 1: from_base(expr_array[0])
+/// l >  1: add_ext(mul_ext(alpha, fingerprint_impl(.., l - 1)), from_base(expr_array[l - 1]))
+/// ```
+///
+/// Unlike there, `alpha` is a plain value here rather than a lazily evaluated expression.
+fn fingerprint<T: FieldElement>(expr_array: &[T], alpha: (T, T)) -> (T, T) {
+    fingerprint_impl(expr_array, alpha, expr_array.len())
+}
+
+/// Fingerprint of the first `l` entries of `expr_array` (see `fingerprint`).
+///
+/// Evaluated iteratively via Horner's rule (`acc = alpha * acc + x`), so that long tuples
+/// cannot overflow the stack. Yields zero for `l == 0`; panics if `l > expr_array.len()`.
+fn fingerprint_impl<T: FieldElement>(expr_array: &[T], alpha: (T, T), l: usize) -> (T, T) {
+    let zero = (T::zero(), T::zero());
+    // Starting from zero, the first step yields `(expr_array[0], 0)`, the former base case.
+    expr_array[..l]
+        .iter()
+        .fold(zero, |acc, &x| add_ext(mul_ext(alpha, acc), (x, T::zero())))
+}
+
+/// Extension field inversion.
+///
+/// The inverse of `(a0, a1)` is a point `(b0, b1)` such that
+/// `(a0 + a1 * x) * (b0 + b1 * x) = 1 (mod x^2 - 11)`.
+/// Multiplying out and plugging in `x^2 = 11` yields the following system of
+/// linear equations:
+///
+/// ```text
+/// a0 * b0 + 11 * a1 * b1 = 1
+/// a1 * b0 + a0 * b1 = 0
+/// ```
+///
+/// Solving for `(b0, b1)` yields `(-a0 * factor, a1 * factor)` with
+/// `factor = 1 / (11 * a1 * a1 - a0 * a0)`.
+///
+/// Port of the `inv_ext` function on `Fp2<fe>`.
+fn inv_ext<T: FieldElement>(a: (T, T)) -> (T, T) {
+    let (a0, a1) = a;
+    let factor = T::from(1) / (T::from(11) * a1 * a1 - a0 * a0);
+    (-a0 * factor, a1 * factor)
+}
diff --git a/executor/src/witgen/machines/dynamic_machine.rs b/executor/src/witgen/machines/dynamic_machine.rs
index 7c95996a0a..78cfecc422 100644
--- a/executor/src/witgen/machines/dynamic_machine.rs
+++ b/executor/src/witgen/machines/dynamic_machine.rs
@@ -230,6 +230,8 @@ impl<'a, T: FieldElement> DynamicMachine<'a, T> {
             &self.parts,
             SolverState::new(data, self.publics.clone()),
             mutable_state,
+            self.degree,
+            true,
         );
         if let Some(outer_query) = outer_query {
             processor = processor.with_outer_query(outer_query);
diff --git a/executor/src/witgen/machines/machine_extractor.rs b/executor/src/witgen/machines/machine_extractor.rs
index 7451a34aaa..12174dfe16 100644
--- a/executor/src/witgen/machines/machine_extractor.rs
+++ b/executor/src/witgen/machines/machine_extractor.rs
@@ -14,6 +14,7 @@ use super::sorted_witness_machine::SortedWitnesses;
 use super::FixedData;
 use super::KnownMachine;
 use crate::witgen::machines::dynamic_machine::DynamicMachine;
+use crate::witgen::machines::second_stage_machine::SecondStageMachine;
 use crate::witgen::machines::Connection;
 use crate::witgen::machines::{write_once_memory::WriteOnceMemory, MachineParts};
 use crate::Identity;
@@ -60,26 +61,19 @@ impl<'a, T: FieldElement> MachineExtractor<'a, T> {
             .collect::<Vec<&analyzed::Expression>>();
 
         if self.fixed.stage() > 0 {
-            // We expect later-stage witness columns to be accumulators for lookup and permutation arguments.
-            // These don't behave like normal witness columns (e.g. in a block machine), and they might depend
-            // on witness columns of more than one machine.
-            // Therefore, we treat everything as one big machine. Also, we remove lookups and permutations,
-            // as they are assumed to be handled in stage 0.
-            let polynomial_identities = identities
-                .into_iter()
-                .filter(|identity| matches!(identity, Identity::Polynomial(_)))
-                .collect::<Vec<_>>();
             let machine_parts = MachineParts::new(
                 self.fixed,
                 Default::default(),
-                polynomial_identities,
+                identities,
                 self.fixed.witness_cols.keys().collect::<HashSet<_>>(),
                 prover_functions,
             );
 
-            return build_main_machine(self.fixed, machine_parts)
-                .into_iter()
-                .collect();
+            return vec![KnownMachine::SecondStageMachine(SecondStageMachine::new(
+                "Bus Machine".to_string(),
+                self.fixed,
+                machine_parts,
+            ))];
         }
         let mut machines: Vec<KnownMachine<T>> = vec![];
 
@@ -193,7 +187,7 @@ impl<'a, T: FieldElement> MachineExtractor<'a, T> {
                 }
             }
 
-            let name = suggest_machine_name(&machine_parts);
+            let name = machine_parts.name();
             let id = id_counter;
             id_counter += 1;
             let name_with_type = |t: &str| format!("Secondary machine {id}: {name} ({t})");
@@ -343,18 +337,6 @@ fn log_extracted_machine<T: FieldElement>(parts: &MachineParts<'_, T>) {
     );
 }
 
-fn suggest_machine_name<T: FieldElement>(parts: &MachineParts<'_, T>) -> String {
-    let first_witness = parts.witnesses.iter().next().unwrap();
-    let first_witness_name = parts.column_name(first_witness);
-    let namespace = first_witness_name
-        .rfind("::")
-        .map(|idx| &first_witness_name[..idx]);
-
-    // For machines compiled using Powdr ASM we'll always have a namespace, but as a last
-    // resort we'll use the first witness name.
-    namespace.unwrap_or(first_witness_name).to_string()
-}
-
 #[derive(Default)]
 /// Keeps track of the global set of publics that are referenced by the machine's identities.
 struct PublicsTracker<'a>(BTreeSet<&'a String>);
diff --git a/executor/src/witgen/machines/mod.rs b/executor/src/witgen/machines/mod.rs
index b425fe3d6c..67a12437dd 100644
--- a/executor/src/witgen/machines/mod.rs
+++ b/executor/src/witgen/machines/mod.rs
@@ -17,6 +17,7 @@ use self::double_sorted_witness_machine_16::DoubleSortedWitnesses16;
 use self::double_sorted_witness_machine_32::DoubleSortedWitnesses32;
 pub use self::fixed_lookup_machine::FixedLookup;
 use self::profiling::{record_end, record_start};
+use self::second_stage_machine::SecondStageMachine;
 use self::sorted_witness_machine::SortedWitnesses;
 use self::write_once_memory::WriteOnceMemory;
 
@@ -30,6 +31,7 @@ mod dynamic_machine;
 mod fixed_lookup_machine;
 pub mod machine_extractor;
 pub mod profiling;
+mod second_stage_machine;
 mod sorted_witness_machine;
 mod write_once_memory;
 
@@ -117,6 +119,7 @@ pub enum LookupCell<'a, T> {
 /// This allows us to treat machines uniformly without putting them into a `Box`,
 /// which requires that all lifetime parameters are 'static.
 pub enum KnownMachine<'a, T: FieldElement> {
+    SecondStageMachine(SecondStageMachine<'a, T>),
     SortedWitnesses(SortedWitnesses<'a, T>),
     DoubleSortedWitnesses16(DoubleSortedWitnesses16<'a, T>),
     DoubleSortedWitnesses32(DoubleSortedWitnesses32<'a, T>),
@@ -129,6 +132,7 @@ pub enum KnownMachine<'a, T: FieldElement> {
 impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> {
     fn run<Q: QueryCallback<T>>(&mut self, mutable_state: &MutableState<'a, T, Q>) {
         match self {
+            KnownMachine::SecondStageMachine(m) => m.run(mutable_state),
             KnownMachine::SortedWitnesses(m) => m.run(mutable_state),
             KnownMachine::DoubleSortedWitnesses16(m) => m.run(mutable_state),
             KnownMachine::DoubleSortedWitnesses32(m) => m.run(mutable_state),
@@ -146,6 +150,9 @@ impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> {
         caller_rows: &'b RowPair<'b, 'a, T>,
     ) -> EvalResult<'a, T> {
         match self {
+            KnownMachine::SecondStageMachine(m) => {
+                m.process_plookup(mutable_state, identity_id, caller_rows)
+            }
             KnownMachine::SortedWitnesses(m) => {
                 m.process_plookup(mutable_state, identity_id, caller_rows)
             }
@@ -172,6 +179,7 @@ impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> {
 
     fn name(&self) -> &str {
         match self {
+            KnownMachine::SecondStageMachine(m) => m.name(),
             KnownMachine::SortedWitnesses(m) => m.name(),
             KnownMachine::DoubleSortedWitnesses16(m) => m.name(),
             KnownMachine::DoubleSortedWitnesses32(m) => m.name(),
@@ -187,6 +195,7 @@ impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> {
         mutable_state: &'b MutableState<'a, T, Q>,
     ) -> HashMap<String, Vec<T>> {
         match self {
+            KnownMachine::SecondStageMachine(m) => m.take_witness_col_values(mutable_state),
             KnownMachine::SortedWitnesses(m) => m.take_witness_col_values(mutable_state),
             KnownMachine::DoubleSortedWitnesses16(m) => m.take_witness_col_values(mutable_state),
             KnownMachine::DoubleSortedWitnesses32(m) => m.take_witness_col_values(mutable_state),
@@ -199,6 +208,7 @@ impl<'a, T: FieldElement> Machine<'a, T> for KnownMachine<'a, T> {
 
     fn identity_ids(&self) -> Vec<u64> {
         match self {
+            KnownMachine::SecondStageMachine(m) => m.identity_ids(),
             KnownMachine::SortedWitnesses(m) => m.identity_ids(),
             KnownMachine::DoubleSortedWitnesses16(m) => m.identity_ids(),
             KnownMachine::DoubleSortedWitnesses32(m) => m.identity_ids(),
@@ -323,13 +333,35 @@ impl<'a, T: FieldElement> MachineParts<'a, T> {
         witnesses: HashSet<PolyID>,
         prover_functions: Vec<&'a analyzed::Expression>,
     ) -> Self {
-        Self {
+        let parts = Self {
             fixed_data,
             connections,
             identities,
             witnesses,
             prover_functions,
-        }
+        };
+
+        log::info!(
+            "Machine '{}' has {} identities, {} witnesses, and {} prover functions.",
+            parts.name(),
+            parts.identities.len(),
+            parts.witnesses.len(),
+            parts.prover_functions.len()
+        );
+
+        parts
+    }
+
+    /// Suggests a machine name: the namespace of the first witness, falling back to the
+    /// witness name itself, or to a placeholder if there are no witnesses (never panics).
+    fn name(&self) -> String {
+        let full_name = match self.witnesses.iter().next() {
+            Some(first_witness) => self.column_name(first_witness),
+            None => return "<no witnesses>".to_string(),
+        };
+        // For machines compiled using Powdr ASM we'll always have a namespace.
+        let namespace_end = full_name.rfind("::").unwrap_or(full_name.len());
+        full_name[..namespace_end].to_string()
     }
 
     /// Returns a copy of the machine parts but only containing identities that
diff --git a/executor/src/witgen/machines/second_stage_machine.rs b/executor/src/witgen/machines/second_stage_machine.rs
new file mode 100644
index 0000000000..c957eeb72e
--- /dev/null
+++ b/executor/src/witgen/machines/second_stage_machine.rs
@@ -0,0 +1,199 @@
+use itertools::Itertools;
+use powdr_ast::analyzed::Identity;
+use powdr_number::{DegreeType, FieldElement};
+use std::collections::{BTreeMap, HashMap};
+
+use crate::witgen::block_processor::BlockProcessor;
+use crate::witgen::data_structures::finalizable_data::FinalizableData;
+use crate::witgen::data_structures::mutable_state::MutableState;
+use crate::witgen::machines::{Machine, MachineParts};
+use crate::witgen::processor::SolverState;
+use crate::witgen::rows::{Row, RowIndex, RowPair};
+use crate::witgen::sequence_iterator::{DefaultSequenceIterator, ProcessingSequenceIterator};
+use crate::witgen::vm_processor::VmProcessor;
+use crate::witgen::{EvalResult, FixedData, QueryCallback};
+
+/// A machine responsible for second-phase witness generation.
+/// For example, this might generate the witnesses for a bus accumulator or LogUp argument.
+pub struct SecondStageMachine<'a, T: FieldElement> {
+    fixed_data: &'a FixedData<'a, T>,
+    parts: MachineParts<'a, T>, // restricted to polynomial identities by `new`
+    data: FinalizableData<T>, // rows computed by `run`, taken by `take_witness_col_values`
+    publics: BTreeMap<&'a str, T>, // initialized with `Default::default()` in `new`
+    name: String,
+    degree: DegreeType, // set in `new`, overwritten by `process` with the processor's degree
+}
+
+impl<'a, T: FieldElement> Machine<'a, T> for SecondStageMachine<'a, T> {
+    /// This machine reports no identity IDs.
+    fn identity_ids(&self) -> Vec<u64> {
+        vec![]
+    }
+
+    fn name(&self) -> &str {
+        &self.name
+    }
+
+    /// Fills `self.data` (which must be empty), starting from the partially solved first row.
+    fn run<Q: QueryCallback<T>>(&mut self, mutable_state: &MutableState<'a, T, Q>) {
+        assert!(self.data.is_empty());
+        let initial_row = self.compute_partial_first_row(mutable_state);
+        self.data = self.process(initial_row, mutable_state);
+    }
+
+    /// Always panics, because this machine cannot be called by other machines.
+    fn process_plookup<'b, Q: QueryCallback<T>>(
+        &mut self,
+        _mutable_state: &MutableState<'a, T, Q>,
+        _identity_id: u64,
+        _caller_rows: &'b RowPair<'b, 'a, T>,
+    ) -> EvalResult<'a, T> {
+        panic!("SecondStageMachine can't be called by other machines!")
+    }
+
+    /// Merges the duplicated first row, then returns all columns of `self.data` keyed by name.
+    fn take_witness_col_values<'b, Q: QueryCallback<T>>(
+        &mut self,
+        _mutable_state: &'b MutableState<'a, T, Q>,
+    ) -> HashMap<String, Vec<T>> {
+        log::debug!("Finalizing VM: {}", self.name());
+        self.fix_first_row();
+        self.data
+            .take_transposed()
+            .map(|(id, (values, _))| (self.fixed_data.column_name(&id).to_string(), values))
+            .collect()
+    }
+}
+
+impl<'a, T: FieldElement> SecondStageMachine<'a, T> {
+    /// Keeps only the polynomial identities; the degree is the unique external witness size.
+    pub fn new(name: String, fixed_data: &'a FixedData<'a, T>, parts: MachineParts<'a, T>) -> Self {
+        let data = FinalizableData::new(&parts.witnesses);
+
+        // Only keep polynomial identities. We assume other constraints to be handled in stage 0.
+        let polynomial_identities = parts
+            .identities
+            .into_iter()
+            .filter(|identity| matches!(identity, Identity::Polynomial(_)))
+            .collect::<Vec<_>>();
+        let parts = MachineParts::new(
+            fixed_data,
+            Default::default(),
+            polynomial_identities,
+            parts.witnesses,
+            parts.prover_functions,
+        );
+
+        let witness_sizes = fixed_data
+            .witness_cols
+            .values()
+            .filter_map(|w| w.external_values.as_ref())
+            .map(|values| values.len())
+            .unique()
+            .collect::<Vec<_>>();
+        let degree = witness_sizes.into_iter().exactly_one().unwrap() as DegreeType;
+
+        Self {
+            degree,
+            name,
+            fixed_data,
+            parts,
+            data,
+            publics: Default::default(),
+        }
+    }
+
+    /// Runs the solver on the row pair (degree - 1, 0) in order to partially compute the first
+    /// row from identities like `pc' = (1 - first_step') * <...>`.
+    fn compute_partial_first_row<Q: QueryCallback<T>>(
+        &self,
+        mutable_state: &MutableState<'a, T, Q>,
+    ) -> Row<T> {
+        // Use `BlockProcessor` + `DefaultSequenceIterator` with a "block size" of 0. Because
+        // `BlockProcessor` expects `data` to include the row before and after the block, this
+        // runs the solver on exactly one row pair. `BlockProcessor` is more convenient here than
+        // `VmProcessor` because it does not assert that the row is "complete" afterwards (i.e.,
+        // that all identities are satisfied assuming 0 for unknown values).
+        let data = FinalizableData::with_initial_rows_in_progress(
+            &self.parts.witnesses,
+            [
+                Row::fresh(self.fixed_data, RowIndex::from_i64(-1, self.degree)),
+                Row::fresh(self.fixed_data, RowIndex::from_i64(0, self.degree)),
+            ]
+            .into_iter(),
+        );
+
+        // We're only interested in the first row, so identities without a next reference are
+        // irrelevant. They can also cause problems when some witness columns are provided
+        // externally, e.g. if the last row happens to call into a stateful machine like memory.
+        let next_parts = self.parts.restricted_to_identities_with_next_references();
+        let mut processor = BlockProcessor::new(
+            RowIndex::from_i64(-1, self.degree),
+            // Shouldn't need any publics at this point
+            SolverState::without_publics(data),
+            mutable_state,
+            self.fixed_data,
+            &next_parts,
+            self.degree,
+        );
+        let mut sequence_iterator = ProcessingSequenceIterator::Default(
+            DefaultSequenceIterator::new(0, next_parts.identities.len(), None),
+        );
+        processor.solve(&mut sequence_iterator).unwrap();
+
+        // Ignore any updates to the publics at this point, as we'll re-visit the last row again.
+        let mut block = processor.finish().block;
+        assert!(block.len() == 2);
+        block.pop().unwrap()
+    }
+
+    /// Runs a `VmProcessor` starting from `first_row` and returns the block it produced.
+    fn process<Q: QueryCallback<T>>(
+        &mut self,
+        first_row: Row<T>,
+        mutable_state: &MutableState<'a, T, Q>,
+    ) -> FinalizableData<T> {
+        log::trace!(
+            "Running Second-Stage Machine with the following initial values in the first row:\n{}",
+            first_row.render_values(false, &self.parts)
+        );
+        let data = FinalizableData::with_initial_rows_in_progress(
+            &self.parts.witnesses,
+            [first_row].into_iter(),
+        );
+
+        let mut processor = VmProcessor::new(
+            self.name().to_string(),
+            RowIndex::from_degree(0, self.degree),
+            self.fixed_data,
+            &self.parts,
+            SolverState::new(data, self.publics.clone()),
+            mutable_state,
+            self.degree,
+            false,
+        );
+        processor.run(true);
+        let (updated_data, degree) = processor.finish();
+
+        // The processor might have detected a loop, in which case the degree has changed
+        self.degree = degree;
+
+        updated_data.block
+    }
+
+    /// At the end of the solving algorithm, we'll have computed the first row twice
+    /// (as row 0 and as row <degree>). This function merges the two versions.
+    fn fix_first_row(&mut self) {
+        assert_eq!(self.data.len() as DegreeType, self.degree + 1);
+
+        let last_row = self.data.pop().unwrap();
+        if self.data[0].merge_with(&last_row).is_err() {
+            log::error!("{}", self.data[0].render("First row", false, &self.parts));
+            log::error!("{}", last_row.render("Last row", false, &self.parts));
+            panic!(
+                "Failed to merge the first and last row of the VM '{}'",
+                self.name()
+            );
+        }
+    }
+}
diff --git a/executor/src/witgen/mod.rs b/executor/src/witgen/mod.rs
index 1ce6d6c93b..832a1e0385 100644
--- a/executor/src/witgen/mod.rs
+++ b/executor/src/witgen/mod.rs
@@ -1,6 +1,7 @@
 use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
 use std::sync::Arc;
 
+use bus_accumulator::generate_bus_accumulators;
 use itertools::Itertools;
 use machines::machine_extractor::MachineExtractor;
 use powdr_ast::analyzed::{
@@ -8,7 +9,7 @@ use powdr_ast::analyzed::{
     Expression, FunctionValueDefinition, PolyID, PolynomialType, Symbol, SymbolKind,
     TypedExpression,
 };
-use powdr_ast::parsed::visitor::{AllChildren, ExpressionVisitable};
+use powdr_ast::parsed::visitor::AllChildren;
 use powdr_ast::parsed::{FunctionKind, LambdaExpression};
 use powdr_number::{DegreeType, FieldElement};
 use std::iter::once;
@@ -27,6 +28,7 @@ use self::machines::profiling::{record_end, record_start, reset_and_print_profil
 mod affine_expression;
 pub(crate) mod analysis;
 mod block_processor;
+mod bus_accumulator;
 mod data_structures;
 mod eval_result;
 mod expression_evaluator;
@@ -97,6 +99,25 @@ impl<T: FieldElement> WitgenCallbackContext<T> {
             .collect()
     }
 
+    /// Returns name and values (at `size`) of those entries of `self.fixed_col_values` that
+    /// are fixed columns of `pil`, which might contain only a subset. Panics on a missing size.
+    pub fn select_fixed_columns2(
+        &self,
+        pil: &Analyzed<T>,
+        size: DegreeType,
+    ) -> Vec<(String, &[T])> {
+        let names_in_pil: BTreeSet<_> = pil
+            .constant_polys_in_source_order()
+            .flat_map(|(symbol, _)| symbol.array_elements())
+            .map(|(name, _)| name.clone())
+            .collect();
+        self.fixed_col_values
+            .iter()
+            .filter(|(name, _)| names_in_pil.contains(name))
+            .map(|(name, column)| (name.clone(), column.get_by_size(size).unwrap()))
+            .collect()
+    }
+
     /// Computes the next-stage witness, given the current witness and challenges.
     /// All columns in the provided PIL are expected to have the same size.
     /// Typically, this function should be called once per machine.
@@ -108,11 +129,14 @@ impl<T: FieldElement> WitgenCallbackContext<T> {
         stage: u8,
     ) -> Vec<(String, Vec<T>)> {
         let size = current_witness.iter().next().unwrap().1.len() as DegreeType;
-        let fixed_col_values = self.select_fixed_columns(pil, size);
-        WitnessGenerator::new(pil, &fixed_col_values, &*self.query_callback)
-            .with_external_witness_values(current_witness)
-            .with_challenges(stage, challenges)
-            .generate()
+        let fixed_col_values = self.select_fixed_columns2(pil, size);
+
+        // WitnessGenerator::new(pil, &fixed_col_values, &*self.query_callback)
+        //     .with_external_witness_values(current_witness)
+        //     .with_challenges(stage, challenges)
+        //     .generate()
+        assert_eq!(stage, 1);
+        generate_bus_accumulators(pil, current_witness, fixed_col_values, challenges)
     }
 }
 
@@ -182,35 +206,47 @@ impl<'a, 'b, T: FieldElement> WitnessGenerator<'a, 'b, T> {
             self.challenges,
             self.stage,
         );
-        let identities = self
-            .analyzed
-            .identities
-            .clone()
+
+        let identities_by_stage =
+            self.analyzed
+                .identities
+                .clone()
+                .into_iter()
+                .map(|identity| {
+                    let stage =
+                        identity
+                            .all_children()
+                            .map(|child| {
+                                if let AlgebraicExpression::Challenge(challenge) = child {
+                                    challenge.stage
+                                } else {
+                                    0
+                                }
+                            })
+                            .chain(fixed.polynomial_references(&identity).into_iter().map(
+                                |poly_id| {
+                                    if poly_id.ptype == PolynomialType::Committed {
+                                        fixed.witness_cols[&poly_id].stage
+                                    } else {
+                                        0
+                                    }
+                                },
+                            ))
+                            .max()
+                            .unwrap_or(0) as u8;
+                    (stage, identity)
+                })
+                .sorted_by_key(|(stage, _)| *stage)
+                .chunk_by(|(stage, _)| *stage);
+        let mut identities_by_stage = identities_by_stage
             .into_iter()
-            .filter(|identity| {
-                let references_later_stage_challenge = identity.expr_any(|expr| {
-                    if let AlgebraicExpression::Challenge(challenge) = expr {
-                        challenge.stage >= self.stage.into()
-                    } else {
-                        false
-                    }
-                });
-                let references_later_stage_witness = fixed
-                    .polynomial_references(identity)
-                    .into_iter()
-                    .any(|poly_id| {
-                        (poly_id.ptype == PolynomialType::Committed)
-                            && fixed.witness_cols[&poly_id].stage > self.stage as u32
-                    });
-
-                let discard = references_later_stage_challenge || references_later_stage_witness;
-
-                if discard {
-                    log::debug!("Skipping identity that references later-stage items: {identity}",);
-                }
-                !discard
+            .map(|(stage, identities)| {
+                let identities = identities.map(|(_, identity)| identity).collect::<Vec<_>>();
+                (stage, identities)
             })
-            .collect::<Vec<_>>();
+            .collect::<BTreeMap<_, _>>();
+
+        let identities = identities_by_stage.remove(&self.stage).unwrap();
 
         // Removes identities like X * (X - 1) = 0 or [ A ] in [ BYTES ]
         // These are already captured in the range constraints.
@@ -570,7 +606,14 @@ impl<'a, T> FixedColumn<'a, T> {
     }
 
     pub fn values(&self, size: DegreeType) -> &[T] {
-        self.values.get_by_size(size).unwrap()
+        self.values.get_by_size(size).unwrap_or_else(|| {
+            panic!(
+                "Fixed column {} does not have a value for size {}. Available sizes: {:?}",
+                self.name,
+                size,
+                self.values.available_sizes()
+            )
+        })
     }
 
     pub fn values_max_size(&self) -> &[T] {
diff --git a/executor/src/witgen/processor.rs b/executor/src/witgen/processor.rs
index 74fe04280a..d820f1c6a0 100644
--- a/executor/src/witgen/processor.rs
+++ b/executor/src/witgen/processor.rs
@@ -4,6 +4,7 @@ use powdr_ast::analyzed::PolynomialType;
 use powdr_ast::analyzed::{AlgebraicExpression as Expression, AlgebraicReference, PolyID};
 
 use powdr_number::{DegreeType, FieldElement};
+use rayon::iter::{IntoParallelIterator, ParallelIterator};
 
 use crate::witgen::affine_expression::AlgebraicVariable;
 use crate::witgen::data_structures::mutable_state::MutableState;
@@ -220,7 +221,7 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback<T>> Processor<'a, 'c, T, Q> {
     }
 
     pub fn process_queries(&mut self, row_index: usize) -> Result<bool, EvalError<T>> {
-        let mut query_processor = QueryProcessor::new(
+        let query_processor = QueryProcessor::new(
             self.fixed_data,
             self.mutable_state.query_callback(),
             self.size,
@@ -238,13 +239,31 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback<T>> Processor<'a, 'c, T, Q> {
         );
         let mut updates = EvalValue::complete(vec![]);
 
-        for (i, fun) in self.parts.prover_functions.iter().enumerate() {
-            if !self.processed_prover_functions.has_run(row_index, i) {
-                let r = query_processor.process_prover_function(&row_pair, fun)?;
-                if r.is_complete() {
-                    updates.combine(r);
-                    self.processed_prover_functions.mark_as_run(row_index, i);
+        let functions_and_index = self
+            .parts
+            .prover_functions
+            .iter()
+            .enumerate()
+            .collect::<Vec<_>>();
+
+        let results = functions_and_index
+            .into_par_iter()
+            .map(|(i, fun)| {
+                if !self.processed_prover_functions.has_run(row_index, i) {
+                    Ok(Some((
+                        query_processor.process_prover_function(&row_pair, fun)?,
+                        i,
+                    )))
+                } else {
+                    Ok(None)
                 }
+            })
+            .collect::<Result<Vec<_>, EvalError<T>>>()?;
+
+        for (r, i) in results.into_iter().flatten() {
+            if r.is_complete() {
+                updates.combine(r);
+                self.processed_prover_functions.mark_as_run(row_index, i);
             }
         }
 
diff --git a/executor/src/witgen/query_processor.rs b/executor/src/witgen/query_processor.rs
index 1e0d383fe6..7847502bd1 100644
--- a/executor/src/witgen/query_processor.rs
+++ b/executor/src/witgen/query_processor.rs
@@ -33,9 +33,9 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback<T>>
         }
     }
 
-    pub fn process_prover_function<'c>(
-        &'c mut self,
-        rows: &'c RowPair<'c, 'a, T>,
+    pub fn process_prover_function(
+        &self,
+        rows: &RowPair<'_, 'a, T>,
         fun: &'a Expression,
     ) -> EvalResult<'a, T> {
         let arguments = vec![Arc::new(Value::Integer(BigInt::from(u64::from(
@@ -77,7 +77,7 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback<T>>
     /// Panics if the column does not have a query attached.
     /// @returns None if the value for that column is already known.
     pub fn process_query(
-        &mut self,
+        &self,
         rows: &RowPair<'_, 'a, T>,
         poly_id: &PolyID,
     ) -> Option<EvalResult<'a, T>> {
@@ -91,7 +91,7 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback<T>>
     }
 
     fn process_witness_query(
-        &mut self,
+        &self,
         query: &'a Expression,
         poly: &'a AlgebraicReference,
         rows: &RowPair<'_, 'a, T>,
@@ -129,7 +129,7 @@ impl<'a, 'b, T: FieldElement, QueryCallback: super::QueryCallback<T>>
     }
 
     fn interpolate_query(
-        &mut self,
+        &self,
         query: &'a Expression,
         rows: &RowPair<'_, 'a, T>,
     ) -> Result<String, EvalError> {
diff --git a/executor/src/witgen/vm_processor.rs b/executor/src/witgen/vm_processor.rs
index 65968eb4a9..bfb9c8de52 100644
--- a/executor/src/witgen/vm_processor.rs
+++ b/executor/src/witgen/vm_processor.rs
@@ -23,7 +23,7 @@ use super::{Constraints, EvalError, EvalValue, FixedData, QueryCallback};
 /// Maximal period checked during loop detection.
 const MAX_PERIOD: usize = 4;
 
-const REPORT_FREQUENCY: u64 = 1_000;
+const REPORT_FREQUENCY: u64 = 100;
 
 /// A list of identities with a flag whether it is complete.
 struct CompletableIdentities<'a, T: FieldElement> {
@@ -68,6 +68,7 @@ pub struct VmProcessor<'a, 'c, T: FieldElement, Q: QueryCallback<T>> {
     last_report_time: Instant,
     processor: Processor<'a, 'c, T, Q>,
     progress_bar: ProgressBar,
+    loop_detection: bool,
 }
 
 impl<'a, 'c, T: FieldElement, Q: QueryCallback<T>> VmProcessor<'a, 'c, T, Q> {
@@ -79,11 +80,11 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback<T>> VmProcessor<'a, 'c, T, Q> {
         parts: &'c MachineParts<'a, T>,
         mutable_data: SolverState<'a, T>,
         mutable_state: &'c MutableState<'a, T, Q>,
+        degree: DegreeType,
+        loop_detection: bool,
     ) -> Self {
         let degree_range = parts.common_degree_range();
 
-        let degree = degree_range.max;
-
         let (identities_with_next, identities_without_next): (Vec<_>, Vec<_>) = parts
             .identities
             .iter()
@@ -118,6 +119,7 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback<T>> VmProcessor<'a, 'c, T, Q> {
             last_report_time: Instant::now(),
             processor,
             progress_bar,
+            loop_detection,
         }
     }
 
@@ -181,7 +183,11 @@ impl<'a, 'c, T: FieldElement, Q: QueryCallback<T>> VmProcessor<'a, 'c, T, Q> {
             }
 
             // Check if we are in a loop.
-            if looping_period.is_none() && row_index % 100 == 0 && row_index > 0 {
+            if looping_period.is_none()
+                && row_index % 100 == 0
+                && row_index > 0
+                && self.loop_detection
+            {
                 looping_period = self.rows_are_repeating(row_index);
                 if let Some(p) = looping_period {
                     log::log!(
diff --git a/std/protocols/bus.asm b/std/protocols/bus.asm
index c0e1ccb1ac..55c456ba5e 100644
--- a/std/protocols/bus.asm
+++ b/std/protocols/bus.asm
@@ -13,6 +13,7 @@ use std::math::fp2::needs_extension;
 use std::math::fp2::fp2_from_array;
 use std::math::fp2::constrain_eq_ext;
 use std::protocols::fingerprint::fingerprint_with_id;
+use std::protocols::fingerprint::fingerprint_with_id2;
 use std::protocols::fingerprint::fingerprint_with_id_inter;
 use std::math::fp2::required_extension_size;
 use std::prover::eval;
@@ -82,21 +83,26 @@ let bus_interaction: expr, expr[], expr -> () = constr |id, tuple, multiplicity|
 /// This is intended to be used as a hint in the extension field case; for the base case
 /// automatic witgen is smart enough to figure out the value of the accumulator.
 let compute_next_z: expr, expr, expr[], expr, Fp2<expr>, Fp2<expr>, Fp2<expr> -> fe[] = query |is_first, id, tuple, multiplicity, acc, alpha, beta| {
-    // Implemented as: folded = (beta - fingerprint(id, tuple...));
-    // `multiplicity / (beta - fingerprint(id, tuple...))` to `acc`
-    let folded_next = sub_ext(eval_ext(beta), fingerprint_with_id(eval(id'), array::eval(array::next(tuple)), alpha));
 
-    let m_ext = from_base(multiplicity);
-    let m_ext_next = next_ext(m_ext);
+    let m_next = eval(multiplicity');
+    let m_ext_next = from_base(m_next);
 
     let is_first_next = eval(is_first');
     let current_acc = if is_first_next == 1 {from_base(0)} else {eval_ext(acc)};
     
     // acc' = current_acc + multiplicity' / folded'
-    let res = add_ext(
-        current_acc,
-        mul_ext(eval_ext(m_ext_next), inv_ext(folded_next))
-    );
+    let res = if m_next == 0 {
+        current_acc
+    }
+    else {
+        // Implemented as: folded = (beta - fingerprint(id, tuple...));
+        // then `multiplicity / folded` is added to `acc` (all evaluated on the next row).
+        let folded_next = sub_ext(eval_ext(beta), fingerprint_with_id2(eval(id'), array::eval(array::next(tuple)), alpha));
+        add_ext(
+            current_acc,
+            mul_ext(m_ext_next, inv_ext(folded_next))
+        )
+    };
 
     unpack_ext_array(res)
 };
diff --git a/std/protocols/fingerprint.asm b/std/protocols/fingerprint.asm
index c19bb6d634..8f9592b627 100644
--- a/std/protocols/fingerprint.asm
+++ b/std/protocols/fingerprint.asm
@@ -7,19 +7,34 @@ use std::math::fp2::pow_ext;
 use std::math::fp2::from_base;
 use std::math::fp2::eval_ext;
 use std::check::assert;
+use std::utils::fold;
+use std::prover::eval;
 
 /// Maps [x_1, x_2, ..., x_n] to its Read-Solomon fingerprint, using a challenge alpha: $\sum_{i=1}^n alpha**{(n - i)} * x_i$
 /// To generate an expression that computes the fingerprint, use `fingerprint_inter` instead.
 /// Note that alpha is passed as an expressions, so that it is only evaluated if needed (i.e., if len(expr_array) > 1).
-let fingerprint: fe[], Fp2<expr> -> Fp2<fe> = query |expr_array, alpha| if len(expr_array) == 1 {
+let fingerprint: fe[], Fp2<expr> -> Fp2<fe> = query |expr_array, alpha| {
+    fingerprint_impl(expr_array, eval_ext(alpha), len(expr_array))
+};
+
+let fingerprint_impl: fe[], Fp2<fe>, int -> Fp2<fe> = query |expr_array, alpha, l| if l == 1 {
     // Base case
     from_base(expr_array[0])
 } else {
-    assert(len(expr_array) > 1, || "fingerprint requires at least one element");
 
     // Recursively compute the fingerprint as fingerprint(expr_array[:-1], alpha) * alpha + expr_array[-1]
-    let intermediate_fingerprint = fingerprint(array::sub_array(expr_array, 0, len(expr_array) - 1), alpha);
-    add_ext(mul_ext(eval_ext(alpha), intermediate_fingerprint), from_base(expr_array[len(expr_array) - 1]))
+    let intermediate_fingerprint = fingerprint_impl(expr_array, alpha, l - 1);
+    add_ext(mul_ext(alpha, intermediate_fingerprint), from_base(expr_array[l - 1]))
+};
+
+let fingerprint2: fe[], Fp2<expr> -> Fp2<fe> = query |expr_array, alpha| {
+    let n = len(expr_array);
+    fold(
+        n,
+        |i| if expr_array[i] == 0 {from_base(0)} else {mul_ext(pow_ext(eval_ext(alpha), n - i - 1), from_base(expr_array[i]))},
+        from_base(0),
+        |sum_acc, el| add_ext(sum_acc, el)
+    )
 };
 
 /// Like `fingerprint`, but "materializes" the intermediate results as intermediate columns.
@@ -43,6 +58,7 @@ let fingerprint_inter: expr[], Fp2<expr> -> Fp2<expr> = |expr_array, alpha| if l
 
 /// Maps [id, x_1, x_2, ..., x_n] to its Read-Solomon fingerprint, using a challenge alpha: $\sum_{i=1}^n alpha**{(n - i)} * x_i$
 let fingerprint_with_id: fe, fe[], Fp2<expr> -> Fp2<fe> = query |id, expr_array, alpha| fingerprint([id] + expr_array, alpha);
+let fingerprint_with_id2: fe, fe[], Fp2<expr> -> Fp2<fe> = query |id, expr_array, alpha| fingerprint2([id] + expr_array, alpha);
 
 /// Maps [id, x_1, x_2, ..., x_n] to its Read-Solomon fingerprint, using a challenge alpha: $\sum_{i=1}^n alpha**{(n - i)} * x_i$
 let fingerprint_with_id_inter: expr, expr[], Fp2<expr> -> Fp2<expr> = |id, expr_array, alpha| fingerprint_inter([id] + expr_array, alpha);