From 1995f21d38a6c05b49c99486c819b7ed59918046 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 12 Nov 2024 15:09:13 +0200 Subject: [PATCH 1/5] Sketch state root hash field in pruning logs --- ...b71ef6258e011f424c705d56c9827e8277c84.json | 44 ++++++++ ...dc766262b5bdc58be0444e164b1bd9feed02d.json | 38 ------- ...5393ec02458104c483a2023b24881ae0c6716.json | 23 ---- ...d_batch_root_hash_to_pruning_logs.down.sql | 2 + ...add_batch_root_hash_to_pruning_logs.up.sql | 3 + core/lib/dal/src/consensus_dal/mod.rs | 4 +- core/lib/dal/src/pruning_dal/mod.rs | 106 +++++++++++++----- core/lib/dal/src/pruning_dal/tests.rs | 59 +++++----- .../api_server/src/execution_sandbox/mod.rs | 10 +- core/node/db_pruner/src/lib.rs | 62 +++++----- core/node/db_pruner/src/tests.rs | 64 ++++------- core/node/metadata_calculator/src/pruning.rs | 4 +- .../metadata_calculator/src/recovery/mod.rs | 27 +++-- core/node/metadata_calculator/src/tests.rs | 2 +- core/node/metadata_calculator/src/updater.rs | 2 +- 15 files changed, 230 insertions(+), 220 deletions(-) create mode 100644 core/lib/dal/.sqlx/query-8bab3c429fc5bbf4a91819e55cfb71ef6258e011f424c705d56c9827e8277c84.json delete mode 100644 core/lib/dal/.sqlx/query-c9a842d04e8b225e43f07f76541dc766262b5bdc58be0444e164b1bd9feed02d.json delete mode 100644 core/lib/dal/.sqlx/query-cf3c7b918a3f82476543841d4dc5393ec02458104c483a2023b24881ae0c6716.json create mode 100644 core/lib/dal/migrations/20241112120944_add_batch_root_hash_to_pruning_logs.down.sql create mode 100644 core/lib/dal/migrations/20241112120944_add_batch_root_hash_to_pruning_logs.up.sql diff --git a/core/lib/dal/.sqlx/query-8bab3c429fc5bbf4a91819e55cfb71ef6258e011f424c705d56c9827e8277c84.json b/core/lib/dal/.sqlx/query-8bab3c429fc5bbf4a91819e55cfb71ef6258e011f424c705d56c9827e8277c84.json new file mode 100644 index 000000000000..0132af8fbe6c --- /dev/null +++ b/core/lib/dal/.sqlx/query-8bab3c429fc5bbf4a91819e55cfb71ef6258e011f424c705d56c9827e8277c84.json @@ -0,0 +1,44 @@ +{ + 
"db_name": "PostgreSQL", + "query": "\n WITH\n soft AS (\n SELECT\n pruned_l1_batch,\n pruned_miniblock\n FROM\n pruning_log\n WHERE\n type = 'Soft'\n ORDER BY\n pruned_l1_batch DESC\n LIMIT\n 1\n ),\n \n hard AS (\n SELECT\n pruned_l1_batch,\n pruned_miniblock,\n pruned_l1_batch_root_hash\n FROM\n pruning_log\n WHERE\n type = 'Hard'\n ORDER BY\n pruned_l1_batch DESC\n LIMIT\n 1\n )\n \n SELECT\n soft.pruned_l1_batch AS last_soft_pruned_l1_batch,\n soft.pruned_miniblock AS last_soft_pruned_l2_block,\n hard.pruned_l1_batch AS last_hard_pruned_l1_batch,\n hard.pruned_miniblock AS last_hard_pruned_l2_block,\n hard.pruned_l1_batch_root_hash AS last_hard_pruned_batch_root_hash\n FROM\n soft\n FULL JOIN hard ON TRUE\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "last_soft_pruned_l1_batch", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "last_soft_pruned_l2_block", + "type_info": "Int8" + }, + { + "ordinal": 2, + "name": "last_hard_pruned_l1_batch", + "type_info": "Int8" + }, + { + "ordinal": 3, + "name": "last_hard_pruned_l2_block", + "type_info": "Int8" + }, + { + "ordinal": 4, + "name": "last_hard_pruned_batch_root_hash", + "type_info": "Bytea" + } + ], + "parameters": { + "Left": [] + }, + "nullable": [ + true, + true, + true, + true, + true + ] + }, + "hash": "8bab3c429fc5bbf4a91819e55cfb71ef6258e011f424c705d56c9827e8277c84" +} diff --git a/core/lib/dal/.sqlx/query-c9a842d04e8b225e43f07f76541dc766262b5bdc58be0444e164b1bd9feed02d.json b/core/lib/dal/.sqlx/query-c9a842d04e8b225e43f07f76541dc766262b5bdc58be0444e164b1bd9feed02d.json deleted file mode 100644 index fb28539ccdf6..000000000000 --- a/core/lib/dal/.sqlx/query-c9a842d04e8b225e43f07f76541dc766262b5bdc58be0444e164b1bd9feed02d.json +++ /dev/null @@ -1,38 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n WITH\n soft AS (\n SELECT\n pruned_l1_batch,\n pruned_miniblock\n FROM\n pruning_log\n WHERE\n type = 'Soft'\n ORDER BY\n pruned_l1_batch DESC\n LIMIT\n 1\n ),\n \n hard AS (\n 
SELECT\n pruned_l1_batch,\n pruned_miniblock\n FROM\n pruning_log\n WHERE\n type = 'Hard'\n ORDER BY\n pruned_l1_batch DESC\n LIMIT\n 1\n )\n \n SELECT\n soft.pruned_l1_batch AS last_soft_pruned_l1_batch,\n soft.pruned_miniblock AS last_soft_pruned_miniblock,\n hard.pruned_l1_batch AS last_hard_pruned_l1_batch,\n hard.pruned_miniblock AS last_hard_pruned_miniblock\n FROM\n soft\n FULL JOIN hard ON TRUE\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "last_soft_pruned_l1_batch", - "type_info": "Int8" - }, - { - "ordinal": 1, - "name": "last_soft_pruned_miniblock", - "type_info": "Int8" - }, - { - "ordinal": 2, - "name": "last_hard_pruned_l1_batch", - "type_info": "Int8" - }, - { - "ordinal": 3, - "name": "last_hard_pruned_miniblock", - "type_info": "Int8" - } - ], - "parameters": { - "Left": [] - }, - "nullable": [ - true, - true, - true, - true - ] - }, - "hash": "c9a842d04e8b225e43f07f76541dc766262b5bdc58be0444e164b1bd9feed02d" -} diff --git a/core/lib/dal/.sqlx/query-cf3c7b918a3f82476543841d4dc5393ec02458104c483a2023b24881ae0c6716.json b/core/lib/dal/.sqlx/query-cf3c7b918a3f82476543841d4dc5393ec02458104c483a2023b24881ae0c6716.json deleted file mode 100644 index 59bfa4858c02..000000000000 --- a/core/lib/dal/.sqlx/query-cf3c7b918a3f82476543841d4dc5393ec02458104c483a2023b24881ae0c6716.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n SELECT\n COUNT(*) AS \"count!\"\n FROM\n (\n SELECT\n *\n FROM\n storage_logs\n WHERE\n hashed_key = $1\n AND miniblock_number <= COALESCE(\n (\n SELECT\n MAX(number)\n FROM\n miniblocks\n ),\n (\n SELECT\n miniblock_number\n FROM\n snapshot_recovery\n )\n )\n ORDER BY\n miniblock_number DESC,\n operation_number DESC\n LIMIT\n 1\n ) sl\n WHERE\n sl.value != $2\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "count!", - "type_info": "Int8" - } - ], - "parameters": { - "Left": [ - "Bytea", - "Bytea" - ] - }, - "nullable": [ - null - ] - }, - "hash": 
"cf3c7b918a3f82476543841d4dc5393ec02458104c483a2023b24881ae0c6716" -} diff --git a/core/lib/dal/migrations/20241112120944_add_batch_root_hash_to_pruning_logs.down.sql b/core/lib/dal/migrations/20241112120944_add_batch_root_hash_to_pruning_logs.down.sql new file mode 100644 index 000000000000..d6beeb80e8a5 --- /dev/null +++ b/core/lib/dal/migrations/20241112120944_add_batch_root_hash_to_pruning_logs.down.sql @@ -0,0 +1,2 @@ +ALTER TABLE pruning_log + DROP COLUMN pruned_l1_batch_root_hash; diff --git a/core/lib/dal/migrations/20241112120944_add_batch_root_hash_to_pruning_logs.up.sql b/core/lib/dal/migrations/20241112120944_add_batch_root_hash_to_pruning_logs.up.sql new file mode 100644 index 000000000000..6a990781f61c --- /dev/null +++ b/core/lib/dal/migrations/20241112120944_add_batch_root_hash_to_pruning_logs.up.sql @@ -0,0 +1,3 @@ +-- nullable for backward compatibility +ALTER TABLE pruning_log + ADD COLUMN pruned_l1_batch_root_hash BYTEA DEFAULT NULL; diff --git a/core/lib/dal/src/consensus_dal/mod.rs b/core/lib/dal/src/consensus_dal/mod.rs index a091421d857c..7f3bcd1166ad 100644 --- a/core/lib/dal/src/consensus_dal/mod.rs +++ b/core/lib/dal/src/consensus_dal/mod.rs @@ -301,10 +301,10 @@ impl ConsensusDal<'_, '_> { .get_pruning_info() .await .context("get_pruning_info()")?; - Ok(match info.last_soft_pruned_l2_block { + Ok(match info.last_soft_pruned { // It is guaranteed that pruning info values are set for storage recovered from // snapshot, even if pruning was not enabled. 
- Some(last_pruned) => validator::BlockNumber(last_pruned.0.into()) + 1, + Some(last_pruned) => validator::BlockNumber(last_pruned.l2_block.0.into()) + 1, // No snapshot and no pruning: None => validator::BlockNumber(0), }) diff --git a/core/lib/dal/src/pruning_dal/mod.rs b/core/lib/dal/src/pruning_dal/mod.rs index bcd9fdcfc3e1..9fe7b9cb570d 100644 --- a/core/lib/dal/src/pruning_dal/mod.rs +++ b/core/lib/dal/src/pruning_dal/mod.rs @@ -1,25 +1,43 @@ use std::ops; use zksync_db_connection::{connection::Connection, error::DalResult, instrument::InstrumentExt}; -use zksync_types::{L1BatchNumber, L2BlockNumber}; +use zksync_types::{L1BatchNumber, L2BlockNumber, H256}; use crate::Core; #[cfg(test)] mod tests; -#[derive(Debug)] -pub struct PruningDal<'a, 'c> { - pub(crate) storage: &'a mut Connection<'c, Core>, +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct SoftPruningInfo { + pub l1_batch: L1BatchNumber, + pub l2_block: L2BlockNumber, +} + +#[derive(Debug, Clone, Copy, PartialEq)] +pub struct HardPruningInfo { + pub l1_batch: L1BatchNumber, + pub l2_block: L2BlockNumber, + /// May be set to `None` for old pruning logs. + pub l1_batch_root_hash: Option, } /// Information about Postgres pruning. #[derive(Debug, Clone, Copy, Default, PartialEq)] pub struct PruningInfo { - pub last_soft_pruned_l1_batch: Option, - pub last_soft_pruned_l2_block: Option, - pub last_hard_pruned_l1_batch: Option, - pub last_hard_pruned_l2_block: Option, + /// Information about last soft pruning. Soft pruning is expected to be ahead or equal to hard pruning. + pub last_soft_pruned: Option, + /// Information about last hard pruning. + pub last_hard_pruned: Option, +} + +impl PruningInfo { + /// Returns `true` iff pruning is caught up, i.e., all soft-pruned data is hard-pruned. 
+ pub fn is_caught_up(&self) -> bool { + let soft_pruned_l1_batch = self.last_soft_pruned.map(|info| info.l1_batch); + let hard_pruned_l1_batch = self.last_hard_pruned.map(|info| info.l1_batch); + soft_pruned_l1_batch == hard_pruned_l1_batch + } } /// Statistics about a single hard pruning iteration. @@ -33,6 +51,44 @@ pub struct HardPruningStats { pub deleted_l2_to_l1_logs: u64, } +#[derive(Debug)] +struct StoragePruningInfo { + last_soft_pruned_l1_batch: Option, + last_soft_pruned_l2_block: Option, + last_hard_pruned_l1_batch: Option, + last_hard_pruned_l2_block: Option, + last_hard_pruned_batch_root_hash: Option>, +} + +impl StoragePruningInfo { + fn as_soft(&self) -> Option { + Some(SoftPruningInfo { + l1_batch: L1BatchNumber(self.last_soft_pruned_l1_batch? as u32), + l2_block: L2BlockNumber(self.last_soft_pruned_l2_block? as u32), + }) + } + + fn as_hard(&self) -> Option { + Some(HardPruningInfo { + l1_batch: L1BatchNumber(self.last_hard_pruned_l1_batch? as u32), + l2_block: L2BlockNumber(self.last_hard_pruned_l2_block? 
as u32), + l1_batch_root_hash: self + .last_hard_pruned_batch_root_hash + .as_deref() + .map(H256::from_slice), + }) + } +} + +impl From for PruningInfo { + fn from(row: StoragePruningInfo) -> Self { + Self { + last_soft_pruned: row.as_soft(), + last_hard_pruned: row.as_hard(), + } + } +} + #[derive(Debug, sqlx::Type)] #[sqlx(type_name = "prune_type")] enum PruneType { @@ -40,9 +96,15 @@ enum PruneType { Hard, } +#[derive(Debug)] +pub struct PruningDal<'a, 'c> { + pub(crate) storage: &'a mut Connection<'c, Core>, +} + impl PruningDal<'_, '_> { pub async fn get_pruning_info(&mut self) -> DalResult { - let pruning_info = sqlx::query!( + let row = sqlx::query_as!( + StoragePruningInfo, r#" WITH soft AS ( @@ -62,7 +124,8 @@ impl PruningDal<'_, '_> { hard AS ( SELECT pruned_l1_batch, - pruned_miniblock + pruned_miniblock, + pruned_l1_batch_root_hash FROM pruning_log WHERE @@ -75,33 +138,21 @@ impl PruningDal<'_, '_> { SELECT soft.pruned_l1_batch AS last_soft_pruned_l1_batch, - soft.pruned_miniblock AS last_soft_pruned_miniblock, + soft.pruned_miniblock AS last_soft_pruned_l2_block, hard.pruned_l1_batch AS last_hard_pruned_l1_batch, - hard.pruned_miniblock AS last_hard_pruned_miniblock + hard.pruned_miniblock AS last_hard_pruned_l2_block, + hard.pruned_l1_batch_root_hash AS last_hard_pruned_batch_root_hash FROM soft FULL JOIN hard ON TRUE "# ) - .map(|row| PruningInfo { - last_soft_pruned_l1_batch: row - .last_soft_pruned_l1_batch - .map(|num| L1BatchNumber(num as u32)), - last_soft_pruned_l2_block: row - .last_soft_pruned_miniblock - .map(|num| L2BlockNumber(num as u32)), - last_hard_pruned_l1_batch: row - .last_hard_pruned_l1_batch - .map(|num| L1BatchNumber(num as u32)), - last_hard_pruned_l2_block: row - .last_hard_pruned_miniblock - .map(|num| L2BlockNumber(num as u32)), - }) .instrument("get_last_soft_pruned_batch") .report_latency() .fetch_optional(self.storage) .await?; - Ok(pruning_info.unwrap_or_default()) + + Ok(row.map(PruningInfo::from).unwrap_or_default()) 
} pub async fn soft_prune_batches_range( @@ -389,6 +440,7 @@ impl PruningDal<'_, '_> { Ok(execution_result.rows_affected()) } + // FIXME: record removed L1 batch root hash async fn insert_hard_pruning_log( &mut self, last_l1_batch_to_prune: L1BatchNumber, diff --git a/core/lib/dal/src/pruning_dal/tests.rs b/core/lib/dal/src/pruning_dal/tests.rs index 70dda48d8c82..9f7310d54735 100644 --- a/core/lib/dal/src/pruning_dal/tests.rs +++ b/core/lib/dal/src/pruning_dal/tests.rs @@ -204,10 +204,8 @@ async fn soft_pruning_works() { assert_eq!( PruningInfo { - last_soft_pruned_l2_block: None, - last_soft_pruned_l1_batch: None, - last_hard_pruned_l2_block: None, - last_hard_pruned_l1_batch: None + last_soft_pruned: None, + last_hard_pruned: None, }, transaction.pruning_dal().get_pruning_info().await.unwrap() ); @@ -219,10 +217,11 @@ async fn soft_pruning_works() { .unwrap(); assert_eq!( PruningInfo { - last_soft_pruned_l2_block: Some(L2BlockNumber(11)), - last_soft_pruned_l1_batch: Some(L1BatchNumber(5)), - last_hard_pruned_l2_block: None, - last_hard_pruned_l1_batch: None + last_soft_pruned: Some(SoftPruningInfo { + l2_block: L2BlockNumber(11), + l1_batch: L1BatchNumber(5), + }), + last_hard_pruned: None, }, transaction.pruning_dal().get_pruning_info().await.unwrap() ); @@ -234,10 +233,11 @@ async fn soft_pruning_works() { .unwrap(); assert_eq!( PruningInfo { - last_soft_pruned_l2_block: Some(L2BlockNumber(21)), - last_soft_pruned_l1_batch: Some(L1BatchNumber(10)), - last_hard_pruned_l2_block: None, - last_hard_pruned_l1_batch: None + last_soft_pruned: Some(SoftPruningInfo { + l2_block: L2BlockNumber(21), + l1_batch: L1BatchNumber(10), + }), + last_hard_pruned: None, }, transaction.pruning_dal().get_pruning_info().await.unwrap() ); @@ -249,10 +249,15 @@ async fn soft_pruning_works() { .unwrap(); assert_eq!( PruningInfo { - last_soft_pruned_l2_block: Some(L2BlockNumber(21)), - last_soft_pruned_l1_batch: Some(L1BatchNumber(10)), - last_hard_pruned_l2_block: 
Some(L2BlockNumber(21)), - last_hard_pruned_l1_batch: Some(L1BatchNumber(10)) + last_soft_pruned: Some(SoftPruningInfo { + l2_block: L2BlockNumber(21), + l1_batch: L1BatchNumber(10), + }), + last_hard_pruned: Some(HardPruningInfo { + l2_block: L2BlockNumber(21), + l1_batch: L1BatchNumber(10), + l1_batch_root_hash: None, // FIXME + }), }, transaction.pruning_dal().get_pruning_info().await.unwrap() ); @@ -399,7 +404,7 @@ async fn l1_batches_can_be_hard_pruned() { .get_pruning_info() .await .unwrap() - .last_hard_pruned_l1_batch + .last_hard_pruned .is_none()); transaction @@ -410,14 +415,10 @@ async fn l1_batches_can_be_hard_pruned() { assert_l1_batch_objects_dont_exist(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(5)).await; assert_l1_batch_objects_exists(&mut transaction, L1BatchNumber(6)..=L1BatchNumber(10)).await; + let pruning_info = transaction.pruning_dal().get_pruning_info().await.unwrap(); assert_eq!( - Some(L1BatchNumber(5)), - transaction - .pruning_dal() - .get_pruning_info() - .await - .unwrap() - .last_hard_pruned_l1_batch + L1BatchNumber(5), + pruning_info.last_hard_pruned.unwrap().l1_batch ); let stats = transaction @@ -432,14 +433,10 @@ async fn l1_batches_can_be_hard_pruned() { assert_l1_batch_objects_dont_exist(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(10)) .await; + let pruning_info = transaction.pruning_dal().get_pruning_info().await.unwrap(); assert_eq!( - Some(L1BatchNumber(10)), - transaction - .pruning_dal() - .get_pruning_info() - .await - .unwrap() - .last_hard_pruned_l1_batch + L1BatchNumber(10), + pruning_info.last_hard_pruned.unwrap().l1_batch ); } diff --git a/core/node/api_server/src/execution_sandbox/mod.rs b/core/node/api_server/src/execution_sandbox/mod.rs index b560d161ab52..bcba200f5ebc 100644 --- a/core/node/api_server/src/execution_sandbox/mod.rs +++ b/core/node/api_server/src/execution_sandbox/mod.rs @@ -228,9 +228,8 @@ impl BlockStartInfo { storage: &mut Connection<'_, Core>, ) -> anyhow::Result { let 
cached_pruning_info = self.get_pruning_info(storage).await?; - let last_block = cached_pruning_info.last_soft_pruned_l2_block; - if let Some(L2BlockNumber(last_block)) = last_block { - return Ok(L2BlockNumber(last_block + 1)); + if let Some(pruned) = cached_pruning_info.last_soft_pruned { + return Ok(pruned.l2_block + 1); } Ok(L2BlockNumber(0)) } @@ -240,9 +239,8 @@ impl BlockStartInfo { storage: &mut Connection<'_, Core>, ) -> anyhow::Result { let cached_pruning_info = self.get_pruning_info(storage).await?; - let last_batch = cached_pruning_info.last_soft_pruned_l1_batch; - if let Some(L1BatchNumber(last_block)) = last_batch { - return Ok(L1BatchNumber(last_block + 1)); + if let Some(pruned) = cached_pruning_info.last_soft_pruned { + return Ok(pruned.l1_batch + 1); } Ok(L1BatchNumber(0)) } diff --git a/core/node/db_pruner/src/lib.rs b/core/node/db_pruner/src/lib.rs index 4b4a53c68aa0..b94757d93523 100644 --- a/core/node/db_pruner/src/lib.rs +++ b/core/node/db_pruner/src/lib.rs @@ -8,7 +8,10 @@ use std::{ use anyhow::Context as _; use serde::{Deserialize, Serialize}; use tokio::sync::watch; -use zksync_dal::{pruning_dal::PruningInfo, Connection, ConnectionPool, Core, CoreDal}; +use zksync_dal::{ + pruning_dal::{HardPruningInfo, PruningInfo, SoftPruningInfo}, + Connection, ConnectionPool, Core, CoreDal, +}; use zksync_health_check::{Health, HealthStatus, HealthUpdater, ReactiveHealthCheck}; use zksync_types::{L1BatchNumber, L2BlockNumber}; @@ -53,10 +56,10 @@ struct DbPrunerHealth { impl From for DbPrunerHealth { fn from(info: PruningInfo) -> Self { Self { - last_soft_pruned_l1_batch: info.last_soft_pruned_l1_batch, - last_soft_pruned_l2_block: info.last_soft_pruned_l2_block, - last_hard_pruned_l1_batch: info.last_hard_pruned_l1_batch, - last_hard_pruned_l2_block: info.last_hard_pruned_l2_block, + last_soft_pruned_l1_batch: info.last_soft_pruned.map(|info| info.l1_batch), + last_soft_pruned_l2_block: info.last_soft_pruned.map(|info| info.l2_block), + 
last_hard_pruned_l1_batch: info.last_hard_pruned.map(|info| info.l1_batch), + last_hard_pruned_l2_block: info.last_hard_pruned.map(|info| info.l2_block), } } } @@ -188,13 +191,10 @@ impl DbPruner { let mut transaction = storage.start_transaction().await?; let mut current_pruning_info = transaction.pruning_dal().get_pruning_info().await?; - let next_l1_batch_to_prune = L1BatchNumber( - current_pruning_info - .last_soft_pruned_l1_batch - .unwrap_or(L1BatchNumber(0)) - .0 - + self.config.pruned_batch_chunk_size, - ); + let next_l1_batch_to_prune = current_pruning_info + .last_soft_pruned + .map_or(L1BatchNumber(0), |info| info.l1_batch) + + self.config.pruned_batch_chunk_size; if !self.is_l1_batch_prunable(next_l1_batch_to_prune).await { METRICS.pruning_chunk_duration[&PruneType::NoOp].observe(start.elapsed()); return Ok(false); @@ -218,8 +218,10 @@ impl DbPruner { "Soft pruned db l1_batches up to {next_l1_batch_to_prune} and L2 blocks up to {next_l2_block_to_prune}, operation took {latency:?}", ); - current_pruning_info.last_soft_pruned_l1_batch = Some(next_l1_batch_to_prune); - current_pruning_info.last_soft_pruned_l2_block = Some(next_l2_block_to_prune); + current_pruning_info.last_soft_pruned = Some(SoftPruningInfo { + l1_batch: next_l1_batch_to_prune, + l2_block: next_l2_block_to_prune, + }); self.update_health(current_pruning_info); Ok(true) } @@ -233,20 +235,15 @@ impl DbPruner { let mut transaction = storage.start_transaction().await?; let mut current_pruning_info = transaction.pruning_dal().get_pruning_info().await?; - let last_soft_pruned_l1_batch = - current_pruning_info.last_soft_pruned_l1_batch.with_context(|| { - format!("bogus pruning info {current_pruning_info:?}: trying to hard-prune data, but there is no soft-pruned L1 batch") - })?; - let last_soft_pruned_l2_block = - current_pruning_info.last_soft_pruned_l2_block.with_context(|| { - format!("bogus pruning info {current_pruning_info:?}: trying to hard-prune data, but there is no soft-pruned L2 
block") - })?; + let soft_pruned = current_pruning_info.last_soft_pruned.with_context(|| { + format!("bogus pruning info {current_pruning_info:?}: trying to hard-prune data, but there is no soft-pruned data") + })?; let mut dal = transaction.pruning_dal(); let stats = tokio::select! { result = dal.hard_prune_batches_range( - last_soft_pruned_l1_batch, - last_soft_pruned_l2_block, + soft_pruned.l1_batch, + soft_pruned.l2_block, ) => result?, _ = stop_receiver.changed() => { @@ -261,12 +258,13 @@ impl DbPruner { transaction.commit().await?; let latency = latency.observe(); - tracing::info!( - "Hard pruned db l1_batches up to {last_soft_pruned_l1_batch} and L2 blocks up to {last_soft_pruned_l2_block}, \ - operation took {latency:?}" - ); - current_pruning_info.last_hard_pruned_l1_batch = Some(last_soft_pruned_l1_batch); - current_pruning_info.last_hard_pruned_l2_block = Some(last_soft_pruned_l2_block); + let hard_pruning_info = HardPruningInfo { + l1_batch: soft_pruned.l1_batch, + l2_block: soft_pruned.l2_block, + l1_batch_root_hash: None, // FIXME + }; + tracing::info!("Hard pruned data up to {hard_pruning_info:?}, operation took {latency:?}"); + current_pruning_info.last_hard_pruned = Some(hard_pruning_info); self.update_health(current_pruning_info); Ok(PruningIterationOutcome::Pruned) } @@ -280,9 +278,7 @@ impl DbPruner { self.update_health(current_pruning_info); // If this `if` is not entered, it means that the node has restarted after soft pruning - if current_pruning_info.last_soft_pruned_l1_batch - == current_pruning_info.last_hard_pruned_l1_batch - { + if current_pruning_info.is_caught_up() { let pruning_done = self.soft_prune(&mut storage).await?; if !pruning_done { return Ok(PruningIterationOutcome::NoOp); diff --git a/core/node/db_pruner/src/tests.rs b/core/node/db_pruner/src/tests.rs index 99fbada423dc..59eb9df8f202 100644 --- a/core/node/db_pruner/src/tests.rs +++ b/core/node/db_pruner/src/tests.rs @@ -167,17 +167,27 @@ async fn 
hard_pruning_ignores_conditions_checks() { .unwrap(); assert_eq!( - PruningInfo { - last_soft_pruned_l1_batch: Some(L1BatchNumber(2)), - last_soft_pruned_l2_block: Some(L2BlockNumber(5)), - last_hard_pruned_l1_batch: Some(L1BatchNumber(2)), - last_hard_pruned_l2_block: Some(L2BlockNumber(5)), - }, + test_pruning_info(2, 5), conn.pruning_dal().get_pruning_info().await.unwrap() ); let health = health_check.check_health().await; assert_matches!(health.status(), HealthStatus::Ready); } + +fn test_pruning_info(l1_batch: u32, l2_block: u32) -> PruningInfo { + PruningInfo { + last_soft_pruned: Some(SoftPruningInfo { + l1_batch: L1BatchNumber(l1_batch), + l2_block: L2BlockNumber(l2_block), + }), + last_hard_pruned: Some(HardPruningInfo { + l1_batch: L1BatchNumber(l1_batch), + l2_block: L2BlockNumber(l2_block), + l1_batch_root_hash: None, // FIXME + }), + } +} + #[test(tokio::test)] async fn pruner_catches_up_with_hard_pruning_up_to_soft_pruning_boundary_ignoring_chunk_size() { let pool = ConnectionPool::::test_pool().await; @@ -205,12 +215,7 @@ async fn pruner_catches_up_with_hard_pruning_up_to_soft_pruning_boundary_ignorin .unwrap(); assert_eq!( - PruningInfo { - last_soft_pruned_l1_batch: Some(L1BatchNumber(2)), - last_soft_pruned_l2_block: Some(L2BlockNumber(5)), - last_hard_pruned_l1_batch: Some(L1BatchNumber(2)), - last_hard_pruned_l2_block: Some(L2BlockNumber(5)), - }, + test_pruning_info(2, 5), conn.pruning_dal().get_pruning_info().await.unwrap() ); @@ -219,12 +224,7 @@ async fn pruner_catches_up_with_hard_pruning_up_to_soft_pruning_boundary_ignorin .await .unwrap(); assert_eq!( - PruningInfo { - last_soft_pruned_l1_batch: Some(L1BatchNumber(7)), - last_soft_pruned_l2_block: Some(L2BlockNumber(15)), - last_hard_pruned_l1_batch: Some(L1BatchNumber(7)), - last_hard_pruned_l2_block: Some(L2BlockNumber(15)), - }, + test_pruning_info(7, 15), conn.pruning_dal().get_pruning_info().await.unwrap() ); } @@ -253,12 +253,7 @@ async fn unconstrained_pruner_with_fresh_database() 
{ .unwrap(); assert_eq!( - PruningInfo { - last_soft_pruned_l1_batch: Some(L1BatchNumber(3)), - last_soft_pruned_l2_block: Some(L2BlockNumber(7)), - last_hard_pruned_l1_batch: Some(L1BatchNumber(3)), - last_hard_pruned_l2_block: Some(L2BlockNumber(7)), - }, + test_pruning_info(3, 7), conn.pruning_dal().get_pruning_info().await.unwrap() ); @@ -267,12 +262,7 @@ async fn unconstrained_pruner_with_fresh_database() { .await .unwrap(); assert_eq!( - PruningInfo { - last_soft_pruned_l1_batch: Some(L1BatchNumber(6)), - last_soft_pruned_l2_block: Some(L2BlockNumber(13)), - last_hard_pruned_l1_batch: Some(L1BatchNumber(6)), - last_hard_pruned_l2_block: Some(L2BlockNumber(13)), - }, + test_pruning_info(6, 13), conn.pruning_dal().get_pruning_info().await.unwrap() ); } @@ -302,12 +292,7 @@ async fn pruning_blocked_after_first_chunk() { .unwrap(); assert_eq!( - PruningInfo { - last_soft_pruned_l1_batch: Some(L1BatchNumber(3)), - last_soft_pruned_l2_block: Some(L2BlockNumber(7)), - last_hard_pruned_l1_batch: Some(L1BatchNumber(3)), - last_hard_pruned_l2_block: Some(L2BlockNumber(7)), - }, + test_pruning_info(3, 7), conn.pruning_dal().get_pruning_info().await.unwrap() ); @@ -318,12 +303,7 @@ async fn pruning_blocked_after_first_chunk() { assert_matches!(outcome, PruningIterationOutcome::NoOp); // pruning shouldn't have progressed as chunk 6 cannot be pruned assert_eq!( - PruningInfo { - last_soft_pruned_l1_batch: Some(L1BatchNumber(3)), - last_soft_pruned_l2_block: Some(L2BlockNumber(7)), - last_hard_pruned_l1_batch: Some(L1BatchNumber(3)), - last_hard_pruned_l2_block: Some(L2BlockNumber(7)), - }, + test_pruning_info(3, 7), conn.pruning_dal().get_pruning_info().await.unwrap() ); } diff --git a/core/node/metadata_calculator/src/pruning.rs b/core/node/metadata_calculator/src/pruning.rs index 4ac05e55c302..1c6d6aa4e795 100644 --- a/core/node/metadata_calculator/src/pruning.rs +++ b/core/node/metadata_calculator/src/pruning.rs @@ -101,8 +101,8 @@ impl MerkleTreePruningTask { let 
pruning_info = storage.pruning_dal().get_pruning_info().await?; drop(storage); - if let Some(l1_batch_number) = pruning_info.last_hard_pruned_l1_batch { - let target_retained_l1_batch_number = l1_batch_number + 1; + if let Some(pruned) = pruning_info.last_hard_pruned { + let target_retained_l1_batch_number = pruned.l1_batch + 1; let target_retained_version = u64::from(target_retained_l1_batch_number.0); let Ok(prev_target_version) = pruner_handle.set_target_retained_version(target_retained_version) diff --git a/core/node/metadata_calculator/src/recovery/mod.rs b/core/node/metadata_calculator/src/recovery/mod.rs index ce7207471791..372cbc96a6df 100644 --- a/core/node/metadata_calculator/src/recovery/mod.rs +++ b/core/node/metadata_calculator/src/recovery/mod.rs @@ -132,7 +132,7 @@ impl InitParameters { let (l1_batch, l2_block); let mut expected_root_hash = None; - match (recovery_status, pruning_info.last_hard_pruned_l2_block) { + match (recovery_status, pruning_info.last_hard_pruned) { (Some(recovery), None) => { tracing::warn!( "Snapshot recovery {recovery:?} is present on the node, but pruning info is empty; assuming no pruning happened" @@ -141,21 +141,20 @@ impl InitParameters { l2_block = recovery.l2_block_number; expected_root_hash = Some(recovery.l1_batch_root_hash); } - (Some(recovery), Some(pruned_l2_block)) => { + (Some(recovery), Some(pruned)) => { // We have both recovery and some pruning on top of it. 
- l2_block = pruned_l2_block.max(recovery.l2_block_number); - l1_batch = pruning_info - .last_hard_pruned_l1_batch - .with_context(|| format!("malformed pruning info: {pruning_info:?}"))?; - if l1_batch == recovery.l1_batch_number { + l2_block = pruned.l2_block.max(recovery.l2_block_number); + l1_batch = pruned.l1_batch; + if let Some(root_hash) = pruned.l1_batch_root_hash { + expected_root_hash = Some(root_hash); + } else if l1_batch == recovery.l1_batch_number { expected_root_hash = Some(recovery.l1_batch_root_hash); } } - (None, Some(pruned_l2_block)) => { - l2_block = pruned_l2_block; - l1_batch = pruning_info - .last_hard_pruned_l1_batch - .with_context(|| format!("malformed pruning info: {pruning_info:?}"))?; + (None, Some(pruned)) => { + l2_block = pruned.l2_block; + l1_batch = pruned.l1_batch; + expected_root_hash = pruned.l1_batch_root_hash; } (None, None) => return Ok(None), }; @@ -384,9 +383,9 @@ impl AsyncTreeRecovery { snapshot_l2_block: L2BlockNumber, ) -> anyhow::Result<()> { let pruning_info = storage.pruning_dal().get_pruning_info().await?; - if let Some(last_hard_pruned_l2_block) = pruning_info.last_hard_pruned_l2_block { + if let Some(pruned) = pruning_info.last_hard_pruned { anyhow::ensure!( - last_hard_pruned_l2_block == snapshot_l2_block, + pruned.l2_block == snapshot_l2_block, "Additional data was pruned compared to tree recovery L2 block #{snapshot_l2_block}: {pruning_info:?}. 
\ Continuing recovery is impossible; to recover the tree, drop its RocksDB directory, stop pruning and restart recovery" ); diff --git a/core/node/metadata_calculator/src/tests.rs b/core/node/metadata_calculator/src/tests.rs index 9717ce5682ce..6ea02a3013a7 100644 --- a/core/node/metadata_calculator/src/tests.rs +++ b/core/node/metadata_calculator/src/tests.rs @@ -696,7 +696,7 @@ async fn setup_calculator_with_options( ) -> MetadataCalculator { let mut storage = pool.connection().await.unwrap(); let pruning_info = storage.pruning_dal().get_pruning_info().await.unwrap(); - let has_pruning_logs = pruning_info.last_hard_pruned_l1_batch.is_some(); + let has_pruning_logs = pruning_info.last_hard_pruned.is_some(); if !has_pruning_logs && storage.blocks_dal().is_genesis_needed().await.unwrap() { insert_genesis_batch(&mut storage, &GenesisParams::mock()) .await diff --git a/core/node/metadata_calculator/src/updater.rs b/core/node/metadata_calculator/src/updater.rs index 17fd5d900eab..2758bd1501c0 100644 --- a/core/node/metadata_calculator/src/updater.rs +++ b/core/node/metadata_calculator/src/updater.rs @@ -186,7 +186,7 @@ impl TreeUpdater { ) -> anyhow::Result<()> { let pruning_info = storage.pruning_dal().get_pruning_info().await?; anyhow::ensure!( - Some(l1_batch_number) > pruning_info.last_soft_pruned_l1_batch, + pruning_info.last_soft_pruned.map_or(true, |info| info.l1_batch < l1_batch_number), "L1 batch #{l1_batch_number}, next to be processed by the tree, is pruned; the tree cannot continue operating" ); Ok(()) From 299ba3d2935acb451ab316a81544a41d61204e5d Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 12 Nov 2024 16:24:45 +0200 Subject: [PATCH 2/5] Record batch root hashes in hard pruning logs --- ...f30cbc385e6fb6c8b8ae2c2d06b871a4cae72.json | 27 ++++++ core/lib/dal/src/pruning_dal/mod.rs | 91 +++++++++++-------- core/lib/dal/src/pruning_dal/tests.rs | 38 ++++---- 3 files changed, 102 insertions(+), 54 deletions(-) create mode 100644 
core/lib/dal/.sqlx/query-3785c01a8eb1eaeaf7baf0a8ba7f30cbc385e6fb6c8b8ae2c2d06b871a4cae72.json diff --git a/core/lib/dal/.sqlx/query-3785c01a8eb1eaeaf7baf0a8ba7f30cbc385e6fb6c8b8ae2c2d06b871a4cae72.json b/core/lib/dal/.sqlx/query-3785c01a8eb1eaeaf7baf0a8ba7f30cbc385e6fb6c8b8ae2c2d06b871a4cae72.json new file mode 100644 index 000000000000..dfaccfbc99d3 --- /dev/null +++ b/core/lib/dal/.sqlx/query-3785c01a8eb1eaeaf7baf0a8ba7f30cbc385e6fb6c8b8ae2c2d06b871a4cae72.json @@ -0,0 +1,27 @@ +{ + "db_name": "PostgreSQL", + "query": "\n INSERT INTO\n pruning_log (\n pruned_l1_batch,\n pruned_miniblock,\n pruned_l1_batch_root_hash,\n type,\n created_at,\n updated_at\n )\n VALUES\n ($1, $2, $3, $4, NOW(), NOW())\n ", + "describe": { + "columns": [], + "parameters": { + "Left": [ + "Int8", + "Int8", + "Bytea", + { + "Custom": { + "name": "prune_type", + "kind": { + "Enum": [ + "Soft", + "Hard" + ] + } + } + } + ] + }, + "nullable": [] + }, + "hash": "3785c01a8eb1eaeaf7baf0a8ba7f30cbc385e6fb6c8b8ae2c2d06b871a4cae72" +} diff --git a/core/lib/dal/src/pruning_dal/mod.rs b/core/lib/dal/src/pruning_dal/mod.rs index 9fe7b9cb570d..0e5b9c99cc38 100644 --- a/core/lib/dal/src/pruning_dal/mod.rs +++ b/core/lib/dal/src/pruning_dal/mod.rs @@ -3,7 +3,7 @@ use std::ops; use zksync_db_connection::{connection::Connection, error::DalResult, instrument::InstrumentExt}; use zksync_types::{L1BatchNumber, L2BlockNumber, H256}; -use crate::Core; +use crate::{Core, CoreDal}; #[cfg(test)] mod tests; @@ -155,7 +155,7 @@ impl PruningDal<'_, '_> { Ok(row.map(PruningInfo::from).unwrap_or_default()) } - pub async fn soft_prune_batches_range( + pub async fn insert_soft_pruning_log( &mut self, last_l1_batch_to_prune: L1BatchNumber, last_l2_block_to_prune: L2BlockNumber, @@ -188,11 +188,22 @@ impl PruningDal<'_, '_> { Ok(()) } + /// If the pruned L1 batch does not have a root hash present in the storage, this is a no-op. 
pub async fn hard_prune_batches_range( &mut self, last_l1_batch_to_prune: L1BatchNumber, last_l2_block_to_prune: L2BlockNumber, ) -> DalResult { + let Some(last_l1_batch_root_hash) = self + .storage + .blocks_dal() + .get_l1_batch_state_root(last_l1_batch_to_prune) + .await? + else { + // Assume that pruning has already occurred. + return Ok(HardPruningStats::default()); + }; + let row = sqlx::query!( r#" SELECT @@ -210,42 +221,44 @@ impl PruningDal<'_, '_> { .fetch_one(self.storage) .await?; - // We don't have any L2 blocks available when recovering from a snapshot - let stats = if let Some(first_l2_block_to_prune) = row.first_miniblock_to_prune { - let first_l2_block_to_prune = L2BlockNumber(first_l2_block_to_prune as u32); - - let deleted_events = self - .delete_events(first_l2_block_to_prune..=last_l2_block_to_prune) - .await?; - let deleted_l2_to_l1_logs = self - .delete_l2_to_l1_logs(first_l2_block_to_prune..=last_l2_block_to_prune) - .await?; - let deleted_call_traces = self - .delete_call_traces(first_l2_block_to_prune..=last_l2_block_to_prune) - .await?; - self.clear_transaction_fields(first_l2_block_to_prune..=last_l2_block_to_prune) - .await?; - - let deleted_storage_logs = self - .prune_storage_logs(first_l2_block_to_prune..=last_l2_block_to_prune) - .await?; - let deleted_l1_batches = self.delete_l1_batches(last_l1_batch_to_prune).await?; - let deleted_l2_blocks = self.delete_l2_blocks(last_l2_block_to_prune).await?; - - HardPruningStats { - deleted_l1_batches, - deleted_l2_blocks, - deleted_events, - deleted_l2_to_l1_logs, - deleted_call_traces, - deleted_storage_logs, - } - } else { - HardPruningStats::default() + let Some(first_l2_block_to_prune) = row.first_miniblock_to_prune else { + return Ok(HardPruningStats::default()); }; - self.insert_hard_pruning_log(last_l1_batch_to_prune, last_l2_block_to_prune) + let first_l2_block_to_prune = L2BlockNumber(first_l2_block_to_prune as u32); + + let deleted_events = self + 
.delete_events(first_l2_block_to_prune..=last_l2_block_to_prune) + .await?; + let deleted_l2_to_l1_logs = self + .delete_l2_to_l1_logs(first_l2_block_to_prune..=last_l2_block_to_prune) + .await?; + let deleted_call_traces = self + .delete_call_traces(first_l2_block_to_prune..=last_l2_block_to_prune) + .await?; + self.clear_transaction_fields(first_l2_block_to_prune..=last_l2_block_to_prune) + .await?; + + let deleted_storage_logs = self + .prune_storage_logs(first_l2_block_to_prune..=last_l2_block_to_prune) .await?; + let deleted_l1_batches = self.delete_l1_batches(last_l1_batch_to_prune).await?; + let deleted_l2_blocks = self.delete_l2_blocks(last_l2_block_to_prune).await?; + + let stats = HardPruningStats { + deleted_l1_batches, + deleted_l2_blocks, + deleted_events, + deleted_l2_to_l1_logs, + deleted_call_traces, + deleted_storage_logs, + }; + self.insert_hard_pruning_log( + last_l1_batch_to_prune, + last_l2_block_to_prune, + last_l1_batch_root_hash, + ) + .await?; Ok(stats) } @@ -440,11 +453,11 @@ impl PruningDal<'_, '_> { Ok(execution_result.rows_affected()) } - // FIXME: record removed L1 batch root hash - async fn insert_hard_pruning_log( + pub async fn insert_hard_pruning_log( &mut self, last_l1_batch_to_prune: L1BatchNumber, last_l2_block_to_prune: L2BlockNumber, + last_pruned_l1_batch_root_hash: H256, ) -> DalResult<()> { sqlx::query!( r#" @@ -452,15 +465,17 @@ impl PruningDal<'_, '_> { pruning_log ( pruned_l1_batch, pruned_miniblock, + pruned_l1_batch_root_hash, type, created_at, updated_at ) VALUES - ($1, $2, $3, NOW(), NOW()) + ($1, $2, $3, $4, NOW(), NOW()) "#, i64::from(last_l1_batch_to_prune.0), i64::from(last_l2_block_to_prune.0), + last_pruned_l1_batch_root_hash.as_bytes(), PruneType::Hard as PruneType ) .instrument("hard_prune_batches_range#insert_pruning_log") diff --git a/core/lib/dal/src/pruning_dal/tests.rs b/core/lib/dal/src/pruning_dal/tests.rs index 9f7310d54735..16f4c6d7eb3e 100644 --- a/core/lib/dal/src/pruning_dal/tests.rs +++ 
b/core/lib/dal/src/pruning_dal/tests.rs @@ -96,6 +96,13 @@ async fn insert_l1_batch(conn: &mut Connection<'_, Core>, l1_batch_number: L1Bat .insert_mock_l1_batch(&header) .await .unwrap(); + conn.blocks_dal() + .set_l1_batch_hash( + l1_batch_number, + H256::from_low_u64_be(l1_batch_number.0.into()), + ) + .await + .unwrap(); } async fn insert_realistic_l1_batches(conn: &mut Connection<'_, Core>, l1_batches_count: u32) { @@ -121,11 +128,11 @@ async fn insert_realistic_l1_batches(conn: &mut Connection<'_, Core>, l1_batches } } -async fn assert_l1_batch_objects_exists( +async fn assert_l1_batches_exist( conn: &mut Connection<'_, Core>, l1_batches_range: ops::RangeInclusive, ) { - for l1_batch_number in l1_batches_range.start().0..l1_batches_range.end().0 { + for l1_batch_number in l1_batches_range.start().0..=l1_batches_range.end().0 { let l1_batch_number = L1BatchNumber(l1_batch_number); assert!(conn .blocks_dal() @@ -150,7 +157,7 @@ async fn assert_l1_batch_objects_exists( } } -async fn assert_l1_batch_objects_dont_exist( +async fn assert_l1_batches_not_exist( conn: &mut Connection<'_, Core>, l1_batches_range: ops::RangeInclusive, ) { @@ -159,7 +166,7 @@ async fn assert_l1_batch_objects_dont_exist( .dump_all_storage_logs_for_tests() .await; - for l1_batch_number in l1_batches_range.start().0..l1_batches_range.end().0 { + for l1_batch_number in l1_batches_range.start().0..=l1_batches_range.end().0 { let l1_batch_number = L1BatchNumber(l1_batch_number); let mut l2_block_number = L2BlockNumber(l1_batch_number.0 * 2); assert!(conn @@ -212,7 +219,7 @@ async fn soft_pruning_works() { transaction .pruning_dal() - .soft_prune_batches_range(L1BatchNumber(5), L2BlockNumber(11)) + .insert_soft_pruning_log(L1BatchNumber(5), L2BlockNumber(11)) .await .unwrap(); assert_eq!( @@ -228,7 +235,7 @@ async fn soft_pruning_works() { transaction .pruning_dal() - .soft_prune_batches_range(L1BatchNumber(10), L2BlockNumber(21)) + .insert_soft_pruning_log(L1BatchNumber(10), L2BlockNumber(21)) 
.await .unwrap(); assert_eq!( @@ -244,7 +251,7 @@ async fn soft_pruning_works() { transaction .pruning_dal() - .hard_prune_batches_range(L1BatchNumber(10), L2BlockNumber(21)) + .insert_hard_pruning_log(L1BatchNumber(10), L2BlockNumber(21), H256::repeat_byte(23)) .await .unwrap(); assert_eq!( @@ -256,7 +263,7 @@ async fn soft_pruning_works() { last_hard_pruned: Some(HardPruningInfo { l2_block: L2BlockNumber(21), l1_batch: L1BatchNumber(10), - l1_batch_root_hash: None, // FIXME + l1_batch_root_hash: Some(H256::repeat_byte(23)), }), }, transaction.pruning_dal().get_pruning_info().await.unwrap() @@ -367,7 +374,7 @@ async fn storage_logs_pruning_works_correctly() { let stats = transaction .pruning_dal() - .hard_prune_batches_range(L1BatchNumber(10), L2BlockNumber(21)) + .hard_prune_batches_range(L1BatchNumber(9), L2BlockNumber(19)) .await .unwrap(); let actual_logs = transaction @@ -398,7 +405,7 @@ async fn l1_batches_can_be_hard_pruned() { let mut transaction = conn.start_transaction().await.unwrap(); insert_realistic_l1_batches(&mut transaction, 10).await; - assert_l1_batch_objects_exists(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(10)).await; + assert_l1_batches_exist(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(9)).await; assert!(transaction .pruning_dal() .get_pruning_info() @@ -413,8 +420,8 @@ async fn l1_batches_can_be_hard_pruned() { .await .unwrap(); - assert_l1_batch_objects_dont_exist(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(5)).await; - assert_l1_batch_objects_exists(&mut transaction, L1BatchNumber(6)..=L1BatchNumber(10)).await; + assert_l1_batches_not_exist(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(5)).await; + assert_l1_batches_exist(&mut transaction, L1BatchNumber(6)..=L1BatchNumber(9)).await; let pruning_info = transaction.pruning_dal().get_pruning_info().await.unwrap(); assert_eq!( L1BatchNumber(5), @@ -423,7 +430,7 @@ async fn l1_batches_can_be_hard_pruned() { let stats = transaction .pruning_dal() - 
.hard_prune_batches_range(L1BatchNumber(10), L2BlockNumber(21)) + .hard_prune_batches_range(L1BatchNumber(9), L2BlockNumber(19)) .await .unwrap(); assert_eq!(stats.deleted_l1_batches, 4); @@ -431,11 +438,10 @@ async fn l1_batches_can_be_hard_pruned() { assert_eq!(stats.deleted_events, 40); assert_eq!(stats.deleted_l2_to_l1_logs, 40); - assert_l1_batch_objects_dont_exist(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(10)) - .await; + assert_l1_batches_not_exist(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(9)).await; let pruning_info = transaction.pruning_dal().get_pruning_info().await.unwrap(); assert_eq!( - L1BatchNumber(10), + L1BatchNumber(9), pruning_info.last_hard_pruned.unwrap().l1_batch ); } From 5c601341cc520a58ff3a22d2c939a3537159e960 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 12 Nov 2024 16:36:11 +0200 Subject: [PATCH 3/5] Change `hard_prune_batches_range` semantics --- core/lib/dal/src/pruning_dal/mod.rs | 20 ++------------------ core/lib/dal/src/pruning_dal/tests.rs | 10 ---------- 2 files changed, 2 insertions(+), 28 deletions(-) diff --git a/core/lib/dal/src/pruning_dal/mod.rs b/core/lib/dal/src/pruning_dal/mod.rs index 0e5b9c99cc38..85127ac7030b 100644 --- a/core/lib/dal/src/pruning_dal/mod.rs +++ b/core/lib/dal/src/pruning_dal/mod.rs @@ -3,7 +3,7 @@ use std::ops; use zksync_db_connection::{connection::Connection, error::DalResult, instrument::InstrumentExt}; use zksync_types::{L1BatchNumber, L2BlockNumber, H256}; -use crate::{Core, CoreDal}; +use crate::Core; #[cfg(test)] mod tests; @@ -188,22 +188,12 @@ impl PruningDal<'_, '_> { Ok(()) } - /// If the pruned L1 batch does not have a root hash present in the storage, this is a no-op. + /// Does not insert pruning logs; the caller is responsible to do this! 
pub async fn hard_prune_batches_range( &mut self, last_l1_batch_to_prune: L1BatchNumber, last_l2_block_to_prune: L2BlockNumber, ) -> DalResult { - let Some(last_l1_batch_root_hash) = self - .storage - .blocks_dal() - .get_l1_batch_state_root(last_l1_batch_to_prune) - .await? - else { - // Assume that pruning has already occurred. - return Ok(HardPruningStats::default()); - }; - let row = sqlx::query!( r#" SELECT @@ -253,12 +243,6 @@ impl PruningDal<'_, '_> { deleted_call_traces, deleted_storage_logs, }; - self.insert_hard_pruning_log( - last_l1_batch_to_prune, - last_l2_block_to_prune, - last_l1_batch_root_hash, - ) - .await?; Ok(stats) } diff --git a/core/lib/dal/src/pruning_dal/tests.rs b/core/lib/dal/src/pruning_dal/tests.rs index 16f4c6d7eb3e..14f664a401fe 100644 --- a/core/lib/dal/src/pruning_dal/tests.rs +++ b/core/lib/dal/src/pruning_dal/tests.rs @@ -422,11 +422,6 @@ async fn l1_batches_can_be_hard_pruned() { assert_l1_batches_not_exist(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(5)).await; assert_l1_batches_exist(&mut transaction, L1BatchNumber(6)..=L1BatchNumber(9)).await; - let pruning_info = transaction.pruning_dal().get_pruning_info().await.unwrap(); - assert_eq!( - L1BatchNumber(5), - pruning_info.last_hard_pruned.unwrap().l1_batch - ); let stats = transaction .pruning_dal() @@ -439,11 +434,6 @@ async fn l1_batches_can_be_hard_pruned() { assert_eq!(stats.deleted_l2_to_l1_logs, 40); assert_l1_batches_not_exist(&mut transaction, L1BatchNumber(1)..=L1BatchNumber(9)).await; - let pruning_info = transaction.pruning_dal().get_pruning_info().await.unwrap(); - assert_eq!( - L1BatchNumber(9), - pruning_info.last_hard_pruned.unwrap().l1_batch - ); } #[tokio::test] From 551d6c657fa43ed1483b5d5d8b5daeb1fd1e6317 Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 12 Nov 2024 17:28:26 +0200 Subject: [PATCH 4/5] Update hard pruning uses --- core/lib/snapshots_applier/src/lib.rs | 5 +- core/node/consensus/src/storage/testonly.rs | 20 ++++-- 
core/node/db_pruner/src/lib.rs | 22 ++++++- core/node/db_pruner/src/tests.rs | 48 +++++++-------- core/node/metadata_calculator/src/pruning.rs | 10 ++- .../metadata_calculator/src/recovery/tests.rs | 61 +++++++++---------- core/node/metadata_calculator/src/tests.rs | 7 ++- core/node/test_utils/src/lib.rs | 10 +-- 8 files changed, 108 insertions(+), 75 deletions(-) diff --git a/core/lib/snapshots_applier/src/lib.rs b/core/lib/snapshots_applier/src/lib.rs index b4d24a0b1851..ac96c78475c5 100644 --- a/core/lib/snapshots_applier/src/lib.rs +++ b/core/lib/snapshots_applier/src/lib.rs @@ -725,16 +725,17 @@ impl<'a> SnapshotsApplier<'a> { // This allows to not deal with the corner cases when a node was recovered from a snapshot, but its pruning log is empty. storage_transaction .pruning_dal() - .soft_prune_batches_range( + .insert_soft_pruning_log( this.applied_snapshot_status.l1_batch_number, this.applied_snapshot_status.l2_block_number, ) .await?; storage_transaction .pruning_dal() - .hard_prune_batches_range( + .insert_hard_pruning_log( this.applied_snapshot_status.l1_batch_number, this.applied_snapshot_status.l2_block_number, + this.applied_snapshot_status.l1_batch_root_hash, ) .await?; } diff --git a/core/node/consensus/src/storage/testonly.rs b/core/node/consensus/src/storage/testonly.rs index 0f29e2468267..295ae4fc1790 100644 --- a/core/node/consensus/src/storage/testonly.rs +++ b/core/node/consensus/src/storage/testonly.rs @@ -247,15 +247,20 @@ impl ConnectionPool { .await .wrap("get_l2_block_range_of_l1_batch()")? .context("batch not found")?; - let last_batch = L1BatchNumber(last_batch.0.try_into().context("oveflow")?); - let last_block = L2BlockNumber(last_block.0.try_into().context("oveflow")?); + let last_batch = L1BatchNumber(last_batch.0.try_into().context("overflow")?); + let last_batch_root_hash = ctx + .wait(conn.0.blocks_dal().get_l1_batch_state_root(last_batch)) + .await? + .context("get_l1_batch_state_root()")? 
+ .unwrap_or_default(); + let last_block = L2BlockNumber(last_block.0.try_into().context("overflow")?); ctx.wait( conn.0 .pruning_dal() - .soft_prune_batches_range(last_batch, last_block), + .insert_soft_pruning_log(last_batch, last_block), ) .await? - .context("soft_prune_batches_range()")?; + .context("insert_soft_pruning_log()")?; ctx.wait( conn.0 .pruning_dal() @@ -263,6 +268,13 @@ impl ConnectionPool { ) .await? .context("hard_prune_batches_range()")?; + ctx.wait(conn.0.pruning_dal().insert_hard_pruning_log( + last_batch, + last_block, + last_batch_root_hash, + )) + .await? + .context("insert_hard_pruning_log()")?; Ok(()) } } diff --git a/core/node/db_pruner/src/lib.rs b/core/node/db_pruner/src/lib.rs index b94757d93523..6ec8b2ce4602 100644 --- a/core/node/db_pruner/src/lib.rs +++ b/core/node/db_pruner/src/lib.rs @@ -207,7 +207,7 @@ impl DbPruner { .with_context(|| format!("L1 batch #{next_l1_batch_to_prune} is ready to be pruned, but has no L2 blocks"))?; transaction .pruning_dal() - .soft_prune_batches_range(next_l1_batch_to_prune, next_l2_block_to_prune) + .insert_soft_pruning_log(next_l1_batch_to_prune, next_l2_block_to_prune) .await?; transaction.commit().await?; @@ -239,6 +239,17 @@ impl DbPruner { format!("bogus pruning info {current_pruning_info:?}: trying to hard-prune data, but there is no soft-pruned data") })?; + let last_pruned_l1_batch_root_hash = transaction + .blocks_dal() + .get_l1_batch_state_root(soft_pruned.l1_batch) + .await? + .with_context(|| { + format!( + "hard-pruned L1 batch #{} does not have root hash", + soft_pruned.l1_batch + ) + })?; + let mut dal = transaction.pruning_dal(); let stats = tokio::select! 
{ result = dal.hard_prune_batches_range( @@ -255,13 +266,20 @@ impl DbPruner { } }; METRICS.observe_hard_pruning(stats); + + dal.insert_hard_pruning_log( + soft_pruned.l1_batch, + soft_pruned.l2_block, + last_pruned_l1_batch_root_hash, + ) + .await?; transaction.commit().await?; let latency = latency.observe(); let hard_pruning_info = HardPruningInfo { l1_batch: soft_pruned.l1_batch, l2_block: soft_pruned.l2_block, - l1_batch_root_hash: None, // FIXME + l1_batch_root_hash: Some(last_pruned_l1_batch_root_hash), }; tracing::info!("Hard pruned data up to {hard_pruning_info:?}, operation took {latency:?}"); current_pruning_info.last_hard_pruned = Some(hard_pruning_info); diff --git a/core/node/db_pruner/src/tests.rs b/core/node/db_pruner/src/tests.rs index 59eb9df8f202..099914541fee 100644 --- a/core/node/db_pruner/src/tests.rs +++ b/core/node/db_pruner/src/tests.rs @@ -12,8 +12,7 @@ use zksync_node_test_utils::{ l1_batch_metadata_to_commitment_artifacts, }; use zksync_types::{ - aggregated_operations::AggregatedActionType, block::L2BlockHeader, Address, L2BlockNumber, - ProtocolVersion, H256, + aggregated_operations::AggregatedActionType, L2BlockNumber, ProtocolVersion, H256, }; use super::*; @@ -95,8 +94,8 @@ async fn is_l1_batch_prunable_works() { async fn insert_l2_blocks( conn: &mut Connection<'_, Core>, - l1_batches_count: u64, - l2_blocks_per_batch: u64, + l1_batches_count: u32, + l2_blocks_per_batch: u32, ) { conn.protocol_versions_dal() .save_protocol_version_with_tx(&ProtocolVersion::default()) @@ -104,36 +103,31 @@ async fn insert_l2_blocks( .unwrap(); for l1_batch_number in 0..l1_batches_count { + let l1_batch_number = L1BatchNumber(l1_batch_number); for l2_block_index in 0..l2_blocks_per_batch { - let l2_block_number = - L2BlockNumber((l1_batch_number * l2_blocks_per_batch + l2_block_index) as u32); - let l2_block_header = L2BlockHeader { - number: l2_block_number, - timestamp: 0, - hash: H256::from_low_u64_be(u64::from(l2_block_number.0)), - l1_tx_count: 
0, - l2_tx_count: 0, - fee_account_address: Address::repeat_byte(1), - base_fee_per_gas: 0, - gas_per_pubdata_limit: 0, - batch_fee_input: Default::default(), - base_system_contracts_hashes: Default::default(), - protocol_version: Some(Default::default()), - virtual_blocks: 0, - gas_limit: 0, - logs_bloom: Default::default(), - pubdata_params: Default::default(), - }; + let l2_block_number = l1_batch_number.0 * l2_blocks_per_batch + l2_block_index; + let l2_block_header = create_l2_block(l2_block_number); conn.blocks_dal() .insert_l2_block(&l2_block_header) .await .unwrap(); conn.blocks_dal() - .mark_l2_blocks_as_executed_in_l1_batch(L1BatchNumber(l1_batch_number as u32)) + .mark_l2_blocks_as_executed_in_l1_batch(l1_batch_number) .await .unwrap(); } + + let l1_batch_header = create_l1_batch(l1_batch_number.0); + conn.blocks_dal() + .insert_mock_l1_batch(&l1_batch_header) + .await + .unwrap(); + let root_hash = H256::from_low_u64_be(l1_batch_number.0.into()); + conn.blocks_dal() + .set_l1_batch_hash(l1_batch_number, root_hash) + .await + .unwrap(); } } @@ -144,7 +138,7 @@ async fn hard_pruning_ignores_conditions_checks() { insert_l2_blocks(&mut conn, 10, 2).await; conn.pruning_dal() - .soft_prune_batches_range(L1BatchNumber(2), L2BlockNumber(5)) + .insert_soft_pruning_log(L1BatchNumber(2), L2BlockNumber(5)) .await .unwrap(); @@ -183,7 +177,7 @@ fn test_pruning_info(l1_batch: u32, l2_block: u32) -> PruningInfo { last_hard_pruned: Some(HardPruningInfo { l1_batch: L1BatchNumber(l1_batch), l2_block: L2BlockNumber(l2_block), - l1_batch_root_hash: None, // FIXME + l1_batch_root_hash: Some(H256::from_low_u64_be(l1_batch.into())), }), } } @@ -194,7 +188,7 @@ async fn pruner_catches_up_with_hard_pruning_up_to_soft_pruning_boundary_ignorin let mut conn = pool.connection().await.unwrap(); insert_l2_blocks(&mut conn, 10, 2).await; conn.pruning_dal() - .soft_prune_batches_range(L1BatchNumber(2), L2BlockNumber(5)) + .insert_soft_pruning_log(L1BatchNumber(2), L2BlockNumber(5)) 
.await .unwrap(); diff --git a/core/node/metadata_calculator/src/pruning.rs b/core/node/metadata_calculator/src/pruning.rs index 1c6d6aa4e795..77e4c30dc1d1 100644 --- a/core/node/metadata_calculator/src/pruning.rs +++ b/core/node/metadata_calculator/src/pruning.rs @@ -148,7 +148,7 @@ mod tests { use test_casing::test_casing; use zksync_node_genesis::{insert_genesis_batch, GenesisParams}; use zksync_node_test_utils::prepare_recovery_snapshot; - use zksync_types::{L1BatchNumber, L2BlockNumber}; + use zksync_types::{L1BatchNumber, L2BlockNumber, H256}; use super::*; use crate::{ @@ -194,6 +194,11 @@ mod tests { .hard_prune_batches_range(L1BatchNumber(3), L2BlockNumber(3)) .await .unwrap(); + storage + .pruning_dal() + .insert_hard_pruning_log(L1BatchNumber(3), L2BlockNumber(3), H256::zero()) + .await + .unwrap(); while reader.clone().info().await.min_l1_batch_number.unwrap() <= L1BatchNumber(3) { tokio::time::sleep(POLL_INTERVAL).await; @@ -322,9 +327,10 @@ mod tests { // Prune first 3 created batches in Postgres. storage .pruning_dal() - .hard_prune_batches_range( + .insert_hard_pruning_log( snapshot_recovery.l1_batch_number + 3, snapshot_recovery.l2_block_number + 3, + H256::zero(), // not used ) .await .unwrap(); diff --git a/core/node/metadata_calculator/src/recovery/tests.rs b/core/node/metadata_calculator/src/recovery/tests.rs index 4b2ba578a5b6..e2d281e3da9b 100644 --- a/core/node/metadata_calculator/src/recovery/tests.rs +++ b/core/node/metadata_calculator/src/recovery/tests.rs @@ -1,6 +1,6 @@ //! Tests for metadata calculator snapshot recovery. 
-use std::{collections::HashMap, path::Path, sync::Mutex}; +use std::{path::Path, sync::Mutex}; use assert_matches::assert_matches; use tempfile::TempDir; @@ -16,7 +16,7 @@ use zksync_merkle_tree::{domain::ZkSyncTree, recovery::PersistenceThreadHandle, use zksync_node_genesis::{insert_genesis_batch, GenesisParams}; use zksync_node_test_utils::prepare_recovery_snapshot; use zksync_storage::RocksDB; -use zksync_types::{L1BatchNumber, U256}; +use zksync_types::L1BatchNumber; use super::*; use crate::{ @@ -116,9 +116,16 @@ async fn prune_storage(pool: &ConnectionPool, pruned_l1_batch: L1BatchNumb .await .unwrap() .expect("L1 batch not present in Postgres"); + let root_hash = storage + .blocks_dal() + .get_l1_batch_state_root(dbg!(pruned_l1_batch)) + .await + .unwrap() + .expect("L1 batch does not have root hash"); + storage .pruning_dal() - .soft_prune_batches_range(pruned_l1_batch, pruned_l2_block) + .insert_soft_pruning_log(pruned_l1_batch, pruned_l2_block) .await .unwrap(); let pruning_stats = storage @@ -130,6 +137,11 @@ async fn prune_storage(pool: &ConnectionPool, pruned_l1_batch: L1BatchNumb pruning_stats.deleted_l1_batches > 0 && pruning_stats.deleted_l2_blocks > 0, "{pruning_stats:?}" ); + storage + .pruning_dal() + .insert_hard_pruning_log(pruned_l1_batch, pruned_l2_block, root_hash) + .await + .unwrap(); } #[tokio::test] @@ -425,8 +437,7 @@ async fn entire_recovery_workflow(case: RecoveryWorkflowCase) { calculator_task.await.expect("calculator panicked").unwrap(); } -/// `pruned_batches == 0` is a sanity check. 
-#[test_casing(4, [0, 1, 2, 4])] +#[test_casing(3, [1, 2, 4])] #[tokio::test] async fn recovery_with_further_pruning(pruned_batches: u32) { const NEW_BATCH_COUNT: usize = 5; @@ -459,38 +470,17 @@ async fn recovery_with_further_pruning(pruned_batches: u32) { .await; db_transaction.commit().await.unwrap(); - let all_logs = storage - .storage_logs_dal() - .dump_all_storage_logs_for_tests() - .await; - assert_eq!(all_logs.len(), 400); - let initial_writes = storage - .storage_logs_dedup_dal() - .dump_all_initial_writes_for_tests() - .await; - let initial_writes: HashMap<_, _> = initial_writes - .into_iter() - .map(|write| (write.hashed_key, write.index)) - .collect(); - drop(storage); + // Run the first tree instance to compute root hashes for all batches. + let temp_dir = TempDir::new().expect("failed get temporary directory for RocksDB"); + let (calculator, _) = + setup_calculator(&temp_dir.path().join("first"), pool.clone(), true).await; + let expected_root_hash = run_calculator(calculator).await; - let instructions: Vec<_> = all_logs - .iter() - .map(|log| { - let leaf_index = initial_writes[&log.hashed_key]; - let key = U256::from_little_endian(log.hashed_key.as_bytes()); - TreeInstruction::write(key, leaf_index, log.value) - }) - .collect(); - let expected_root_hash = ZkSyncTree::process_genesis_batch(&instructions).root_hash; - - if pruned_batches > 0 { - prune_storage(&pool, snapshot_recovery.l1_batch_number + pruned_batches).await; - } + prune_storage(&pool, snapshot_recovery.l1_batch_number + pruned_batches).await; // Create a new tree instance. It should recover and process the remaining batches. 
let temp_dir = TempDir::new().expect("failed get temporary directory for RocksDB"); - let (calculator, _) = setup_calculator(temp_dir.path(), pool, true).await; + let (calculator, _) = setup_calculator(&temp_dir.path().join("new"), pool, true).await; assert_eq!(run_calculator(calculator).await, expected_root_hash); } @@ -519,6 +509,11 @@ async fn pruning_during_recovery_is_detected() { let logs = gen_storage_logs(200..400, 5); extend_db_state(&mut storage, logs).await; drop(storage); + + // Set root hashes for all L1 batches in Postgres. + let (calculator, _) = + setup_calculator(&temp_dir.path().join("first"), pool.clone(), true).await; + run_calculator(calculator).await; prune_storage(&pool, L1BatchNumber(1)).await; let tree_path = temp_dir.path().join("recovery"); diff --git a/core/node/metadata_calculator/src/tests.rs b/core/node/metadata_calculator/src/tests.rs index 6ea02a3013a7..ff939d1ae582 100644 --- a/core/node/metadata_calculator/src/tests.rs +++ b/core/node/metadata_calculator/src/tests.rs @@ -402,7 +402,7 @@ async fn error_on_pruned_next_l1_batch(sealed_protective_reads: bool) { extend_db_state(&mut storage, new_logs).await; storage .pruning_dal() - .soft_prune_batches_range(L1BatchNumber(5), L2BlockNumber(5)) + .insert_soft_pruning_log(L1BatchNumber(5), L2BlockNumber(5)) .await .unwrap(); storage @@ -410,6 +410,11 @@ async fn error_on_pruned_next_l1_batch(sealed_protective_reads: bool) { .hard_prune_batches_range(L1BatchNumber(5), L2BlockNumber(5)) .await .unwrap(); + storage + .pruning_dal() + .insert_hard_pruning_log(L1BatchNumber(5), L2BlockNumber(5), H256::zero()) + .await + .unwrap(); // Sanity check: there should be no pruned batch headers. 
let next_l1_batch_header = storage .blocks_dal() diff --git a/core/node/test_utils/src/lib.rs b/core/node/test_utils/src/lib.rs index 9a02c18cd235..ac900e72bb6b 100644 --- a/core/node/test_utils/src/lib.rs +++ b/core/node/test_utils/src/lib.rs @@ -382,16 +382,18 @@ pub async fn recover( storage .pruning_dal() - .soft_prune_batches_range(snapshot.l1_batch.number, snapshot.l2_block.number) + .insert_soft_pruning_log(snapshot.l1_batch.number, snapshot.l2_block.number) .await .unwrap(); - storage .pruning_dal() - .hard_prune_batches_range(snapshot.l1_batch.number, snapshot.l2_block.number) + .insert_hard_pruning_log( + snapshot.l1_batch.number, + snapshot.l2_block.number, + snapshot_recovery.l1_batch_root_hash, + ) .await .unwrap(); - storage.commit().await.unwrap(); snapshot_recovery } From 3ac067b13867a9cf14121e1bf4b9818c294f90dd Mon Sep 17 00:00:00 2001 From: Alex Ostrovski Date: Tue, 12 Nov 2024 18:03:15 +0200 Subject: [PATCH 5/5] Test detecting root hash mismatch after pruning --- .../metadata_calculator/src/recovery/tests.rs | 64 ++++++++++++++++++- 1 file changed, 63 insertions(+), 1 deletion(-) diff --git a/core/node/metadata_calculator/src/recovery/tests.rs b/core/node/metadata_calculator/src/recovery/tests.rs index e2d281e3da9b..8b5371441162 100644 --- a/core/node/metadata_calculator/src/recovery/tests.rs +++ b/core/node/metadata_calculator/src/recovery/tests.rs @@ -28,6 +28,8 @@ use crate::{ MetadataCalculator, MetadataCalculatorConfig, }; +impl HandleRecoveryEvent for () {} + #[test] fn calculating_chunk_count() { let mut snapshot = InitParameters { @@ -118,7 +120,7 @@ async fn prune_storage(pool: &ConnectionPool, pruned_l1_batch: L1BatchNumb .expect("L1 batch not present in Postgres"); let root_hash = storage .blocks_dal() - .get_l1_batch_state_root(dbg!(pruned_l1_batch)) + .get_l1_batch_state_root(pruned_l1_batch) .await .unwrap() .expect("L1 batch does not have root hash"); @@ -484,6 +486,66 @@ async fn recovery_with_further_pruning(pruned_batches: u32) 
{ assert_eq!(run_calculator(calculator).await, expected_root_hash); } +#[tokio::test] +async fn detecting_root_hash_mismatch_after_pruning() { + let pool = ConnectionPool::::test_pool().await; + let snapshot_logs = gen_storage_logs(100..300, 1).pop().unwrap(); + let mut storage = pool.connection().await.unwrap(); + let mut db_transaction = storage.start_transaction().await.unwrap(); + let snapshot_recovery = prepare_recovery_snapshot( + &mut db_transaction, + L1BatchNumber(23), + L2BlockNumber(42), + &snapshot_logs, + ) + .await; + + let logs = gen_storage_logs(200..400, 5); + extend_db_state_from_l1_batch( + &mut db_transaction, + snapshot_recovery.l1_batch_number + 1, + snapshot_recovery.l2_block_number + 1, + logs, + ) + .await; + // Intentionally add an incorrect root hash of the batch to be pruned. + db_transaction + .blocks_dal() + .set_l1_batch_hash(snapshot_recovery.l1_batch_number + 1, H256::repeat_byte(42)) + .await + .unwrap(); + db_transaction.commit().await.unwrap(); + + prune_storage(&pool, snapshot_recovery.l1_batch_number + 1).await; + + let temp_dir = TempDir::new().expect("failed get temporary directory for RocksDB"); + let config = MetadataCalculatorRecoveryConfig::default(); + let (tree, _) = create_tree_recovery(temp_dir.path(), L1BatchNumber(1), &config).await; + let (_stop_sender, stop_receiver) = watch::channel(false); + let recovery_options = RecoveryOptions { + chunk_count: 5, + concurrency_limit: 1, + events: Box::new(()), + }; + let init_params = InitParameters::new(&pool, &config) + .await + .unwrap() + .expect("no init params"); + assert_eq!(init_params.expected_root_hash, Some(H256::repeat_byte(42))); + + let err = tree + .recover(init_params, recovery_options, &pool, &stop_receiver) + .await + .unwrap_err(); + let err = format!("{err:#}").to_lowercase(); + assert!(err.contains("root hash"), "{err}"); + + // Because of an abrupt error, terminating a RocksDB instance needs to be handled explicitly.
+ tokio::task::spawn_blocking(RocksDB::await_rocksdb_termination) + .await + .unwrap(); +} + #[derive(Debug)] struct PruningEventListener { pool: ConnectionPool,