From a7dc0ed5007f6b2f789f4c61cb3d137843151860 Mon Sep 17 00:00:00 2001 From: Patrick Date: Tue, 3 Dec 2024 17:37:41 +0100 Subject: [PATCH] fix(tee): fix race condition in batch locking (#3342) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What ❔ After [scaling][1] [`zksync-tee-prover`][2] to two instances/replicas on Azure for `azure-stage2`, `azure-testnet2`, and `azure-mainnet2`, we started experiencing [duplicated proving for some batches][3]. ![logs](https://github.com/user-attachments/assets/39170805-d363-47bf-b275-468694593669) While this is not an erroneous situation, it is wasteful from a resource perspective. This was due to a race condition in batch locking. This PR fixes the issue by adding atomic batch locking. [1]: https://github.com/matter-labs/gitops-kubernetes/pull/7033/files [2]: https://github.com/matter-labs/zksync-era/blob/aaca32b6ab411d5cdc1234c20af8b5c1092195d7/core/bin/zksync_tee_prover/src/main.rs [3]: https://grafana.matterlabs.dev/goto/M1I_Bq7HR?orgId=1 ## Why ❔ To fix the bug that only activates after running `zksync-tee-prover` on multiple instances. ## Checklist - [x] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [ ] Tests for the changes have been added / updated. - [ ] Documentation comments have been added / updated. - [x] Code has been formatted via `zkstack dev fmt` and `zkstack dev lint`. --- ...82b0fa233913582fe9091cc1e8954dfd0eb1b.json | 30 ++++++ ...9f73e353da3bc6af7ecb81102c4194df631aa.json | 26 +++++ ...8203a62629904bc4956249e690a8ad7a48983.json | 32 ------ core/lib/dal/src/tee_proof_generation_dal.rs | 102 +++++++++++------- 4 files changed, 120 insertions(+), 70 deletions(-) create mode 100644 core/lib/dal/.sqlx/query-4777de5d3f313f1eb8c3b6a4c1782b0fa233913582fe9091cc1e8954dfd0eb1b.json create mode 100644 core/lib/dal/.sqlx/query-8ead57cdda5909348f31f8c4d989f73e353da3bc6af7ecb81102c4194df631aa.json delete mode 100644 core/lib/dal/.sqlx/query-e46c99b23db91800b27c717100f8203a62629904bc4956249e690a8ad7a48983.json diff --git a/core/lib/dal/.sqlx/query-4777de5d3f313f1eb8c3b6a4c1782b0fa233913582fe9091cc1e8954dfd0eb1b.json b/core/lib/dal/.sqlx/query-4777de5d3f313f1eb8c3b6a4c1782b0fa233913582fe9091cc1e8954dfd0eb1b.json new file mode 100644 index 000000000000..37adf92582e7 --- /dev/null +++ b/core/lib/dal/.sqlx/query-4777de5d3f313f1eb8c3b6a4c1782b0fa233913582fe9091cc1e8954dfd0eb1b.json @@ -0,0 +1,30 @@ +{ + "db_name": "PostgreSQL", + "query": "\n INSERT INTO\n tee_proof_generation_details (\n l1_batch_number, tee_type, status, created_at, updated_at, prover_taken_at\n )\n VALUES\n (\n $1,\n $2,\n $3,\n NOW(),\n NOW(),\n NOW()\n )\n ON CONFLICT (l1_batch_number, tee_type) DO\n UPDATE\n SET\n status = $3,\n updated_at = NOW(),\n prover_taken_at = NOW()\n RETURNING\n l1_batch_number,\n created_at\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "l1_batch_number", + "type_info": "Int8" + }, + { + "ordinal": 1, + "name": "created_at", + "type_info": "Timestamp" + } + ], + "parameters": { + "Left": [ + "Int8", + "Text", + "Text" + ] + }, + "nullable": [ + false, + false + ] + }, + "hash": "4777de5d3f313f1eb8c3b6a4c1782b0fa233913582fe9091cc1e8954dfd0eb1b" +} diff --git a/core/lib/dal/.sqlx/query-8ead57cdda5909348f31f8c4d989f73e353da3bc6af7ecb81102c4194df631aa.json b/core/lib/dal/.sqlx/query-8ead57cdda5909348f31f8c4d989f73e353da3bc6af7ecb81102c4194df631aa.json new file mode 100644 index 000000000000..6266f93e6545 --- /dev/null +++ b/core/lib/dal/.sqlx/query-8ead57cdda5909348f31f8c4d989f73e353da3bc6af7ecb81102c4194df631aa.json @@ -0,0 +1,26 @@ +{ + "db_name": "PostgreSQL", + "query": "\n SELECT\n p.l1_batch_number\n FROM\n proof_generation_details p\n LEFT JOIN\n tee_proof_generation_details tee\n ON\n p.l1_batch_number = tee.l1_batch_number\n AND tee.tee_type = $1\n WHERE\n (\n p.l1_batch_number >= $5\n AND p.vm_run_data_blob_url IS NOT NULL\n AND p.proof_gen_data_blob_url IS NOT NULL\n )\n AND (\n tee.l1_batch_number IS NULL\n OR (\n (tee.status = $2 OR tee.status = $3)\n AND tee.prover_taken_at < NOW() - $4::INTERVAL\n )\n )\n LIMIT 1\n FOR UPDATE OF p\n SKIP LOCKED\n ", + "describe": { + "columns": [ + { + "ordinal": 0, + "name": "l1_batch_number", + "type_info": "Int8" + } + ], + "parameters": { + "Left": [ + "Text", + "Text", + "Text", + "Interval", + "Int8" + ] + }, + "nullable": [ + false + ] + }, + "hash": "8ead57cdda5909348f31f8c4d989f73e353da3bc6af7ecb81102c4194df631aa" +} diff --git a/core/lib/dal/.sqlx/query-e46c99b23db91800b27c717100f8203a62629904bc4956249e690a8ad7a48983.json b/core/lib/dal/.sqlx/query-e46c99b23db91800b27c717100f8203a62629904bc4956249e690a8ad7a48983.json deleted file mode 100644 index 7ca2c9e7e9fa..000000000000 --- a/core/lib/dal/.sqlx/query-e46c99b23db91800b27c717100f8203a62629904bc4956249e690a8ad7a48983.json +++ /dev/null @@ -1,32 +0,0 @@ -{ - "db_name": "PostgreSQL", - "query": "\n WITH upsert AS (\n SELECT\n p.l1_batch_number\n FROM\n proof_generation_details p\n LEFT JOIN\n tee_proof_generation_details tee\n ON\n p.l1_batch_number = tee.l1_batch_number\n AND tee.tee_type = $1\n WHERE\n (\n p.l1_batch_number >= $5\n AND p.vm_run_data_blob_url IS NOT NULL\n AND p.proof_gen_data_blob_url IS NOT NULL\n )\n AND (\n tee.l1_batch_number IS NULL\n OR (\n (tee.status = $2 OR tee.status = $3)\n AND tee.prover_taken_at < NOW() - $4::INTERVAL\n )\n )\n FETCH FIRST ROW ONLY\n )\n \n INSERT INTO\n tee_proof_generation_details (\n l1_batch_number, tee_type, status, created_at, updated_at, prover_taken_at\n )\n SELECT\n l1_batch_number,\n $1,\n $2,\n NOW(),\n NOW(),\n NOW()\n FROM\n upsert\n ON CONFLICT (l1_batch_number, tee_type) DO\n UPDATE\n SET\n status = $2,\n updated_at = NOW(),\n prover_taken_at = NOW()\n RETURNING\n l1_batch_number,\n created_at\n ", - "describe": { - "columns": [ - { - "ordinal": 0, - "name": "l1_batch_number", - "type_info": "Int8" - }, - { - "ordinal": 1, - "name": "created_at", - "type_info": "Timestamp" - } - ], - "parameters": { - "Left": [ - "Text", - "Text", - "Text", - "Interval", - "Int8" - ] - }, - "nullable": [ - false, - false - ] - }, - "hash": "e46c99b23db91800b27c717100f8203a62629904bc4956249e690a8ad7a48983" -} diff --git a/core/lib/dal/src/tee_proof_generation_dal.rs b/core/lib/dal/src/tee_proof_generation_dal.rs index e6b2df974b26..61a9e23ffea5 100644 --- a/core/lib/dal/src/tee_proof_generation_dal.rs +++ b/core/lib/dal/src/tee_proof_generation_dal.rs @@ -64,72 +64,98 @@ impl TeeProofGenerationDal<'_, '_> { ) -> DalResult> { let processing_timeout = pg_interval_from_duration(processing_timeout); let min_batch_number = i64::from(min_batch_number.0); + let mut transaction = self.storage.start_transaction().await?; + + // Lock rows in the proof_generation_details table to prevent race conditions. The + // tee_proof_generation_details table does not have corresponding entries yet if this is the + // first time the query is invoked for a batch. Locking rows in proof_generation_details + // ensures that two different TEE prover instances will not try to prove the same batch. + let batch_number = sqlx::query!( + r#" + SELECT + p.l1_batch_number + FROM + proof_generation_details p + LEFT JOIN + tee_proof_generation_details tee + ON + p.l1_batch_number = tee.l1_batch_number + AND tee.tee_type = $1 + WHERE + ( + p.l1_batch_number >= $5 + AND p.vm_run_data_blob_url IS NOT NULL + AND p.proof_gen_data_blob_url IS NOT NULL + ) + AND ( + tee.l1_batch_number IS NULL + OR ( + (tee.status = $2 OR tee.status = $3) + AND tee.prover_taken_at < NOW() - $4::INTERVAL + ) + ) + LIMIT 1 + FOR UPDATE OF p + SKIP LOCKED + "#, + tee_type.to_string(), + TeeProofGenerationJobStatus::PickedByProver.to_string(), + TeeProofGenerationJobStatus::Failed.to_string(), + processing_timeout, + min_batch_number + ) + .instrument("lock_batch_for_proving#get_batch_no") + .with_arg("tee_type", &tee_type) + .with_arg("processing_timeout", &processing_timeout) + .with_arg("min_batch_number", &min_batch_number) + .fetch_optional(&mut transaction) + .await?; + + let batch_number = match batch_number { + Some(batch) => batch.l1_batch_number, + None => { + return Ok(None); + } + }; + let locked_batch = sqlx::query_as!( StorageLockedBatch, r#" - WITH upsert AS ( - SELECT - p.l1_batch_number - FROM - proof_generation_details p - LEFT JOIN - tee_proof_generation_details tee - ON - p.l1_batch_number = tee.l1_batch_number - AND tee.tee_type = $1 - WHERE - ( - p.l1_batch_number >= $5 - AND p.vm_run_data_blob_url IS NOT NULL - AND p.proof_gen_data_blob_url IS NOT NULL - ) - AND ( - tee.l1_batch_number IS NULL - OR ( - (tee.status = $2 OR tee.status = $3) - AND tee.prover_taken_at < NOW() - $4::INTERVAL - ) - ) - FETCH FIRST ROW ONLY - ) - INSERT INTO tee_proof_generation_details ( l1_batch_number, tee_type, status, created_at, updated_at, prover_taken_at ) - SELECT - l1_batch_number, + VALUES + ( $1, $2, + $3, NOW(), NOW(), NOW() - FROM - upsert + ) ON CONFLICT (l1_batch_number, tee_type) DO UPDATE SET - status = $2, + status = $3, updated_at = NOW(), prover_taken_at = NOW() RETURNING l1_batch_number, created_at "#, + batch_number, tee_type.to_string(), TeeProofGenerationJobStatus::PickedByProver.to_string(), - TeeProofGenerationJobStatus::Failed.to_string(), - processing_timeout, - min_batch_number ) - .instrument("lock_batch_for_proving") + .instrument("lock_batch_for_proving#insert") + .with_arg("batch_number", &batch_number) .with_arg("tee_type", &tee_type) - .with_arg("processing_timeout", &processing_timeout) - .with_arg("l1_batch_number", &min_batch_number) - .fetch_optional(self.storage) + .fetch_optional(&mut transaction) .await? .map(Into::into); + transaction.commit().await?; Ok(locked_batch) }