From 35e4cae29314fa98ce356a875e08b3e869a31036 Mon Sep 17 00:00:00 2001 From: Artem Fomiuk <88630083+Artemka374@users.noreply.github.com> Date: Wed, 4 Sep 2024 17:42:38 +0300 Subject: [PATCH] feat(prover): add CLI option to run prover with max allocation (#2794) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ## What ❔ Add CLI option for prover to limit max allocation of GPU ## Why ❔ To be able to run compressor and prover on one machine. ## Checklist - [ ] PR title corresponds to the body of PR (we generate changelog entries from PRs). - [ ] Tests for the changes have been added / updated. - [ ] Documentation comments have been added / updated. - [ ] Code has been formatted via `zk fmt` and `zk lint`. --- prover/Cargo.lock | 30 ++++++++++--------- prover/Cargo.toml | 4 +-- .../src/gpu_prover_job_processor.rs | 12 ++++++-- prover/crates/bin/prover_fri/src/main.rs | 6 ++++ 4 files changed, 34 insertions(+), 18 deletions(-) diff --git a/prover/Cargo.lock b/prover/Cargo.lock index 3ac54b477380..f5c8ea5549a6 100644 --- a/prover/Cargo.lock +++ b/prover/Cargo.lock @@ -733,9 +733,9 @@ dependencies = [ [[package]] name = "boojum-cuda" -version = "0.150.4" +version = "0.150.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c861b4baec895cb8e53b10825407f0844b0eafda2ac79e7f02de95439f0f1e74" +checksum = "252c28bc729eb32a053de0cbd1c8c55b2f51d00ca0c656f30bc70d255c2d8753" dependencies = [ "boojum", "cmake", @@ -1862,9 +1862,9 @@ checksum = "5443807d6dff69373d433ab9ef5378ad8df50ca6298caf15de6e52e24aaf54d5" [[package]] name = "era_cudart" -version = "0.150.4" +version = "0.150.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4ac97d833b861e32bc0a71d0542bf5c92094f9818c52d65c695227bfa95ffbe3" +checksum = "803be147b389086e33254a6c9fe26a0d1d21a11f9f73181cad06cf5b1beb7d16" dependencies = [ "bitflags 2.6.0", "era_cudart_sys", @@ -1873,9 +1873,9 @@ dependencies = [ [[package]] name = "era_cudart_sys" -version = "0.150.4" +version = "0.150.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ee6aed60cf09cb6d0b954d74351acb9beb13daab0bacad279691f6b97504b7e6" +checksum = "49f9a3d87f3d45d11bc835e5fc78fe6e3fe243355d435f6b3e794b98df7d3323" dependencies = [ "serde_json", ] @@ -5580,9 +5580,9 @@ checksum = "24188a676b6ae68c3b2cb3a01be17fbf7240ce009799bb56d5b1409051e78fde" [[package]] name = "shivini" -version = "0.150.4" +version = "0.150.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c5e5d862287bb883a4cb0bc4f8ea938ba3fdaa5e495f1a59bc3515231017a0e2" +checksum = "331868b8d92ffec8887c17e786632cf0c9bd4750986fc1400a6d1fbf3739cba4" dependencies = [ "bincode", "blake2 0.10.6", @@ -7558,13 +7558,15 @@ dependencies = [ [[package]] name = "zksync-gpu-ffi" -version = "0.150.4" +version = "0.150.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "82fe099f4f4a2cc8ca8ca591d7619ac00b8054f63b712fa6ceee2b84c6e04c62" +checksum = "ae694dc0ad818e4d45af70b2cf579ff46f1ac938b42ee55543529beb45ba1464" dependencies = [ "bindgen 0.59.2", + "cmake", "crossbeam 0.8.4", "derivative", + "era_cudart_sys", "futures 0.3.30", "futures-locks", "num_cpus", @@ -7572,9 +7574,9 @@ dependencies = [ [[package]] name = "zksync-gpu-prover" -version = "0.150.4" +version = "0.150.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f73d27e0e4589c7445f5a22e511cb5186e2d205172ca4b26acd7a334b3af9492" +checksum = "f8156dbaf36764409cc93424d43dc86c993601d73f5aa9a5938e6552a14dc2df" dependencies = [ "bit-vec", "cfg-if 1.0.0", @@ -7589,9 +7591,9 @@ dependencies = [ [[package]] name = "zksync-wrapper-prover" -version = "0.150.4" +version = "0.150.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1cf4c09adf0a84af0d7ded1fd85a2487fef4cbf1cfc1925412717d0eef03dd5a" +checksum = "83975189451bfacfa97dbcce899fde9db15a0c072196a9b92ddfabbe756bab9d" dependencies = [ "circuit_definitions", "zkevm_test_harness", diff --git a/prover/Cargo.toml b/prover/Cargo.toml index 8d87b727f906..403314cc13ca 100644 --- a/prover/Cargo.toml +++ b/prover/Cargo.toml @@ -61,8 +61,8 @@ circuit_sequencer_api = "=0.150.4" zkevm_test_harness = "=0.150.4" # GPU proving dependencies -wrapper_prover = { package = "zksync-wrapper-prover", version = "=0.150.4" } -shivini = "=0.150.4" +wrapper_prover = { package = "zksync-wrapper-prover", version = "=0.150.6" } +shivini = "=0.150.6" # Core workspace dependencies zksync_multivm = { path = "../core/lib/multivm", version = "0.1.0" } diff --git a/prover/crates/bin/prover_fri/src/gpu_prover_job_processor.rs b/prover/crates/bin/prover_fri/src/gpu_prover_job_processor.rs index 63981fa6c7d6..4b11353eac5c 100644 --- a/prover/crates/bin/prover_fri/src/gpu_prover_job_processor.rs +++ b/prover/crates/bin/prover_fri/src/gpu_prover_job_processor.rs @@ -5,6 +5,7 @@ pub mod gpu_prover { use anyhow::Context as _; use shivini::{ gpu_proof_config::GpuProofConfig, gpu_prove_from_external_witness_data, ProverContext, + ProverContextConfig, }; use tokio::task::JoinHandle; use zksync_config::configs::{fri_prover_group::FriProverGroupConfig, FriProverConfig}; @@ -82,7 +83,15 @@ pub mod gpu_prover { address: SocketAddress, zone: Zone, protocol_version: ProtocolSemanticVersion, + max_allocation: Option, ) -> Self { + let prover_context = match max_allocation { + Some(max_allocation) => ProverContext::create_with_config( + ProverContextConfig::default().with_maximum_device_allocation(max_allocation), + ) + .expect("failed initializing gpu prover context"), + None => ProverContext::create().expect("failed initializing gpu prover context"), + }; Prover { blob_store, public_blob_store, @@ -91,8 +100,7 @@ pub mod gpu_prover { setup_load_mode, circuit_ids_for_round_to_be_proven, witness_vector_queue, - prover_context: ProverContext::create() - .expect("failed initializing gpu prover context"), + prover_context, address, zone, protocol_version, diff --git a/prover/crates/bin/prover_fri/src/main.rs b/prover/crates/bin/prover_fri/src/main.rs index db813394c194..b93eb9c03958 100644 --- a/prover/crates/bin/prover_fri/src/main.rs +++ b/prover/crates/bin/prover_fri/src/main.rs @@ -139,6 +139,7 @@ async fn main() -> anyhow::Result<()> { public_blob_store, pool, circuit_ids_for_round_to_be_proven, + opt.max_allocation, notify, ) .await @@ -178,6 +179,7 @@ async fn get_prover_tasks( public_blob_store: Option>, pool: ConnectionPool, circuit_ids_for_round_to_be_proven: Vec, + _max_allocation: Option, _init_notifier: Arc, ) -> anyhow::Result>>> { use crate::prover_job_processor::{load_setup_data_cache, Prover}; @@ -213,6 +215,7 @@ async fn get_prover_tasks( public_blob_store: Option>, pool: ConnectionPool, circuit_ids_for_round_to_be_proven: Vec, + max_allocation: Option, init_notifier: Arc, ) -> anyhow::Result>>> { use gpu_prover_job_processor::gpu_prover; @@ -245,6 +248,7 @@ async fn get_prover_tasks( address.clone(), zone.clone(), protocol_version, + max_allocation, ); let producer = shared_witness_vector_queue.clone(); @@ -295,4 +299,6 @@ pub(crate) struct Cli { pub(crate) config_path: Option, #[arg(long)] pub(crate) secrets_path: Option, + #[arg(long)] + pub(crate) max_allocation: Option, }