From a03e40709e48c384538fa27d479ccaab7d84786d Mon Sep 17 00:00:00 2001
From: nicholas-mainardi
Date: Wed, 13 Nov 2024 17:08:35 +0100
Subject: [PATCH] feat: tabular SELECT Queries Without Aggregation Functions
 (#373)

This PR introduces support for simple `SELECT` queries without aggregation
functions.

---------

Co-authored-by: Franklin Delehelle
Co-authored-by: nikkolasg
Co-authored-by: Nicolas Gailly
---
 Cargo.lock                                    | 1 +
 devenv.nix                                    | 8 +-
 groth16-framework/tests/common/context.rs     | 16 +-
 groth16-framework/tests/common/query.rs       | 20 +-
 mp2-common/src/digest.rs                      | 5 +-
 mp2-common/src/group_hashing/mod.rs           | 11 +-
 mp2-common/src/u256.rs                        | 5 +-
 mp2-v1/src/final_extraction/base_circuit.rs   | 1 +
 mp2-v1/tests/common/cases/mod.rs              | 6 +-
 mp2-v1/tests/common/cases/planner.rs          | 4 +-
 .../{query.rs => query/aggregated_queries.rs} | 327 +----
 mp2-v1/tests/common/cases/query/mod.rs        | 274 ++++
 .../cases/query/simple_select_queries.rs      | 542 +++++++
 mp2-v1/tests/common/context.rs                | 8 +-
 mp2-v1/tests/common/table.rs                  | 48 +-
 mp2-v1/tests/integrated_tests.rs              | 38 +-
 parsil/src/assembler.rs                       | 112 +-
 parsil/src/errors.rs                          | 9 +-
 parsil/src/executor.rs                        | 130 +-
 parsil/src/expand.rs                          | 55 +-
 parsil/src/lib.rs                             | 2 +-
 parsil/src/symbols.rs                         | 67 +-
 parsil/src/tests.rs                           | 117 +-
 parsil/src/utils.rs                           | 128 +-
 parsil/src/validate.rs                        | 43 +-
 verifiable-db/Cargo.toml                      | 1 +
 verifiable-db/src/api.rs                      | 43 +-
 verifiable-db/src/query/aggregation/mod.rs    | 38 +-
 verifiable-db/src/query/api.rs                | 22 +-
 .../src/query/computational_hash_ids.rs       | 48 +-
 verifiable-db/src/query/merkle_path.rs        | 462 ++++++
 verifiable-db/src/query/mod.rs                | 1 +
 .../universal_circuit_inputs.rs               | 52 +-
 .../universal_query_circuit.rs                | 9 +-
 .../results_tree/binding/binding_results.rs   | 26 +-
 verifiable-db/src/revelation/api.rs           | 505 +++++--
 verifiable-db/src/revelation/mod.rs           | 51 +-
 .../src/revelation/placeholders_check.rs      | 327 ++++-
 .../revelation/revelation_unproven_offset.rs  | 1296 +++++++++++++++++
 .../revelation_without_results_tree.rs        | 159 +-
 verifiable-db/src/row_tree/full_node.rs       | 9 +-
 verifiable-db/src/row_tree/leaf.rs            | 5 +-
 verifiable-db/src/row_tree/mod.rs             | 1 -
 verifiable-db/src/test_utils.rs               | 5 +-
 44 files changed, 4184 insertions(+), 853 deletions(-)
 rename mp2-v1/tests/common/cases/{query.rs => query/aggregated_queries.rs} (85%)
 create mode 100644 mp2-v1/tests/common/cases/query/mod.rs
 create mode 100644 mp2-v1/tests/common/cases/query/simple_select_queries.rs
 create mode 100644 verifiable-db/src/query/merkle_path.rs
 create mode 100644 verifiable-db/src/revelation/revelation_unproven_offset.rs

diff --git a/Cargo.lock b/Cargo.lock
index a11b5087d..7cd7e7fc8 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -6644,6 +6644,7 @@ dependencies = [
  "anyhow",
  "bincode",
  "derive_more 0.99.18",
+ "futures",
  "itertools 0.12.1",
  "log",
  "mp2_common",
diff --git a/devenv.nix b/devenv.nix
index 40a2ebb84..d090b7930 100644
--- a/devenv.nix
+++ b/devenv.nix
@@ -55,7 +55,13 @@ in
     }];
   };
 
-  scripts.db.exec = "psql storage -h localhost -p ${builtins.toString config.env.PGPORT}";
+  scripts = {
+    # Open a shell to the DB
+    db.exec = "psql storage -h localhost -p ${builtins.toString config.env.PGPORT}";
+
+    # Wipe out the database
+    reset-db.exec = "rm -rf ${config.env.DEVENV_STATE}/postgres";
+  };
 
   # https://devenv.sh/languages/
   languages.rust = {
diff --git a/groth16-framework/tests/common/context.rs b/groth16-framework/tests/common/context.rs
index ac5478f54..ede68e899 100644
--- a/groth16-framework/tests/common/context.rs
+++ 
b/groth16-framework/tests/common/context.rs @@ -8,8 +8,8 @@ use verifiable_db::{ api::WrapCircuitParams, revelation::api::Parameters as RevelationParameters, test_utils::{ - MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_OUTPUTS, MAX_NUM_PLACEHOLDERS, MAX_NUM_PREDICATE_OPS, - MAX_NUM_RESULT_OPS, + INDEX_TREE_MAX_DEPTH, MAX_NUM_COLUMNS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_OUTPUTS, + MAX_NUM_PLACEHOLDERS, MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, ROW_TREE_MAX_DEPTH, }, }; @@ -18,10 +18,14 @@ pub(crate) struct TestContext { pub(crate) preprocessing_circuits: TestingRecursiveCircuits, pub(crate) query_circuits: TestingRecursiveCircuits, pub(crate) revelation_params: RevelationParameters< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - { 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS) }, >, pub(crate) wrap_circuit: WrapCircuitParams, @@ -39,10 +43,14 @@ impl TestContext { // Create the revelation parameters. let revelation_params = RevelationParameters::< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - { 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS) }, >::build( query_circuits.get_recursive_circuit_set(), preprocessing_circuits.get_recursive_circuit_set(), diff --git a/groth16-framework/tests/common/query.rs b/groth16-framework/tests/common/query.rs index 5ae8326a0..5f5f80723 100644 --- a/groth16-framework/tests/common/query.rs +++ b/groth16-framework/tests/common/query.rs @@ -29,19 +29,6 @@ impl TestContext { let max_block_number = 76; let test_data = TestRevelationData::sample(min_block_number, max_block_number); - let placeholder_hash_ids = QueryInput::< - MAX_NUM_COLUMNS, - MAX_NUM_PREDICATE_OPS, - MAX_NUM_RESULT_OPS, - MAX_NUM_ITEMS_PER_OUTPUT, - >::ids_for_placeholder_hash( - test_data.predicate_operations(), - test_data.results(), - test_data.placeholders(), - test_data.query_bounds(), - ) - .unwrap(); - // Generate the query proof. let [query_proof] = self .query_circuits @@ -60,17 +47,18 @@ impl TestContext { let preprocessing_proof = serialize_proof(&preprocessing_proof).unwrap(); // Generate the revelation proof. 
- let input = CircuitInput::new_revelation_no_results_tree( + let input = CircuitInput::new_revelation_aggregated( query_proof, preprocessing_proof, test_data.query_bounds(), test_data.placeholders(), - placeholder_hash_ids, + test_data.predicate_operations(), + test_data.results(), ) .unwrap(); let revelation_proof = self .revelation_params - .generate_proof(input, self.query_circuits.get_recursive_circuit_set()) + .generate_proof(input, self.query_circuits.get_recursive_circuit_set(), None) .unwrap(); let revelation_proof = ProofWithVK::deserialize(&revelation_proof).unwrap(); let (revelation_proof_with_pi, _) = revelation_proof.clone().into(); diff --git a/mp2-common/src/digest.rs b/mp2-common/src/digest.rs index a876ab92a..9265af657 100644 --- a/mp2-common/src/digest.rs +++ b/mp2-common/src/digest.rs @@ -189,10 +189,7 @@ mod test { }; use crate::utils::TryIntoBool; use mp2_test::circuit::{run_circuit, UserCircuit}; - use plonky2::{ - field::types::Sample, - iop::witness::PartialWitness, - }; + use plonky2::{field::types::Sample, iop::witness::PartialWitness}; use plonky2_ecgfp5::{ curve::curve::Point, gadgets::curve::{CircuitBuilderEcGFp5, PartialWitnessCurve}, diff --git a/mp2-common/src/group_hashing/mod.rs b/mp2-common/src/group_hashing/mod.rs index 57d061952..819eb7c2b 100644 --- a/mp2-common/src/group_hashing/mod.rs +++ b/mp2-common/src/group_hashing/mod.rs @@ -234,11 +234,8 @@ pub fn cond_field_hashed_scalar_mul(cond: bool, mul: Point, base: Point) -> Poin #[cfg(test)] mod test { - use plonky2::{ - field::types::Sample, - iop::witness::PartialWitness, - }; - + use plonky2::{field::types::Sample, iop::witness::PartialWitness}; + use plonky2_ecgfp5::{ curve::curve::{Point, WeierstrassPoint}, gadgets::curve::{CircuitBuilderEcGFp5, CurveTarget, PartialWitnessCurve}, @@ -251,9 +248,7 @@ mod test { }; use mp2_test::circuit::{run_circuit, UserCircuit}; - use super::{ - circuit_hashed_scalar_mul, field_hashed_scalar_mul, weierstrass_to_point, - }; + use super::{circuit_hashed_scalar_mul, field_hashed_scalar_mul, weierstrass_to_point}; #[derive(Clone, Debug)] struct TestScalarMul { diff --git a/mp2-common/src/u256.rs b/mp2-common/src/u256.rs index 5897676e1..ca62f3eb1 100644 --- a/mp2-common/src/u256.rs +++ b/mp2-common/src/u256.rs @@ -48,10 +48,7 @@ pub const NUM_LIMBS: usize = 8; /// the last, the comparison is defined as `l < r` or `l==r`. /// It's corresponding to the `is_less_than_or_equal_to_u256_arr` gadget /// function, and returns two flags: `left < right` and `left == right`. -pub fn is_less_than_or_equal_to_u256_arr( - left: &[U256; L], - right: &[U256; L], -) -> (bool, bool) { +pub fn is_less_than_or_equal_to_u256_arr(left: &[U256], right: &[U256]) -> (bool, bool) { zip_eq(left, right).fold((false, true), |(is_lt, is_eq), (l, r)| { let borrow = if is_lt { U256::from(1) } else { U256::ZERO }; if let Some(l) = l.checked_sub(borrow) { diff --git a/mp2-v1/src/final_extraction/base_circuit.rs b/mp2-v1/src/final_extraction/base_circuit.rs index edc4be00a..d2bc6ff44 100644 --- a/mp2-v1/src/final_extraction/base_circuit.rs +++ b/mp2-v1/src/final_extraction/base_circuit.rs @@ -113,6 +113,7 @@ pub(crate) struct BaseCircuitProofWires { pub(crate) const CONTRACT_SET_NUM_IO: usize = contract_extraction::PublicInputs::::TOTAL_LEN; pub(crate) const VALUE_SET_NUM_IO: usize = values_extraction::PublicInputs::::TOTAL_LEN; +// WARN: clippy is wrong on this one, it is used somewhere else. 
pub(crate) const BLOCK_SET_NUM_IO: usize = block_extraction::public_inputs::PublicInputs::::TOTAL_LEN; diff --git a/mp2-v1/tests/common/cases/mod.rs b/mp2-v1/tests/common/cases/mod.rs index 4df2cffff..c6445467e 100644 --- a/mp2-v1/tests/common/cases/mod.rs +++ b/mp2-v1/tests/common/cases/mod.rs @@ -2,9 +2,9 @@ use contract::Contract; use mp2_v1::values_extraction::{ - identifier_for_mapping_key_column, identifier_for_mapping_value_column, - identifier_single_var_column, - }; + identifier_for_mapping_key_column, identifier_for_mapping_value_column, + identifier_single_var_column, +}; use table_source::{ContractExtractionArgs, TableSource}; use super::table::Table; diff --git a/mp2-v1/tests/common/cases/planner.rs b/mp2-v1/tests/common/cases/planner.rs index 34cc2e52e..6fccfdf8d 100644 --- a/mp2-v1/tests/common/cases/planner.rs +++ b/mp2-v1/tests/common/cases/planner.rs @@ -11,7 +11,7 @@ use parsil::{assembler::DynamicCircuitPis, ParsilSettings}; use ryhope::{storage::WideLineage, tree::NodeContext, Epoch}; use crate::common::{ - cases::query::prove_non_existence_row, + cases::query::aggregated_queries::prove_non_existence_row, index_tree::MerkleIndexTree, proof_storage::{PlaceholderValues, ProofKey, ProofStorage, QueryID}, rowtree::MerkleRowTree, @@ -19,7 +19,7 @@ use crate::common::{ TestContext, }; -use super::query::{prove_single_row, QueryCooking}; +use super::query::{aggregated_queries::prove_single_row, QueryCooking}; pub(crate) struct QueryPlanner<'a> { pub(crate) query: QueryCooking, diff --git a/mp2-v1/tests/common/cases/query.rs b/mp2-v1/tests/common/cases/query/aggregated_queries.rs similarity index 85% rename from mp2-v1/tests/common/cases/query.rs rename to mp2-v1/tests/common/cases/query/aggregated_queries.rs index f6833df27..3757673f3 100644 --- a/mp2-v1/tests/common/cases/query.rs +++ b/mp2-v1/tests/common/cases/query/aggregated_queries.rs @@ -10,24 +10,21 @@ use std::{ use crate::common::{ cases::{ indexing::BLOCK_COLUMN_NAME, - planner::{IndexInfo, RowInfo}, + planner::{IndexInfo, QueryPlanner, RowInfo, TreeInfo}, + query::{QueryCooking, SqlReturn, SqlType}, table_source::BASE_VALUE, }, - proof_storage::ProofKey, + proof_storage::{ProofKey, ProofStorage}, rowtree::MerkleRowTree, - table::TableColumns, + table::{Table, TableColumns}, TableInfo, }; -use super::{ - super::{context::TestContext, proof_storage::ProofStorage, table::Table}, - planner::{QueryPlanner, TreeInfo}, -}; +use crate::context::TestContext; use alloy::primitives::U256; -use anyhow::{bail, Context, Result}; +use anyhow::{bail, Result}; use futures::{stream, FutureExt, StreamExt}; -use super::TableSource; use itertools::Itertools; use log::*; use mp2_common::{ @@ -52,8 +49,7 @@ use parsil::{ assembler::{DynamicCircuitPis, StaticCircuitPis}, parse_and_validate, queries::{core_keys_for_index_tree, core_keys_for_row_tree}, - ParsilSettings, PlaceholderSettings, DEFAULT_MAX_BLOCK_PLACEHOLDER, - DEFAULT_MIN_BLOCK_PLACEHOLDER, + ParsilSettings, DEFAULT_MAX_BLOCK_PLACEHOLDER, DEFAULT_MIN_BLOCK_PLACEHOLDER, }; use ryhope::{ storage::{ @@ -75,162 +71,38 @@ use verifiable_db::{ }, }, revelation::PublicInputs, + row_tree, }; -pub const MAX_NUM_RESULT_OPS: usize = 20; -pub const MAX_NUM_RESULTS: usize = 10; -pub const MAX_NUM_OUTPUTS: usize = 3; -pub const MAX_NUM_ITEMS_PER_OUTPUT: usize = 5; -pub const MAX_NUM_PLACEHOLDERS: usize = 10; -pub const MAX_NUM_COLUMNS: usize = 20; -pub const MAX_NUM_PREDICATE_OPS: usize = 20; - -pub type GlobalCircuitInput = verifiable_db::api::QueryCircuitInput< - MAX_NUM_COLUMNS, - 
MAX_NUM_PREDICATE_OPS, - MAX_NUM_RESULT_OPS, - MAX_NUM_OUTPUTS, - MAX_NUM_ITEMS_PER_OUTPUT, - MAX_NUM_PLACEHOLDERS, ->; - -pub type QueryCircuitInput = verifiable_db::query::api::CircuitInput< - MAX_NUM_COLUMNS, - MAX_NUM_PREDICATE_OPS, - MAX_NUM_RESULT_OPS, - MAX_NUM_ITEMS_PER_OUTPUT, ->; - -pub type RevelationCircuitInput = verifiable_db::revelation::api::CircuitInput< - MAX_NUM_OUTPUTS, - MAX_NUM_ITEMS_PER_OUTPUT, - MAX_NUM_PLACEHOLDERS, - { QueryCircuitInput::num_placeholders_ids() }, ->; +use super::{ + GlobalCircuitInput, QueryCircuitInput, RevelationCircuitInput, INDEX_TREE_MAX_DEPTH, + MAX_NUM_COLUMNS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_OUTPUTS, MAX_NUM_PLACEHOLDERS, + MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, ROW_TREE_MAX_DEPTH, +}; pub type RevelationPublicInputs<'a> = PublicInputs<'a, F, MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS>; -pub async fn test_query(ctx: &mut TestContext, table: Table, t: TableInfo) -> Result<()> { - match &t.source { - TableSource::Mapping(_) | TableSource::Merge(_) => query_mapping(ctx, &table, t).await?, - _ => unimplemented!("yet"), - } - Ok(()) -} - -async fn query_mapping(ctx: &mut TestContext, table: &Table, info: TableInfo) -> Result<()> { - let table_hash = info.metadata_hash(); - let query_info = cook_query_between_blocks(table, &info).await?; - test_query_mapping(ctx, table, query_info, &table_hash).await?; - - let query_info = cook_query_unique_secondary_index(table, &info).await?; - test_query_mapping(ctx, table, query_info, &table_hash).await?; - //// cook query with custom placeholders - let query_info = cook_query_secondary_index_placeholder(table, &info).await?; - test_query_mapping(ctx, table, query_info, &table_hash).await?; - let query_info = cook_query_secondary_index_nonexisting_placeholder(table, &info).await?; - test_query_mapping(ctx, table, query_info, &table_hash).await?; - - // cook query filtering over a secondary index value not valid in all the blocks - let query_info = cook_query_non_matching_entries_some_blocks(table, &info).await?; - test_query_mapping(ctx, table, query_info, &table_hash).await?; - // cook query with no valid blocks - let query_info = cook_query_no_matching_entries(table, &info).await?; - test_query_mapping(ctx, table, query_info, &table_hash).await?; - // cook query with block query range partially overlapping with blocks in the DB - let query_info = cook_query_partial_block_range(table, &info).await?; - test_query_mapping(ctx, table, query_info, &table_hash).await?; - Ok(()) -} - -/// Run a test query on the mapping table such as created during the indexing phase -async fn test_query_mapping( - ctx: &mut TestContext, - table: &Table, - query_info: QueryCooking, - table_hash: &MetadataHash, -) -> Result<()> { - let settings = ParsilSettings { - context: table, - placeholders: PlaceholderSettings::with_freestanding(MAX_NUM_PLACEHOLDERS - 2), - }; - - info!("QUERY on the testcase: {}", query_info.query); - let mut parsed = parse_and_validate(&query_info.query, &settings)?; - println!("QUERY table columns -> {:?}", table.columns.to_zkcolumns()); - info!( - "BOUNDS found on query: min {}, max {} - table.genesis_block {}", - query_info.min_block, query_info.max_block, table.genesis_block - ); - - // the query to use to actually get the outputs expected - let mut exec_query = parsil::executor::generate_query_execution(&mut parsed, &settings)?; - let query_params = exec_query.convert_placeholders(&query_info.placeholders); - let res = execute_row_query( - &table.db_pool, - &exec_query - 
.normalize_placeholder_names() - .to_pgsql_string_with_placeholder(), - &query_params, - ) - .await?; - let res = if is_empty_result(&res, SqlType::Numeric) { - vec![] // empty results, but Postgres still return 1 row - } else { - res - }; - info!( - "Found {} results from query {}", - res.len(), - exec_query.query.to_display() - ); - print_vec_sql_rows(&res, SqlType::Numeric); - - let pis = parsil::assembler::assemble_dynamic(&parsed, &settings, &query_info.placeholders) - .context("while assembling PIs")?; - - let big_row_cache = table - .row - .wide_lineage_between( - table.row.current_epoch(), - &core_keys_for_row_tree( - &query_info.query, - &settings, - &pis.bounds, - &query_info.placeholders, - )?, - (query_info.min_block as Epoch, query_info.max_block as Epoch), - ) - .await?; - - prove_query( - ctx, - table, - query_info, - parsed, - &settings, - &big_row_cache, - res, - table_hash.clone(), - ) - .await - .expect("unable to run universal query proof"); - Ok(()) -} - /// Execute a query to know all the touched rows, and then call the universal circuit on all rows #[warn(clippy::too_many_arguments)] -async fn prove_query( +pub(crate) async fn prove_query( ctx: &mut TestContext, table: &Table, query: QueryCooking, mut parsed: Query, settings: &ParsilSettings<&Table>, - row_cache: &WideLineage>, res: Vec, metadata: MetadataHash, + pis: DynamicCircuitPis, ) -> Result<()> { + let row_cache = table + .row + .wide_lineage_between( + table.row.current_epoch(), + &core_keys_for_row_tree(&query.query, &settings, &pis.bounds, &query.placeholders)?, + (query.min_block as Epoch, query.max_block as Epoch), + ) + .await?; // the query to use to fetch all the rows keys involved in the result tree. let pis = parsil::assembler::assemble_dynamic(&parsed, settings, &query.placeholders)?; let row_keys_per_epoch = row_cache.keys_by_epochs(); @@ -340,10 +212,10 @@ async fn prove_query( info!("Query proofs done! Generating revelation proof..."); let proof = prove_revelation(ctx, table, &query, &pis, table.index.current_epoch()).await?; info!("Revelation proof done! Checking public inputs..."); + // get `StaticPublicInputs`, i.e., the data about the query available only at query registration time, // to check the public inputs - let pis = parsil::assembler::assemble_static(&parsed, settings)?; - + let pis = parsil::assembler::assemble_static(&parsed, &settings)?; // get number of matching rows let mut exec_query = parsil::executor::generate_query_keys(&mut parsed, &settings)?; let query_params = exec_query.convert_placeholders(&query.placeholders); @@ -394,18 +266,13 @@ async fn prove_revelation( let pk = ProofKey::IVC(tree_epoch as BlockPrimaryIndex); ctx.storage.get_proof_exact(&pk)? 
};
-    let pis_hash = QueryCircuitInput::ids_for_placeholder_hash(
-        &pis.predication_operations,
-        &pis.result,
-        &query.placeholders,
-        &pis.bounds,
-    )?;
-    let input = RevelationCircuitInput::new_revelation_no_results_tree(
+    let input = RevelationCircuitInput::new_revelation_aggregated(
         query_proof,
         indexing_proof,
         &pis.bounds,
         &query.placeholders,
-        pis_hash,
+        &pis.predication_operations,
+        &pis.result,
     )?;
     let proof = ctx.run_query_proof(
         "querying::revelation",
@@ -415,7 +282,7 @@
 }
 
 #[warn(clippy::too_many_arguments)]
-fn check_final_outputs(
+pub(crate) fn check_final_outputs(
     revelation_proof: Vec<u8>,
     ctx: &TestContext,
     table: &Table,
@@ -450,16 +317,7 @@ fn check_final_outputs(
         "metadata hash computed by circuit and offcircuit is not the same"
     );
 
-    let column_ids = ColumnIDs::new(
-        table.columns.primary.identifier,
-        table.columns.secondary.identifier,
-        table
-            .columns
-            .non_indexed_columns()
-            .into_iter()
-            .map(|column| column.identifier)
-            .collect_vec(),
-    );
+    let column_ids = ColumnIDs::from(&table.columns);
     let expected_computational_hash = Identifiers::computational_hash(
         &column_ids,
         &pis.predication_operations,
@@ -502,19 +360,27 @@ fn check_final_outputs(
         res.len() as u64,
         revelation_pis.num_results().to_canonical_u64(),
     );
-    // check results
+    // check results: we check that each result in res appears in the set
+    // of results exposed by the proof, and vice versa:
+    // - first, we accumulate each result in `res` to a `HashMap`,
+    //   and we do the same for the set of results exposed by the proof
+    // - then, we check that the two `HashMap`s are the same
+    let mut expected_res_accumulator: HashMap<Vec<U256>, usize> = HashMap::new();
+    let mut proof_res_accumulator: HashMap<Vec<U256>, usize> = HashMap::new();
     res.into_iter()
         .zip(revelation_pis.result_values())
-        .for_each(|(expected_res, res)| {
-            (0..expected_res.len()).for_each(|i| {
-                let SqlReturn::Numeric(expected_res) =
-                    SqlType::Numeric.extract(&expected_res, i).unwrap();
-                assert_eq!(
-                    U256::from_str_radix(&expected_res.to_string(), 10).unwrap(),
-                    res[i],
-                );
-            })
+        .for_each(|(row, res)| {
+            let (expected_res, proof_res): (Vec<_>, Vec<_>) = (0..row.len())
+                .map(|i| {
+                    let SqlReturn::Numeric(expected_res) =
+                        SqlType::Numeric.extract(&row, i).unwrap();
+                    (expected_res, res[i])
+                })
+                .unzip();
+            *expected_res_accumulator.entry(expected_res).or_default() += 1;
+            *proof_res_accumulator.entry(proof_res).or_default() += 1;
         });
+    assert_eq!(expected_res_accumulator, proof_res_accumulator);
 
     Ok(())
 }
@@ -763,7 +629,7 @@ where
 
 // TODO: make it recursive with async - tentative in `fetch_child_info` but it doesn't work,
 // recursion with async is weird.
-async fn get_node_info>(
+pub(crate) async fn get_node_info>(
     lookup: &T,
     k: &K,
     at: Epoch,
@@ -1054,17 +920,12 @@ pub async fn prove_single_row
 
-async fn cook_query_between_blocks(table: &Table, info: &TableInfo) -> Result<QueryCooking> {
+pub(crate) async fn cook_query_between_blocks(
+    table: &Table,
+    info: &TableInfo,
+) -> Result<QueryCooking> {
     let max = table.row.current_epoch();
     let min = max - 1;
@@ -1083,10 +944,12 @@ async fn cook_query_between_blocks(table: &Table, info: &TableInfo) -> Result<QueryCooking> {
@@ -1126,13 +989,15 @@ async fn cook_query_secondary_index_nonexisting_placeholder(
         max_block: max_block as BlockPrimaryIndex,
         query: query_str,
         placeholders,
+        limit: None,
+        offset: None,
     })
 }
 
 // cook up a SQL query on the secondary index and with a predicate on the non-indexed column.
 // we just iterate on mapping keys and take the one that exists for most blocks. 
We also choose
 // a value to filter over the non-indexed column
-async fn cook_query_secondary_index_placeholder(
+pub(crate) async fn cook_query_secondary_index_placeholder(
     table: &Table,
     info: &TableInfo,
 ) -> Result<QueryCooking> {
@@ -1170,12 +1035,14 @@ async fn cook_query_secondary_index_placeholder(
         max_block: max_block as BlockPrimaryIndex,
         query: query_str,
         placeholders,
+        limit: None,
+        offset: None,
     })
 }
 
 // cook up a SQL query on the secondary index. For that we just iterate on mapping keys and
 // take the one that exists for most blocks
-async fn cook_query_unique_secondary_index(
+pub(crate) async fn cook_query_unique_secondary_index(
     table: &Table,
     info: &TableInfo,
 ) -> Result<QueryCooking> {
@@ -1246,10 +1113,15 @@ async fn cook_query_unique_secondary_index(
         max_block: max_block as BlockPrimaryIndex,
         query: query_str,
         placeholders,
+        limit: None,
+        offset: None,
     })
 }
 
-async fn cook_query_partial_block_range(table: &Table, info: &TableInfo) -> Result<QueryCooking> {
+pub(crate) async fn cook_query_partial_block_range(
+    table: &Table,
+    info: &TableInfo,
+) -> Result<QueryCooking> {
     let (longest_key, (min_block, max_block)) = find_longest_lived_key(table, false).await?;
     let key_value = hex::encode(longest_key.value.to_be_bytes_trimmed_vec());
     info!(
@@ -1277,10 +1149,15 @@ async fn cook_query_partial_block_range(table: &Table, info: &TableInfo) -> Resu
         max_block: max_block as BlockPrimaryIndex,
         query: query_str,
         placeholders,
+        limit: None,
+        offset: None,
     })
 }
 
-async fn cook_query_no_matching_entries(table: &Table, info: &TableInfo) -> Result<QueryCooking> {
+pub(crate) async fn cook_query_no_matching_entries(
+    table: &Table,
+    info: &TableInfo,
+) -> Result<QueryCooking> {
     let initial_epoch = table.row.initial_epoch();
     // choose query bounds outside of the range [initial_epoch, last_epoch]
     let min_block = 0;
@@ -1302,12 +1179,14 @@ async fn cook_query_no_matching_entries(table: &Table, info: &TableInfo) -> Resu
         placeholders,
         min_block,
         max_block: max_block as usize,
+        limit: None,
+        offset: None,
     })
 }
 
 /// Cook a query where there are no entries satisfying the secondary query bounds only for some
 /// blocks of the primary index bounds (not for all the blocks)
-async fn cook_query_non_matching_entries_some_blocks(
+pub(crate) async fn cook_query_non_matching_entries_some_blocks(
     table: &Table,
     info: &TableInfo,
 ) -> Result<QueryCooking> {
@@ -1339,6 +1218,8 @@ async fn cook_query_non_matching_entries_some_blocks(
         max_block: max_block as BlockPrimaryIndex,
         query: query_str,
         placeholders,
+        limit: None,
+        offset: None,
     })
 }
 
@@ -1372,7 +1253,7 @@ async fn extract_row_liveness(table: &Table) -> Result
-pub async fn find_longest_lived_key(
+pub(crate) async fn find_longest_lived_key(
     table: &Table,
     dummy: bool,
 ) -> Result<(RowTreeKey, BlockRange)> {
@@ -1494,51 +1375,3 @@ async fn check_correct_cells_tree(
     );
     Ok(())
 }
-
-pub enum SqlType {
-    Numeric,
-}
-
-impl SqlType {
-    pub fn extract(&self, row: &PsqlRow, idx: usize) -> Option<SqlReturn> {
-        match self {
-            SqlType::Numeric => row.get::<_, Option<U256>>(idx).map(SqlReturn::Numeric),
-        }
-    }
-}
-
-#[derive(Debug, Clone)]
-pub enum SqlReturn {
-    Numeric(U256),
-}
-
-fn is_empty_result(rows: &[PsqlRow], types: SqlType) -> bool {
-    if rows.is_empty() {
-        return true;
-    }
-    let columns = rows.first().as_ref().unwrap().columns();
-    if columns.is_empty() {
-        return true;
-    }
-    for row in rows {
-        if types.extract(row, 0).is_none() {
-            return true;
-        }
-    }
-    false
-}
-
-fn print_vec_sql_rows(rows: &[PsqlRow], types: SqlType) {
-    if rows.is_empty() {
-        println!("no rows returned");
-        return;
-    }
-    let columns = rows.first().as_ref().unwrap().columns();
-    println!(
-        "{:?}",
-        columns.iter().map(|c| c.name().to_string()).join(" | ")
-    );
-    for row in rows {
-        
println!("{:?}", types.extract(row, 0));
-    }
-}
diff --git a/mp2-v1/tests/common/cases/query/mod.rs b/mp2-v1/tests/common/cases/query/mod.rs
new file mode 100644
index 000000000..208504d0b
--- /dev/null
+++ b/mp2-v1/tests/common/cases/query/mod.rs
@@ -0,0 +1,274 @@
+use aggregated_queries::{
+    cook_query_between_blocks, cook_query_no_matching_entries,
+    cook_query_non_matching_entries_some_blocks, cook_query_partial_block_range,
+    cook_query_secondary_index_nonexisting_placeholder, cook_query_secondary_index_placeholder,
+    cook_query_unique_secondary_index, prove_query as prove_aggregation_query,
+};
+use alloy::primitives::U256;
+use anyhow::{Context, Result};
+use itertools::Itertools;
+use log::info;
+use mp2_v1::{
+    api::MetadataHash, indexing::block::BlockPrimaryIndex, query::planner::execute_row_query,
+};
+use parsil::{parse_and_validate, utils::ParsilSettingsBuilder, PlaceholderSettings};
+use simple_select_queries::{
+    cook_query_no_matching_rows, cook_query_too_big_offset, cook_query_with_distinct,
+    cook_query_with_matching_rows, cook_query_with_max_num_matching_rows,
+    cook_query_with_wildcard_and_distinct, cook_query_with_wildcard_no_distinct,
+    prove_query as prove_no_aggregation_query,
+};
+use tokio_postgres::Row as PsqlRow;
+use verifiable_db::query::{
+    computational_hash_ids::Output, universal_circuit::universal_circuit_inputs::Placeholders,
+};
+
+use crate::common::{cases::planner::QueryPlanner, table::Table, TableInfo, TestContext};
+
+use super::table_source::TableSource;
+
+pub mod aggregated_queries;
+pub mod simple_select_queries;
+
+pub const MAX_NUM_RESULT_OPS: usize = 20;
+pub const MAX_NUM_OUTPUTS: usize = 3;
+pub const MAX_NUM_ITEMS_PER_OUTPUT: usize = 5;
+pub const MAX_NUM_PLACEHOLDERS: usize = 10;
+pub const MAX_NUM_COLUMNS: usize = 20;
+pub const MAX_NUM_PREDICATE_OPS: usize = 20;
+pub const ROW_TREE_MAX_DEPTH: usize = 10;
+pub const INDEX_TREE_MAX_DEPTH: usize = 15;
+
+pub type GlobalCircuitInput = verifiable_db::api::QueryCircuitInput<
+    ROW_TREE_MAX_DEPTH,
+    INDEX_TREE_MAX_DEPTH,
+    MAX_NUM_COLUMNS,
+    MAX_NUM_PREDICATE_OPS,
+    MAX_NUM_RESULT_OPS,
+    MAX_NUM_OUTPUTS,
+    MAX_NUM_ITEMS_PER_OUTPUT,
+    MAX_NUM_PLACEHOLDERS,
+>;
+
+pub type QueryCircuitInput = verifiable_db::query::api::CircuitInput<
+    MAX_NUM_COLUMNS,
+    MAX_NUM_PREDICATE_OPS,
+    MAX_NUM_RESULT_OPS,
+    MAX_NUM_ITEMS_PER_OUTPUT,
+>;
+
+pub type RevelationCircuitInput = verifiable_db::revelation::api::CircuitInput<
+    ROW_TREE_MAX_DEPTH,
+    INDEX_TREE_MAX_DEPTH,
+    MAX_NUM_COLUMNS,
+    MAX_NUM_PREDICATE_OPS,
+    MAX_NUM_RESULT_OPS,
+    MAX_NUM_OUTPUTS,
+    MAX_NUM_ITEMS_PER_OUTPUT,
+    MAX_NUM_PLACEHOLDERS,
+>;
+
+#[derive(Clone, Debug)]
+pub struct QueryCooking {
+    pub(crate) query: String,
+    pub(crate) placeholders: Placeholders,
+    pub(crate) min_block: BlockPrimaryIndex,
+    pub(crate) max_block: BlockPrimaryIndex,
+    pub(crate) limit: Option<u32>,
+    pub(crate) offset: Option<u32>,
+}
+
+pub async fn test_query(ctx: &mut TestContext, table: Table, t: TableInfo) -> Result<()> {
+    match &t.source {
+        TableSource::Mapping(_) | TableSource::Merge(_) => query_mapping(ctx, &table, &t).await?,
+        _ => unimplemented!("yet"),
+    }
+    Ok(())
+}
+
+async fn query_mapping(ctx: &mut TestContext, table: &Table, info: &TableInfo) -> Result<()> {
+    let table_hash = info.metadata_hash();
+    let query_info = cook_query_between_blocks(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    let query_info = cook_query_unique_secondary_index(table, info).await?;
+    test_query_mapping(ctx, table, query_info, 
&table_hash).await?;
+    //// cook query with custom placeholders
+    let query_info = cook_query_secondary_index_placeholder(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    // cook query with a non-existing value for secondary index
+    let query_info = cook_query_secondary_index_nonexisting_placeholder(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    // cook query filtering over a secondary index value not valid in all the blocks
+    let query_info = cook_query_non_matching_entries_some_blocks(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    // cook query with no valid blocks
+    let query_info = cook_query_no_matching_entries(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    // cook query with block query range partially overlapping with blocks in the DB
+    let query_info = cook_query_partial_block_range(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    // cook simple no aggregation query with matching rows
+    let query_info = cook_query_with_matching_rows(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    // cook simple no aggregation query with maximum number of matching rows
+    let query_info = cook_query_with_max_num_matching_rows(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    let query_info = cook_query_no_matching_rows(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    let query_info = cook_query_too_big_offset(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    let query_info = cook_query_with_distinct(table, info).await?;
+    test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    // test queries with wildcards only if the number of columns of the table
+    // doesn't make the number of items returned for each row bigger than
+    // the maximum allowed value (i.e., MAX_NUM_ITEMS_PER_OUTPUT), as
+    // otherwise query validation in Parsil will fail
+    let num_output_items_wildcard_queries = info.columns.non_indexed_columns().len()
+        + 2 // primary and secondary indexed columns
+        + 1 // there is an additional item besides columns of the table in SELECT
+        ;
+    if num_output_items_wildcard_queries <= MAX_NUM_ITEMS_PER_OUTPUT {
+        let query_info = cook_query_with_wildcard_no_distinct(table, info).await?;
+        test_query_mapping(ctx, table, query_info, &table_hash).await?;
+        let query_info = cook_query_with_wildcard_and_distinct(table, info).await?;
+        test_query_mapping(ctx, table, query_info, &table_hash).await?;
+    }
+    Ok(())
+}
+
+/// Run a test query on the mapping table such as created during the indexing phase
+async fn test_query_mapping(
+    ctx: &mut TestContext,
+    table: &Table,
+    query_info: QueryCooking,
+    table_hash: &MetadataHash,
+) -> Result<()> {
+    let settings = ParsilSettingsBuilder::default()
+        .context(table)
+        .placeholders(PlaceholderSettings::with_freestanding(
+            MAX_NUM_PLACEHOLDERS - 2,
+        ))
+        .maybe_limit(query_info.limit)
+        .maybe_offset(query_info.offset)
+        .build()
+        .unwrap();
+
+    info!("QUERY on the testcase: {}", query_info.query);
+    let mut parsed = parse_and_validate(&query_info.query, &settings)?;
+    println!("QUERY table columns -> {:?}", table.columns.to_zkcolumns());
+    info!(
+        "BOUNDS found on query: min {}, max {} - table.genesis_block {}",
+        query_info.min_block, query_info.max_block, table.genesis_block
+    );
+
+    // 
the query to use to actually get the outputs expected
+    let mut exec_query = parsil::executor::generate_query_execution(&mut parsed, &settings)?;
+    let query_params = exec_query.convert_placeholders(&query_info.placeholders);
+    let res = execute_row_query(
+        &table.db_pool,
+        &exec_query
+            .normalize_placeholder_names()
+            .to_pgsql_string_with_placeholder(),
+        &query_params,
+    )
+    .await?;
+    let res = if is_empty_result(&res, SqlType::Numeric) {
+        vec![] // empty results, but Postgres still returns 1 row
+    } else {
+        res
+    };
+    info!(
+        "Found {} results from query {}",
+        res.len(),
+        exec_query.query.to_display()
+    );
+    print_vec_sql_rows(&res, SqlType::Numeric);
+
+    let pis = parsil::assembler::assemble_dynamic(&parsed, &settings, &query_info.placeholders)
+        .context("while assembling PIs")?;
+
+    let mut planner = QueryPlanner {
+        query: query_info.clone(),
+        pis: &pis,
+        ctx,
+        settings: &settings,
+        table,
+        columns: table.columns.clone(),
+    };
+
+    match pis.result.query_variant() {
+        Output::Aggregation => {
+            prove_aggregation_query(
+                ctx,
+                table,
+                query_info,
+                parsed,
+                &settings,
+                res,
+                table_hash.clone(),
+                pis,
+            )
+            .await
+        }
+        Output::NoAggregation => {
+            prove_no_aggregation_query(parsed, &table_hash, &mut planner, res).await
+        }
+    }
+}
+
+pub enum SqlType {
+    Numeric,
+}
+
+impl SqlType {
+    pub fn extract(&self, row: &PsqlRow, idx: usize) -> Option<SqlReturn> {
+        match self {
+            SqlType::Numeric => row
+                .get::<_, Option<U256>>(idx)
+                .map(SqlReturn::Numeric),
+        }
+    }
+}
+
+#[derive(Debug, Clone)]
+pub enum SqlReturn {
+    Numeric(U256),
+}
+
+fn is_empty_result(rows: &[PsqlRow], types: SqlType) -> bool {
+    if rows.is_empty() {
+        return true;
+    }
+    let columns = rows.first().as_ref().unwrap().columns();
+    if columns.is_empty() {
+        return true;
+    }
+    for row in rows {
+        if types.extract(row, 0).is_none() {
+            return true;
+        }
+    }
+    false
+}
+
+fn print_vec_sql_rows(rows: &[PsqlRow], types: SqlType) {
+    if rows.is_empty() {
+        println!("no rows returned");
+        return;
+    }
+    let columns = rows.first().as_ref().unwrap().columns();
+    println!(
+        "{:?}",
+        columns.iter().map(|c| c.name().to_string()).join(" | ")
+    );
+    for row in rows {
+        println!(
+            "{:?}",
+            columns
+                .iter()
+                .enumerate()
+                .map(|(i, _)| format!("{:?}", types.extract(row, i)))
+                .join(" | ")
+        );
+    }
+}
diff --git a/mp2-v1/tests/common/cases/query/simple_select_queries.rs b/mp2-v1/tests/common/cases/query/simple_select_queries.rs
new file mode 100644
index 000000000..a18adfc58
--- /dev/null
+++ b/mp2-v1/tests/common/cases/query/simple_select_queries.rs
@@ -0,0 +1,542 @@
+use alloy::primitives::U256;
+use anyhow::{Error, Result};
+use itertools::Itertools;
+use log::info;
+use mp2_common::types::HashOutput;
+use mp2_v1::{
+    api::MetadataHash,
+    indexing::{block::BlockPrimaryIndex, row::RowTreeKey, LagrangeNode},
+    query::planner::execute_row_query,
+};
+use parsil::{
+    executor::generate_query_execution_with_keys, DEFAULT_MAX_BLOCK_PLACEHOLDER,
+    DEFAULT_MIN_BLOCK_PLACEHOLDER,
+};
+use ryhope::{
+    storage::{pgsql::ToFromBytea, RoEpochKvStorage},
+    Epoch, NodePayload,
+};
+use sqlparser::ast::Query;
+use std::{fmt::Debug, hash::Hash};
+use tokio_postgres::Row as PgSqlRow;
+use verifiable_db::{
+    query::{
+        aggregation::{ChildPosition, NodeInfo},
+        computational_hash_ids::ColumnIDs,
+        universal_circuit::universal_circuit_inputs::{PlaceholderId, Placeholders},
+    },
+    revelation::{api::MatchingRow, RowPath},
+    test_utils::MAX_NUM_OUTPUTS,
+};
+
+use crate::common::{
+    cases::{
+        indexing::BLOCK_COLUMN_NAME,
+        
planner::{IndexInfo, QueryPlanner, RowInfo, TreeInfo},
+        query::{
+            aggregated_queries::{
+                check_final_outputs, find_longest_lived_key, get_node_info, prove_single_row,
+            },
+            GlobalCircuitInput, RevelationCircuitInput, SqlReturn, SqlType,
+        },
+    },
+    proof_storage::{ProofKey, ProofStorage},
+    table::Table,
+    TableInfo,
+};
+
+use super::QueryCooking;
+
+pub(crate) async fn prove_query<'a>(
+    mut parsed: Query,
+    table_hash: &MetadataHash,
+    planner: &mut QueryPlanner<'a>,
+    results: Vec<PgSqlRow>,
+) -> Result<()> {
+    let mut exec_query = generate_query_execution_with_keys(&mut parsed, &planner.settings)?;
+    let query_params = exec_query.convert_placeholders(&planner.query.placeholders);
+    let res = execute_row_query(
+        &planner.table.db_pool,
+        &exec_query
+            .normalize_placeholder_names()
+            .to_pgsql_string_with_placeholder(),
+        &query_params,
+    )
+    .await?;
+    let matching_rows = res
+        .iter()
+        .map(|row| {
+            let key = RowTreeKey::from_bytea(row.try_get::<_, &[u8]>(0)?.to_vec());
+            let epoch = row.try_get::<_, Epoch>(1)?;
+            // all the other items are query results
+            let result = (2..row.len())
+                .filter_map(|i| {
+                    SqlType::Numeric.extract(&row, i).map(|res| match res {
+                        SqlReturn::Numeric(uint) => uint,
+                    })
+                })
+                .collect_vec();
+            Ok((key, epoch, result))
+        })
+        .collect::<Result<Vec<_>>>()?;
+    // compute input for each matching row
+    let row_tree_info = RowInfo {
+        satisfiying_rows: matching_rows
+            .iter()
+            .map(|(key, _, _)| key)
+            .cloned()
+            .collect(),
+        tree: &planner.table.row,
+    };
+    let index_tree_info = IndexInfo {
+        bounds: (planner.query.min_block, planner.query.max_block),
+        tree: &planner.table.index,
+    };
+    let current_epoch = index_tree_info.tree.current_epoch();
+    let mut matching_rows_input = vec![];
+    for (key, epoch, result) in matching_rows.into_iter() {
+        let row_proof = prove_single_row(
+            planner.ctx,
+            &row_tree_info,
+            &planner.columns,
+            epoch as BlockPrimaryIndex,
+            &key,
+            &planner.pis,
+            &planner.query,
+        )
+        .await?;
+        let (row_node_info, _, _) = get_node_info(&row_tree_info, &key, epoch).await;
+        let (row_tree_path, row_tree_siblings) = get_path_info(&key, &row_tree_info, epoch).await?;
+        let index_node_key = epoch as BlockPrimaryIndex;
+        let (index_node_info, _, _) =
+            get_node_info(&index_tree_info, &index_node_key, current_epoch).await;
+        let (index_tree_path, index_tree_siblings) =
+            get_path_info(&index_node_key, &index_tree_info, current_epoch).await?;
+        let path = RowPath::new(
+            row_node_info,
+            row_tree_path,
+            row_tree_siblings,
+            index_node_info,
+            index_tree_path,
+            index_tree_siblings,
+        );
+        matching_rows_input.push(MatchingRow::new(row_proof, path, result));
+    }
+    // load the preprocessing proof at the same epoch
+    let indexing_proof = {
+        let pk = ProofKey::IVC(current_epoch as BlockPrimaryIndex);
+        planner.ctx.storage.get_proof_exact(&pk)?
+    };
+    let column_ids = ColumnIDs::from(&planner.table.columns);
+    let num_matching_rows = matching_rows_input.len();
+    let input = RevelationCircuitInput::new_revelation_tabular(
+        indexing_proof,
+        matching_rows_input,
+        &planner.pis.bounds,
+        &planner.query.placeholders,
+        &column_ids,
+        &planner.pis.predication_operations,
+        &planner.pis.result,
+        planner.query.limit.unwrap(),
+        planner.query.offset.unwrap(),
+    )?;
+    info!("Generating revelation proof");
+    let final_proof = planner.ctx.run_query_proof(
+        "querying::revelation",
+        GlobalCircuitInput::Revelation(input),
+    )?;
+    // get `StaticPublicInputs`, i.e., the data about the query available only at query registration time,
+    // to check the public inputs
+    let pis = parsil::assembler::assemble_static(&parsed, planner.settings)?;
+    check_final_outputs(
+        final_proof,
+        &planner.ctx,
+        &planner.table,
+        &planner.query,
+        &pis,
+        current_epoch,
+        num_matching_rows,
+        results,
+        table_hash.clone(),
+    )?;
+    info!("Revelation done!");
+    Ok(())
+}
+
+async fn get_path_info<K, V, T: TreeInfo<K, V>>(
+    key: &K,
+    tree_info: &T,
+    epoch: Epoch,
+) -> Result<(Vec<(NodeInfo, ChildPosition)>, Vec<Option<HashOutput>>)>
+where
+    K: Debug + Hash + Clone + Send + Sync + Eq,
+    V: NodePayload + Send + Sync + LagrangeNode + Clone,
+{
+    let mut tree_path = vec![];
+    let mut siblings = vec![];
+    let (mut node_ctx, mut node_payload) = tree_info
+        .fetch_ctx_and_payload_at(epoch, key)
+        .await
+        .ok_or(Error::msg(format!("Node not found for key {:?}", key)))?;
+    let mut previous_node_hash = node_payload.hash();
+    let mut previous_node_key = key.clone();
+    while node_ctx.parent.is_some() {
+        let parent_key = node_ctx.parent.unwrap();
+        (node_ctx, node_payload) = tree_info
+            .fetch_ctx_and_payload_at(epoch, &parent_key)
+            .await
+            .ok_or(Error::msg(format!(
+                "Node not found for key {:?}",
+                parent_key
+            )))?;
+        let child_pos = node_ctx
+            .iter_children()
+            .position(|child| child.map(|c| *c == previous_node_key).unwrap_or(false));
+        let is_left_child = child_pos.unwrap() == 0; // unwrap is safe
+        let (left_child_hash, right_child_hash) = if is_left_child {
+            (
+                Some(previous_node_hash),
+                match node_ctx.right {
+                    Some(k) => {
+                        let (_, payload) = tree_info
+                            .fetch_ctx_and_payload_at(epoch, &k)
+                            .await
+                            .ok_or(Error::msg(format!("Node not found for key {:?}", k)))?;
+                        Some(payload.hash())
+                    }
+                    None => None,
+                },
+            )
+        } else {
+            (
+                match node_ctx.left {
+                    Some(k) => {
+                        let (_, payload) = tree_info
+                            .fetch_ctx_and_payload_at(epoch, &k)
+                            .await
+                            .ok_or(Error::msg(format!("Node not found for key {:?}", k)))?;
+                        Some(payload.hash())
+                    }
+                    None => None,
+                },
+                Some(previous_node_hash),
+            )
+        };
+        let node_info = NodeInfo::new(
+            &node_payload.embedded_hash(),
+            left_child_hash.as_ref(),
+            right_child_hash.as_ref(),
+            node_payload.value(),
+            node_payload.min(),
+            node_payload.max(),
+        );
+        tree_path.push((
+            node_info,
+            if is_left_child {
+                ChildPosition::Left
+            } else {
+                ChildPosition::Right
+            },
+        ));
+        siblings.push(if is_left_child {
+            right_child_hash
+        } else {
+            left_child_hash
+        });
+        previous_node_hash = node_payload.hash();
+        previous_node_key = parent_key;
+    }
+
+    Ok((tree_path, siblings))
+}
+
+/// Cook a query where the number of matching rows is the same as the maximum number of
+/// outputs allowed
+pub(crate) async fn cook_query_with_max_num_matching_rows(
+    table: &Table,
+    info: &TableInfo,
+) -> Result<QueryCooking> {
+    let (longest_key, (min_block, max_block)) = find_longest_lived_key(table, false).await?;
+    let key_value = hex::encode(longest_key.value.to_be_bytes_trimmed_vec());
+    
info!( + "Longest sequence is for key {longest_key:?} -> from block {:?} to {:?}, hex -> {}", + min_block, max_block, key_value + ); + // now we can fetch the key that we want + let key_column = table.columns.secondary.name.clone(); + let value_column = &info.value_column; + let table_name = &table.public_name; + + let added_placeholder = U256::from(42); + + let placeholders = Placeholders::from(( + vec![(PlaceholderId::Generic(1), added_placeholder)], + U256::from(min_block), + U256::from(max_block), + )); + + let limit = MAX_NUM_OUTPUTS as u32; + let offset = 0; + + let query_str = format!( + "SELECT {BLOCK_COLUMN_NAME}, {value_column} + $1 + FROM {table_name} + WHERE {BLOCK_COLUMN_NAME} >= {DEFAULT_MIN_BLOCK_PLACEHOLDER} + AND {BLOCK_COLUMN_NAME} <= {DEFAULT_MAX_BLOCK_PLACEHOLDER} + AND {key_column} = '0x{key_value}';" + ); + Ok(QueryCooking { + min_block: min_block as BlockPrimaryIndex, + max_block: max_block as BlockPrimaryIndex, + query: query_str, + placeholders, + limit: Some(limit), + offset: Some(offset), + }) +} + +pub(crate) async fn cook_query_with_matching_rows( + table: &Table, + info: &TableInfo, +) -> Result { + let (longest_key, (min_block, max_block)) = find_longest_lived_key(table, false).await?; + let key_value = hex::encode(longest_key.value.to_be_bytes_trimmed_vec()); + info!( + "Longest sequence is for key {longest_key:?} -> from block {:?} to {:?}, hex -> {}", + min_block, max_block, key_value + ); + // now we can fetch the key that we want + let key_column = table.columns.secondary.name.clone(); + let value_column = &info.value_column; + let table_name = &table.public_name; + + let added_placeholder = U256::from(42); + + let placeholders = Placeholders::from(( + vec![(PlaceholderId::Generic(1), added_placeholder)], + U256::from(min_block), + U256::from(max_block), + )); + + let limit: u32 = (MAX_NUM_OUTPUTS - 2).min(1).try_into().unwrap(); + let offset: u32 = (max_block - min_block + 1 - limit as usize) + .try_into() + .unwrap(); // get the matching rows in the last blocks + + let query_str = format!( + "SELECT {BLOCK_COLUMN_NAME}, {value_column} + $1 + FROM {table_name} + WHERE {BLOCK_COLUMN_NAME} >= {DEFAULT_MIN_BLOCK_PLACEHOLDER} + AND {BLOCK_COLUMN_NAME} <= {DEFAULT_MAX_BLOCK_PLACEHOLDER} + AND {key_column} = '0x{key_value}';" + ); + Ok(QueryCooking { + min_block: min_block as BlockPrimaryIndex, + max_block: max_block as BlockPrimaryIndex, + query: query_str, + placeholders, + limit: Some(limit), + offset: Some(offset), + }) +} + +/// Cook a query where the offset is big enough to have no matching rows +pub(crate) async fn cook_query_too_big_offset( + table: &Table, + info: &TableInfo, +) -> Result { + let (longest_key, (min_block, max_block)) = find_longest_lived_key(table, false).await?; + let key_value = hex::encode(longest_key.value.to_be_bytes_trimmed_vec()); + info!( + "Longest sequence is for key {longest_key:?} -> from block {:?} to {:?}, hex -> {}", + min_block, max_block, key_value + ); + // now we can fetch the key that we want + let key_column = table.columns.secondary.name.clone(); + let value_column = &info.value_column; + let table_name = &table.public_name; + + let added_placeholder = U256::from(42); + + let placeholders = Placeholders::from(( + vec![(PlaceholderId::Generic(1), added_placeholder)], + U256::from(min_block), + U256::from(max_block), + )); + + let limit: u32 = MAX_NUM_OUTPUTS.try_into().unwrap(); + let offset = 100; + + let query_str = format!( + "SELECT {BLOCK_COLUMN_NAME}, {value_column} + $1 + FROM {table_name} + WHERE 
{BLOCK_COLUMN_NAME} >= {DEFAULT_MIN_BLOCK_PLACEHOLDER} + AND {BLOCK_COLUMN_NAME} <= {DEFAULT_MAX_BLOCK_PLACEHOLDER} + AND {key_column} = '0x{key_value}';" + ); + Ok(QueryCooking { + min_block: min_block as BlockPrimaryIndex, + max_block: max_block as BlockPrimaryIndex, + query: query_str, + placeholders, + limit: Some(limit), + offset: Some(offset), + }) +} + +pub(crate) async fn cook_query_no_matching_rows( + table: &Table, + info: &TableInfo, +) -> Result { + let initial_epoch = table.index.initial_epoch(); + let current_epoch = table.index.current_epoch(); + let min_block = initial_epoch as BlockPrimaryIndex; + let max_block = current_epoch as BlockPrimaryIndex; + + let key_column = table.columns.secondary.name.clone(); + let value_column = &info.value_column; + let table_name = &table.public_name; + + let key_value = U256::from(1234567890); // dummy value + + let added_placeholder = U256::from(42); + + let placeholders = Placeholders::from(( + vec![ + (PlaceholderId::Generic(1), key_value), + (PlaceholderId::Generic(2), added_placeholder), + ], + U256::from(min_block), + U256::from(max_block), + )); + + let limit: u32 = MAX_NUM_OUTPUTS.try_into().unwrap(); + let offset = 0; + + let query_str = format!( + "SELECT {BLOCK_COLUMN_NAME}, {value_column} + $2 + FROM {table_name} + WHERE {BLOCK_COLUMN_NAME} >= {DEFAULT_MIN_BLOCK_PLACEHOLDER} + AND {BLOCK_COLUMN_NAME} <= {DEFAULT_MAX_BLOCK_PLACEHOLDER} + AND {key_column} = $1;" + ); + Ok(QueryCooking { + min_block: min_block as BlockPrimaryIndex, + max_block: max_block as BlockPrimaryIndex, + query: query_str, + placeholders, + limit: Some(limit), + offset: Some(offset), + }) +} + +pub(crate) async fn cook_query_with_distinct( + table: &Table, + info: &TableInfo, +) -> Result { + let (longest_key, (min_block, max_block)) = find_longest_lived_key(table, false).await?; + let key_value = hex::encode(longest_key.value.to_be_bytes_trimmed_vec()); + info!( + "Longest sequence is for key {longest_key:?} -> from block {:?} to {:?}, hex -> {}", + min_block, max_block, key_value + ); + // now we can fetch the key that we want + let key_column = table.columns.secondary.name.clone(); + let value_column = &info.value_column; + let table_name = &table.public_name; + + let added_placeholder = U256::from(42); + + let placeholders = Placeholders::from(( + vec![(PlaceholderId::Generic(1), added_placeholder)], + U256::from(min_block), + U256::from(max_block), + )); + + let limit: u32 = MAX_NUM_OUTPUTS.try_into().unwrap(); + let offset = 0; + + let query_str = format!( + "SELECT DISTINCT {value_column} + $1 + FROM {table_name} + WHERE {BLOCK_COLUMN_NAME} >= {DEFAULT_MIN_BLOCK_PLACEHOLDER} + AND {BLOCK_COLUMN_NAME} <= {DEFAULT_MAX_BLOCK_PLACEHOLDER} + AND {key_column} = '0x{key_value}';" + ); + Ok(QueryCooking { + min_block: min_block as BlockPrimaryIndex, + max_block: max_block as BlockPrimaryIndex, + query: query_str, + placeholders, + limit: Some(limit), + offset: Some(offset), + }) +} + +pub(crate) async fn cook_query_with_wildcard( + table: &Table, + distinct: bool, + info: &TableInfo, +) -> Result { + let (longest_key, (min_block, max_block)) = find_longest_lived_key(table, false).await?; + let key_value = hex::encode(longest_key.value.to_be_bytes_trimmed_vec()); + info!( + "Longest sequence is for key {longest_key:?} -> from block {:?} to {:?}, hex -> {}", + min_block, max_block, key_value + ); + // now we can fetch the key that we want + let key_column = table.columns.secondary.name.clone(); + let value_column = &info.value_column; + let table_name = 
&table.public_name; + + let added_placeholder = U256::from(42); + + let placeholders = Placeholders::from(( + vec![(PlaceholderId::Generic(1), added_placeholder)], + U256::from(min_block), + U256::from(max_block), + )); + + let limit: u32 = MAX_NUM_OUTPUTS.try_into().unwrap(); + let offset = 0; + + let query_str = if distinct { + format!( + "SELECT DISTINCT *, {value_column} + $1 + FROM {table_name} + WHERE {BLOCK_COLUMN_NAME} >= {DEFAULT_MIN_BLOCK_PLACEHOLDER} + AND {BLOCK_COLUMN_NAME} <= {DEFAULT_MAX_BLOCK_PLACEHOLDER} + AND {key_column} = '0x{key_value}';" + ) + } else { + format!( + "SELECT *, {value_column} + $1 + FROM {table_name} + WHERE {BLOCK_COLUMN_NAME} >= {DEFAULT_MIN_BLOCK_PLACEHOLDER} + AND {BLOCK_COLUMN_NAME} <= {DEFAULT_MAX_BLOCK_PLACEHOLDER} + AND {key_column} = '0x{key_value}';" + ) + }; + Ok(QueryCooking { + min_block: min_block as BlockPrimaryIndex, + max_block: max_block as BlockPrimaryIndex, + query: query_str, + placeholders, + limit: Some(limit), + offset: Some(offset), + }) +} + +pub(crate) async fn cook_query_with_wildcard_no_distinct( + table: &Table, + info: &TableInfo, +) -> Result { + cook_query_with_wildcard(table, false, info).await +} + +pub(crate) async fn cook_query_with_wildcard_and_distinct( + table: &Table, + info: &TableInfo, +) -> Result { + cook_query_with_wildcard(table, true, info).await +} diff --git a/mp2-v1/tests/common/context.rs b/mp2-v1/tests/common/context.rs index 2c8409957..fafe04562 100644 --- a/mp2-v1/tests/common/context.rs +++ b/mp2-v1/tests/common/context.rs @@ -27,8 +27,8 @@ use super::{ cases::{ self, query::{ - MAX_NUM_COLUMNS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_OUTPUTS, MAX_NUM_PLACEHOLDERS, - MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULTS, MAX_NUM_RESULT_OPS, + INDEX_TREE_MAX_DEPTH, MAX_NUM_COLUMNS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_OUTPUTS, + MAX_NUM_PLACEHOLDERS, MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, ROW_TREE_MAX_DEPTH, }, }, proof_storage::ProofKV, @@ -56,6 +56,8 @@ pub(crate) struct TestContext { pub(crate) params: Option, pub(crate) query_params: Option< verifiable_db::api::QueryParameters< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, MAX_NUM_COLUMNS, MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, @@ -126,7 +128,6 @@ impl ParamsType { pub fn build(&self, ctx: &mut TestContext, path: PathBuf) -> Result<()> where [(); MAX_NUM_COLUMNS + MAX_NUM_RESULT_OPS]:, - [(); MAX_NUM_RESULTS - 1]:, { match self { ParamsType::Query => { @@ -157,7 +158,6 @@ impl ParamsType { pub fn build_and_save(&self, path: PathBuf, ctx: &mut TestContext) -> Result<()> where [(); MAX_NUM_COLUMNS + MAX_NUM_RESULT_OPS]:, - [(); MAX_NUM_RESULTS - 1]:, { self.build(ctx, path.clone())?; match self { diff --git a/mp2-v1/tests/common/table.rs b/mp2-v1/tests/common/table.rs index ea7e692c4..5420dd31e 100644 --- a/mp2-v1/tests/common/table.rs +++ b/mp2-v1/tests/common/table.rs @@ -6,7 +6,7 @@ use futures::{ FutureExt, }; use itertools::Itertools; -use log::debug; +use log::{debug, info}; use mp2_v1::indexing::{ block::{BlockPrimaryIndex, BlockTreeKey}, cell::{self, Cell, CellTreeKey, MerkleCell, MerkleCellTree}, @@ -26,8 +26,17 @@ use ryhope::{ }; use serde::{Deserialize, Serialize}; use std::{hash::Hash, iter::once}; +use verifiable_db::query::computational_hash_ids::ColumnIDs; -use super::{index_tree::MerkleIndexTree, rowtree::MerkleRowTree, ColumnIdentifier}; +use super::{ + cases::query::{ + MAX_NUM_COLUMNS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_OUTPUTS, MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + }, + index_tree::MerkleIndexTree, + rowtree::MerkleRowTree, + 
ColumnIdentifier, +}; pub type TableID = String; @@ -119,6 +128,20 @@ impl TableColumns { } } +impl From<&TableColumns> for ColumnIDs { + fn from(columns: &TableColumns) -> Self { + ColumnIDs::new( + columns.primary.identifier, + columns.secondary.identifier, + columns + .non_indexed_columns() + .into_iter() + .map(|column| column.identifier) + .collect_vec(), + ) + } +} + pub type DBPool = Pool>; async fn new_db_pool(db_url: &str) -> Result { let db_manager = PostgresConnectionManager::new_from_stringlike(db_url, NoTls) @@ -593,7 +616,18 @@ impl ContextProvider for Table { fn fetch_table(&self, table_name: &str) -> Result { <&Self as ContextProvider>::fetch_table(&self, table_name) } + + const MAX_NUM_COLUMNS: usize = <&Self as ContextProvider>::MAX_NUM_COLUMNS; + + const MAX_NUM_PREDICATE_OPS: usize = <&Self as ContextProvider>::MAX_NUM_PREDICATE_OPS; + + const MAX_NUM_RESULT_OPS: usize = <&Self as ContextProvider>::MAX_NUM_RESULT_OPS; + + const MAX_NUM_ITEMS_PER_OUTPUT: usize = <&Self as ContextProvider>::MAX_NUM_ITEMS_PER_OUTPUT; + + const MAX_NUM_OUTPUTS: usize = <&Self as ContextProvider>::MAX_NUM_OUTPUTS; } + impl ContextProvider for &Table { fn fetch_table(&self, table_name: &str) -> Result { ensure!( @@ -604,4 +638,14 @@ impl ContextProvider for &Table { ); self.to_zktable() } + + const MAX_NUM_COLUMNS: usize = MAX_NUM_COLUMNS; + + const MAX_NUM_PREDICATE_OPS: usize = MAX_NUM_PREDICATE_OPS; + + const MAX_NUM_RESULT_OPS: usize = MAX_NUM_RESULT_OPS; + + const MAX_NUM_ITEMS_PER_OUTPUT: usize = MAX_NUM_ITEMS_PER_OUTPUT; + + const MAX_NUM_OUTPUTS: usize = MAX_NUM_OUTPUTS; } diff --git a/mp2-v1/tests/integrated_tests.rs b/mp2-v1/tests/integrated_tests.rs index ead38b76f..3b70c4695 100644 --- a/mp2-v1/tests/integrated_tests.rs +++ b/mp2-v1/tests/integrated_tests.rs @@ -21,7 +21,8 @@ use common::{ indexing::{ChangeType, UpdateType}, query::{ test_query, GlobalCircuitInput, QueryCircuitInput, RevelationCircuitInput, - MAX_NUM_PLACEHOLDERS, + MAX_NUM_COLUMNS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_OUTPUTS, MAX_NUM_PLACEHOLDERS, + MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, }, TableIndexing, }, @@ -36,7 +37,8 @@ use parsil::{ assembler::DynamicCircuitPis, parse_and_validate, symbols::{ContextProvider, ZkTable}, - ParsilSettings, PlaceholderSettings, + utils::ParsilSettingsBuilder, + PlaceholderSettings, }; use test_log::test; use verifiable_db::query::universal_circuit::universal_circuit_inputs::Placeholders; @@ -181,6 +183,16 @@ impl ContextProvider for T { fn fetch_table(&self, _table_name: &str) -> Result { Ok(self.0.clone()) } + + const MAX_NUM_COLUMNS: usize = MAX_NUM_COLUMNS; + + const MAX_NUM_PREDICATE_OPS: usize = MAX_NUM_PREDICATE_OPS; + + const MAX_NUM_RESULT_OPS: usize = MAX_NUM_RESULT_OPS; + + const MAX_NUM_ITEMS_PER_OUTPUT: usize = MAX_NUM_ITEMS_PER_OUTPUT; + + const MAX_NUM_OUTPUTS: usize = MAX_NUM_OUTPUTS; } #[tokio::test] @@ -202,10 +214,13 @@ async fn test_andrus_query() -> Result<()> { let query = "select AVG(field1) from primitive1_rows WHERE block_number >= $MIN_BLOCK and block_number <= $MAX_BLOCK"; let zktable_str = r#"{"user_name":"primitive1","name":"primitive1_rows","columns":[{"name":"block_number","kind":"PrimaryIndex","id":15542555334667826467},{"name":"field1","kind":"SecondaryIndex","id":10143644063834010325},{"name":"field2","kind":"Standard","id":14738928498191419754},{"name":"field3","kind":"Standard","id":2724380514203373020},{"name":"field4","kind":"Standard","id":1084192582840933701}]}"#; let table: ZkTable = serde_json::from_str(zktable_str)?; - let settings = 
ParsilSettings { - context: T(table), - placeholders: PlaceholderSettings::with_freestanding(MAX_NUM_PLACEHOLDERS - 2), - }; + let settings = ParsilSettingsBuilder::default() + .context(T(table)) + .placeholders(PlaceholderSettings::with_freestanding( + MAX_NUM_PLACEHOLDERS - 2, + )) + .build() + .unwrap(); let parsed = parse_and_validate(query, &settings)?; let computed_pis = parsil::assembler::assemble_dynamic(&parsed, &settings, &ph)?; @@ -219,18 +234,13 @@ async fn test_andrus_query() -> Result<()> { info!("Building querying params"); ctx.build_params(ParamsType::Query).unwrap(); - let pis_hash = QueryCircuitInput::ids_for_placeholder_hash( - &computed_pis.predication_operations, - &computed_pis.result, - &ph, - &computed_pis.bounds, - )?; - let input = RevelationCircuitInput::new_revelation_no_results_tree( + let input = RevelationCircuitInput::new_revelation_aggregated( root_query_proof, ivc_proof, &computed_pis.bounds, &ph, - pis_hash, + &computed_pis.predication_operations, + &computed_pis.result, )?; info!("Generating the revelation proof"); let _proof = ctx.run_query_proof("revelation", GlobalCircuitInput::Revelation(input))?; diff --git a/parsil/src/assembler.rs index fd063231a..bb4e22c1d 100644 --- a/parsil/src/assembler.rs +++ b/parsil/src/assembler.rs @@ -150,6 +150,8 @@ pub(crate) struct Assembler<'a, C: ContextProvider> { /// cryptographic column ID. columns: Vec, secondary_index_bounds: Bounds, + /// Flag specifying whether the DISTINCT keyword is used in the query + distinct: bool, } impl<'a, C: ContextProvider> Assembler<'a, C> { /// Create a new empty [`Assembler`] @@ -161,6 +163,7 @@ impl<'a, C: ContextProvider> Assembler<'a, C> { constants: Default::default(), columns: Vec::new(), secondary_index_bounds: Default::default(), + distinct: false, } } @@ -644,38 +647,37 @@ impl<'a, C: ContextProvider> Assembler<'a, C> { fn prepare_result(&self) -> Result<ResultStructure> { let root_scope = &self.scopes.scope_at(1); - Ok( - if root_scope - .metadata() - .aggregation - .iter() - .all(|&a| a == AggregationOperation::IdOp) - { - ResultStructure::new_for_query_no_aggregation( - self.query_ops.ops.clone(), - root_scope.metadata().outputs.clone(), - vec![0; root_scope.metadata().outputs.len()], - ) - } else if root_scope - .metadata() - .aggregation - .iter() - .all(|&a| a != AggregationOperation::IdOp) - { - ResultStructure::new_for_query_with_aggregation( - self.query_ops.ops.clone(), - root_scope.metadata().outputs.clone(), - root_scope - .metadata() - .aggregation - .iter() - .map(|x| x.to_id()) - .collect(), - ) - } else { - unreachable!() - }, - ) + if root_scope + .metadata() + .aggregation + .iter() + .all(|&a| a == AggregationOperation::IdOp) + { + ResultStructure::new_for_query_no_aggregation( + self.query_ops.ops.clone(), + root_scope.metadata().outputs.clone(), + vec![0; root_scope.metadata().outputs.len()], + self.distinct, + ) + } else if root_scope + .metadata() + .aggregation + .iter() + .all(|&a| a != AggregationOperation::IdOp) + { + ResultStructure::new_for_query_with_aggregation( + self.query_ops.ops.clone(), + root_scope.metadata().outputs.clone(), + root_scope + .metadata() + .aggregation + .iter() + .map(|x| x.to_id()) + .collect(), + ) + } else { + unreachable!() + } } /// Generate appropriate universal query circuit PIs in static mode from the @@ -684,7 +686,7 @@ impl<'a, C: ContextProvider> Assembler<'a, C> { let result = self.prepare_result()?; let root_scope = &self.scopes.scope_at(1); - Ok(CircuitPis { + let pis = CircuitPis { result,
column_ids: self.columns.clone(), query_aggregations: root_scope.metadata().aggregation.to_vec(), @@ -693,7 +695,9 @@ impl<'a, C: ContextProvider> Assembler<'a, C> { self.secondary_index_bounds.low.clone(), self.secondary_index_bounds.high.clone(), ), - }) + }; + pis.validate::<C>()?; + Ok(pis) } /// Generate appropriate universal query circuit PIs in runtime mode from @@ -702,7 +706,7 @@ impl<'a, C: ContextProvider> Assembler<'a, C> { let result = self.prepare_result()?; let root_scope = &self.scopes.scope_at(1); - Ok(CircuitPis { + let pis = CircuitPis { result, column_ids: self.columns.clone(), query_aggregations: root_scope.metadata().aggregation.to_vec(), @@ -713,7 +717,9 @@ impl<'a, C: ContextProvider> Assembler<'a, C> { self.secondary_index_bounds.high.clone(), ) .context("while setting query bounds")?, - }) + }; + pis.validate::<C>()?; + Ok(pis) } } @@ -721,7 +727,7 @@ impl<'a, C: ContextProvider> Assembler<'a, C> { /// place them in a [`CircuitPis`] that may be either built in static mode (i.e. /// no reference to runtime value) at query registration time, or in dynamic /// mode at query execution time. -pub trait BuildableBounds: Sized { +pub trait BuildableBounds: Sized + Serialize { fn without_values(low: Option, high: Option) -> Self; fn with_values( @@ -733,7 +739,7 @@ pub trait BuildableBounds: Sized { /// Similar to [`QueryBounds`], but only containing the static expressions /// defining the query bounds, without any reference to runtime values. -#[derive(Debug)] +#[derive(Debug, Serialize)] pub struct StaticQueryBounds { pub min_query_secondary: Option, pub max_query_secondary: Option, @@ -798,6 +804,33 @@ pub type StaticCircuitPis = CircuitPis<StaticQueryBounds>; /// runtime. pub type DynamicCircuitPis = CircuitPis<QueryBounds>; +impl<B: BuildableBounds> CircuitPis<B> { + fn validate<C: ContextProvider>(&self) -> Result<()> { + ensure!( + self.predication_operations.len() <= C::MAX_NUM_PREDICATE_OPS, + format!( + "too many basic operations found in WHERE clause: found {}, maximum allowed is {}", + self.predication_operations.len(), + C::MAX_NUM_PREDICATE_OPS, + ) + ); + ensure!( + self.column_ids.len() <= C::MAX_NUM_COLUMNS, + format!( + "too many columns found in the table: found {}, maximum allowed is {}", + self.column_ids.len(), + C::MAX_NUM_COLUMNS, + ) + ); + self.result + .validate(C::MAX_NUM_RESULT_OPS, C::MAX_NUM_ITEMS_PER_OUTPUT) + } + + pub fn to_json(&self) -> Vec<u8> { + serde_json::to_vec(self).unwrap() + } +} + impl<C: ContextProvider> AstVisitor for Assembler<'_, C> { type Error = anyhow::Error; @@ -932,6 +965,7 @@ impl<C: ContextProvider> AstVisitor for Assembler<'_, C> { } fn post_select(&mut self, select: &Select) -> Result<()> { + self.distinct = select.distinct.is_some(); if let Some(where_clause) = select.selection.as_ref() { // As the expressions are traversed depth-first, the top level // expression will mechanically find itself at the last position, as @@ -990,7 +1024,7 @@ impl<C: ContextProvider> AstVisitor for Assembler<'_, C> { pub fn validate<C: ContextProvider>(query: &Query, settings: &ParsilSettings<C>) -> Result<()> { let mut resolver = Assembler::new(settings); query.visit(&mut resolver)?; - resolver.prepare_result().map(|_| ()) + resolver.to_static_inputs().map(|_| ()) } /// Generate static circuit public inputs, i.e. without reference to runtime diff --git a/parsil/src/errors.rs index ec0a5adb4..463536b86 100644 --- a/parsil/src/errors.rs +++ b/parsil/src/errors.rs @@ -3,12 +3,6 @@ use thiserror::Error; #[derive(Error, Debug)] pub enum ValidationError { - // HACK: refuse non-aggregated queries - #[error( - "only aggregated query projections are supported for now, e.g.
`SELECT AVG(x) FROM ...`" )] - TabularQuery, - #[error("query projection must not mix aggregates and scalars")] MixedQuery, @@ -76,4 +70,7 @@ pub enum ValidationError { #[error("NULL-related ordering specifiers unsupported")] NullRelatedOrdering, + + #[error("Clause `{0}` value should be set in the appropriate parameter at execution time")] + UseInvocationParameter(String), } diff --git a/parsil/src/executor.rs index 911cffb09..c7fda830f 100644 --- a/parsil/src/executor.rs +++ b/parsil/src/executor.rs @@ -5,7 +5,7 @@ use alloy::primitives::U256; use anyhow::*; use ryhope::{EPOCH, KEY, PAYLOAD, VALID_FROM, VALID_UNTIL}; use sqlparser::ast::{ - BinaryOperator, CastKind, DataType, ExactNumberInfo, Expr, Function, FunctionArg, + BinaryOperator, CastKind, DataType, Distinct, ExactNumberInfo, Expr, Function, FunctionArg, FunctionArgExpr, FunctionArgumentList, FunctionArguments, GroupByExpr, Ident, ObjectName, Query, Select, SelectItem, SetExpr, TableAlias, TableFactor, TableWithJoins, Value, }; @@ -25,7 +25,7 @@ use crate::{ /// Safely wraps a [`Query`], ensuring its meaning and the status of its /// placeholders. -#[derive(Debug, PartialEq, Eq)] +#[derive(Debug, Clone, PartialEq, Eq)] pub enum SafeQuery { /// A query featuring placeholders as defined in a [`PlaceholderRegister`] ZkQuery(Query), @@ -87,7 +87,7 @@ impl AsMut<Query> for SafeQuery { /// A data structure wrapping a zkSQL query converted into a pgSQL query able to be /// executed on zkTables, and its accompanying metadata. -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct TranslatedQuery { /// The translated query, to be converted to a string pub query: SafeQuery, @@ -680,6 +680,116 @@ impl<C: ContextProvider> AstMutator for Executor<'_, C> { } } +/// Executor to prepare a query that returns both the results of a user query +/// and the matching rows, each identified by the pair (row_key, epoch) +struct ExecutorWithKey<'a, C: ContextProvider> { + settings: &'a ParsilSettings<C>, +} + +impl<'a, C: ContextProvider> ExecutorWithKey<'a, C> { + fn new(settings: &'a ParsilSettings<C>) -> Self { + Self { settings } + } +} + +impl<'a, C: ContextProvider> AstMutator for ExecutorWithKey<'a, C> { + type Error = anyhow::Error; + + fn post_expr(&mut self, expr: &mut Expr) -> Result<()> { + let mut executor = Executor { + settings: &mut self.settings, + }; + executor.post_expr(expr) + } + + fn post_table_factor(&mut self, table_factor: &mut TableFactor) -> Result<()> { + let mut key_fetcher = KeyFetcher { + settings: &mut self.settings, + }; + key_fetcher.post_table_factor(table_factor) + } + + fn post_select(&mut self, select: &mut Select) -> Result<()> { + let replace_wildcard = || { + // we expand the Wildcard by replacing it with all the columns of the original table + assert_eq!(select.from.len(), 1); // single table queries + let table = &select.from.first().unwrap().relation; + match table { + TableFactor::Derived { subquery, .. } => { + subquery + .as_ref() + .body + .as_ref() + .as_select() + .unwrap() + .projection + .iter() + .filter_map(|item| { + let expr = match item { + SelectItem::ExprWithAlias { alias, ..
} => { + Expr::Identifier(alias.clone()) + } + SelectItem::UnnamedExpr(expr) => expr.clone(), + _ => unreachable!(), + }; + // we need to filter out KEY and EPOCH from the columns expanded by the Wildcard, + // as these are not among the columns over which we need to apply DISTINCT + match &expr { + Expr::Identifier(ident) + if ident.value == EPOCH || ident.value == KEY => + { + None + } + _ => Some(expr), + } + }) + .collect::<Vec<_>>() + } + _ => unreachable!(), // post_table_factor ensures the relation is a `TableFactor::Derived` + } + }; + // need to: + // 1. add KEY and EPOCH to existing `SelectItem`s + // 2. Ensure that, if the DISTINCT keyword is present in the original query, + // the original `SelectItem`s are wrapped in `DISTINCT ON`, to + // ensure that we return only DISTINCT results + // first, turn existing `SelectItem`s into a vector of expressions + if let Some(distinct) = select.distinct.as_mut() { + let items = select + .projection + .iter() + .flat_map(|item| { + match item { + SelectItem::UnnamedExpr(expr) => vec![expr.clone()], + SelectItem::ExprWithAlias { expr, .. } => vec![expr.clone()], // we don't care about the alias here + SelectItem::QualifiedWildcard(_, _) => unreachable!(), + SelectItem::Wildcard(_) => replace_wildcard(), + } + }) + .collect::<Vec<_>>(); + *distinct = Distinct::On(items) + } + // we add KEY and EPOCH to existing `SelectItem`s + select.projection = vec![ + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(KEY))), + SelectItem::UnnamedExpr(Expr::Identifier(Ident::new(EPOCH))), + ] + .into_iter() + .chain(select.projection.iter().flat_map(|item| { + match item { + SelectItem::Wildcard(_) => replace_wildcard() + .into_iter() + .map(|expr| SelectItem::UnnamedExpr(expr)) + .collect(), + _ => vec![item.clone()], + } + })) + .collect(); + + Ok(()) + } +} + pub fn generate_query_execution<C: ContextProvider>( query: &mut Query, settings: &ParsilSettings<C>, @@ -691,6 +801,20 @@ pub fn generate_query_execution( TranslatedQuery::make(SafeQuery::ZkQuery(query_execution), settings) } +/// Build a statement to be executed in order to fetch the matching rows for +/// a query, each identified by a pair (row_key, epoch), together with the +/// results of the query corresponding to each matching row +pub fn generate_query_execution_with_keys<C: ContextProvider>( + query: &mut Query, + settings: &ParsilSettings<C>, +) -> Result<TranslatedQuery> { + let mut executor = ExecutorWithKey::new(settings); + let mut query_execution = query.clone(); + query_execution.visit_mut(&mut executor)?; + + TranslatedQuery::make(SafeQuery::ZkQuery(query_execution), settings) +} + pub fn generate_query_keys( query: &mut Query, settings: &ParsilSettings, ) -> Result { diff --git a/parsil/src/expand.rs index 810ae0073..8d063bb97 100644 --- a/parsil/src/expand.rs +++ b/parsil/src/expand.rs @@ -1,11 +1,24 @@ //! Expand high-level operations (e.g. IN or BETWEEN) into combinations of //! operations supported by the circuits.
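//!
//! For illustration (the forms shown here are indicative; the exact rewrites
//! are implemented in `pre_expr` below):
//!   `x BETWEEN a AND b`  becomes  `x >= a AND x <= b`
//!   `x IN (1, 2, 4)`     becomes  `x = 1 OR x = 2 OR x = 4`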
-use crate::visitor::{AstMutator, VisitMut}; -use sqlparser::ast::{BinaryOperator, Expr, Query, UnaryOperator, Value}; +use crate::{ + errors::ValidationError, + symbols::ContextProvider, + utils::int_to_expr, + validate::is_query_with_no_aggregation, + visitor::{AstMutator, VisitMut}, + ParsilSettings, +}; +use anyhow::ensure; +use sqlparser::ast::{ + BinaryOperator, Expr, Offset, OffsetRows, Query, SetExpr, UnaryOperator, Value, +}; -struct Expander; -impl AstMutator for Expander { +struct Expander<'a, C: ContextProvider> { + settings: &'a ParsilSettings<C>, +} + +impl<'a, C: ContextProvider> AstMutator for Expander<'a, C> { type Error = anyhow::Error; fn pre_expr(&mut self, e: &mut Expr) -> anyhow::Result<()> { @@ -132,8 +145,38 @@ impl AstMutator for Expander { Ok(()) } + + fn pre_query(&mut self, query: &mut Query) -> anyhow::Result<()> { + ensure!( + query.limit.is_none(), + ValidationError::UseInvocationParameter("LIMIT".into()) + ); + ensure!( + query.offset.is_none(), + ValidationError::UseInvocationParameter("OFFSET".into()) + ); + + if let SetExpr::Select(ref select) = *query.body { + if is_query_with_no_aggregation(select) { + query.limit = Some(int_to_expr(self.settings.limit())); + if let Some(offset) = self.settings.offset { + if offset != 0 { + query.offset = Some(Offset { + value: int_to_expr(self.settings.offset()), + rows: OffsetRows::None, + }); + } + } + } + } + Ok(()) + } } -pub fn expand(q: &mut Query) { - q.visit_mut(&mut Expander).expect("can not fail"); +pub fn expand<C: ContextProvider>( + settings: &ParsilSettings<C>, + q: &mut Query, +) -> anyhow::Result<()> { + let mut expander = Expander { settings }; + q.visit_mut(&mut expander) } diff --git a/parsil/src/lib.rs index aef428f36..499f4b06d 100644 --- a/parsil/src/lib.rs +++ b/parsil/src/lib.rs @@ -21,7 +21,7 @@ pub mod queries; pub mod symbols; #[cfg(test)] mod tests; -mod utils; +pub mod utils; mod validate; mod visitor; diff --git a/parsil/src/symbols.rs index f0de87b01..ac552715d 100644 --- a/parsil/src/symbols.rs +++ b/parsil/src/symbols.rs @@ -106,6 +113,13 @@ impl std::fmt::Display for Handle { /// data from the contract, and available in the JSON payload exposed by /// Ryhope. pub trait ContextProvider { + // Upper bounds used to validate queries against the circuit parameters + const MAX_NUM_COLUMNS: usize; + const MAX_NUM_PREDICATE_OPS: usize; + const MAX_NUM_RESULT_OPS: usize; + const MAX_NUM_ITEMS_PER_OUTPUT: usize; + const MAX_NUM_OUTPUTS: usize; + /// Return, if it exists, the structure of the given virtual table.
fn fetch_table(&self, table_name: &str) -> Result<ZkTable>; } @@ -115,12 +122,42 @@ impl ContextProvider for EmptyProvider { fn fetch_table(&self, _table_name: &str) -> Result<ZkTable> { bail!("empty provider") } + + const MAX_NUM_COLUMNS: usize = 0; + + const MAX_NUM_PREDICATE_OPS: usize = 0; + + const MAX_NUM_RESULT_OPS: usize = 0; + + const MAX_NUM_ITEMS_PER_OUTPUT: usize = 0; + + const MAX_NUM_OUTPUTS: usize = 0; } -pub struct FileContextProvider { +pub struct FileContextProvider< + const MAX_NUM_COLUMNS: usize, + const MAX_NUM_PREDICATE_OPS: usize, + const MAX_NUM_RESULT_OPS: usize, + const MAX_NUM_ITEMS_PER_OUTPUT: usize, + const MAX_NUM_OUTPUTS: usize, +> { tables: HashMap<String, ZkTable>, } -impl FileContextProvider { +impl< + const MAX_NUM_COLUMNS: usize, + const MAX_NUM_PREDICATE_OPS: usize, + const MAX_NUM_RESULT_OPS: usize, + const MAX_NUM_ITEMS_PER_OUTPUT: usize, + const MAX_NUM_OUTPUTS: usize, + > + FileContextProvider< + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + MAX_NUM_ITEMS_PER_OUTPUT, + MAX_NUM_OUTPUTS, + > +{ pub fn from_file(filename: &str) -> Result<Self> { let tables: Vec<ZkTable> = serde_json::from_reader(std::fs::File::open(filename)?)?; Ok(FileContextProvider { @@ -131,7 +168,21 @@ impl FileContextProvider { }) } } -impl ContextProvider for FileContextProvider { +impl< + const MAX_NUM_COLUMNS: usize, + const MAX_NUM_PREDICATE_OPS: usize, + const MAX_NUM_RESULT_OPS: usize, + const MAX_NUM_ITEMS_PER_OUTPUT: usize, + const MAX_NUM_OUTPUTS: usize, + > ContextProvider + for FileContextProvider< + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + MAX_NUM_ITEMS_PER_OUTPUT, + MAX_NUM_OUTPUTS, + > +{ fn fetch_table(&self, table_name: &str) -> Result<ZkTable> { self.tables.get(table_name).cloned().ok_or_else(|| { anyhow!( @@ -141,6 +192,16 @@ impl ContextProvider for FileContextProvider { ) }) } + + const MAX_NUM_COLUMNS: usize = MAX_NUM_COLUMNS; + + const MAX_NUM_PREDICATE_OPS: usize = MAX_NUM_PREDICATE_OPS; + + const MAX_NUM_RESULT_OPS: usize = MAX_NUM_RESULT_OPS; + + const MAX_NUM_ITEMS_PER_OUTPUT: usize = MAX_NUM_ITEMS_PER_OUTPUT; + + const MAX_NUM_OUTPUTS: usize = MAX_NUM_OUTPUTS; } /// The [`Kind`] of a [`Scope`] defines how it behaves when being traversed.
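For illustration, the pieces above compose as follows: a caller defines a `ContextProvider` pinned to the bounds of the circuit set it targets, then feeds it to the settings builder added in `utils.rs`. A minimal sketch, assuming `table` holds a previously deserialized `ZkTable` (the provider name, bound values and query string are illustrative only; imports and error handling elided):

    struct MyProvider {
        table: ZkTable,
    }

    impl ContextProvider for MyProvider {
        // Upper bounds matching the parameters of the circuits that the
        // queries will eventually be proven against.
        const MAX_NUM_COLUMNS: usize = 10;
        const MAX_NUM_PREDICATE_OPS: usize = 20;
        const MAX_NUM_RESULT_OPS: usize = 20;
        const MAX_NUM_ITEMS_PER_OUTPUT: usize = 10;
        const MAX_NUM_OUTPUTS: usize = 5;

        fn fetch_table(&self, _table_name: &str) -> Result<ZkTable> {
            Ok(self.table.clone())
        }
    }

    // LIMIT/OFFSET are not written in the SQL itself: they are passed through
    // the settings and injected by `expand` for non-aggregated queries.
    let settings = ParsilSettingsBuilder::default()
        .context(MyProvider { table })
        .placeholders(PlaceholderSettings::with_freestanding(3))
        .limit(5) // optional; build() rejects values above MAX_NUM_OUTPUTS
        .build()?;
    let query = parse_and_validate("SELECT foo FROM table2 WHERE bar < 3", &settings)?;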
diff --git a/parsil/src/tests.rs b/parsil/src/tests.rs index 60bb6ae5d..6b574f92b 100644 --- a/parsil/src/tests.rs +++ b/parsil/src/tests.rs @@ -1,5 +1,6 @@ use crate::assembler::{assemble_dynamic, DynamicCircuitPis}; use crate::isolator; +use crate::utils::ParsilSettingsBuilder; use crate::{ symbols::FileContextProvider, utils::{parse_and_validate, ParsilSettings, PlaceholderSettings}, @@ -16,14 +17,34 @@ const CAREFUL: &[&str] = &[ "SELECT pipo.not_tt FROM (SELECT t AS tt FROM b) AS pipo (not_tt);", ]; +const MAX_NUM_COLUMNS: usize = 10; +const MAX_NUM_PREDICATE_OPS: usize = 20; +const MAX_NUM_RESULT_OPS: usize = 20; +const MAX_NUM_ITEMS_PER_OUTPUT: usize = 10; +const MAX_NUM_OUTPUTS: usize = 5; + +type TestFileContextProvider = FileContextProvider< + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + MAX_NUM_ITEMS_PER_OUTPUT, + MAX_NUM_OUTPUTS, +>; + #[test] fn must_accept() -> Result<()> { let settings = ParsilSettings { - context: FileContextProvider::from_file("tests/context.json")?, + context: TestFileContextProvider::from_file("tests/context.json")?, placeholders: PlaceholderSettings::with_freestanding(3), + limit: None, + offset: None, }; for q in [ + "SELECT foo FROM table2", + "SELECT foo FROM table2 WHERE bar < 3", + "SELECT foo, * FROM table2", + "SELECT AVG(foo) FROM table2 WHERE block BETWEEN 43 and 68", // "SELECT 25", "SELECT AVG(foo), MIN(bar) FROM table2 WHERE block = 3", // "SELECT '0x1122334455667788990011223344556677889900112233445566778899001122'", @@ -31,6 +52,10 @@ fn must_accept() -> Result<()> { // "SELECT '1234567'", // "SELECT '0b01001'", // "SELECT '0o1234567'", + "SELECT foo, bar FROM table2 WHERE block = 3", + "SELECT foo FROM table2 WHERE block IN (1, 2, 4)", + "SELECT bar FROM table2 WHERE NOT block BETWEEN 12 AND 15", + "SELECT a, c FROM table2 AS tt (a, b, c)", ] { parse_and_validate(q, &settings)?; } @@ -39,16 +64,16 @@ fn must_accept() -> Result<()> { #[test] fn must_reject() { - let settings = ParsilSettings { - context: FileContextProvider::from_file("tests/context.json").unwrap(), - placeholders: PlaceholderSettings::with_freestanding(3), - }; + let settings = ParsilSettingsBuilder::default() + .context(TestFileContextProvider::from_file("tests/context.json").unwrap()) + .placeholders(PlaceholderSettings::with_freestanding(3)) + .build() + .unwrap(); for q in [ - "SELECT foo, bar FROM table2 WHERE block = 3", - "SELECT foo FROM table2 WHERE block IN (1, 2, 4)", - "SELECT bar FROM table2 WHERE NOT block BETWEEN 12 AND 15", - "SELECT a, c FROM table2 AS tt (a, b, c)", + // No ORDER BY + "SELECT foo, bar FROM table2 ORDER BY bar", + "SELECT foo, bar FROM table2 ORDER BY foo, bar", // Mixing aggregates and scalars "SELECT q, MIN(r) FROM pipo WHERE block = 3", // Bitwise operators unsupported @@ -77,48 +102,43 @@ fn must_reject() { "SELECT '0t11223344556677889900112233445566778899001122334455667788990011223'", // Invalid digit "SELECT '0o12345678'", + // Too many items in SELECT + "SELECT a+b, a-b, a, b, c*a, c+b, c= b+63 OR a < b AND (a-b)*(a+b) >= a*c+b-4", + // Too many operations in SELECT + "SELECT c+b-c*(a+c)-75 + 42*(a-b*c+a*(b-c)), a*56 >= b+63, a < b, (a-b)*(a+b) >= a*c+b-4 FROM table2 as tt (a,b,c)", + // LIMIT + "SELECT a+b FROM t LIMIT 10", + "SELECT b*c FROM t LIMIT a", + // OFFSET + "SELECT a+b FROM t OFFSET 10", + "SELECT b*c FROM t OFFSET $1", ] { assert!(dbg!(parse_and_validate(q, &settings)).is_err()) } } -#[test] -fn must_resolve() -> Result<()> { - let settings = ParsilSettings { - context: 
FileContextProvider::from_file("tests/context.json")?, - placeholders: PlaceholderSettings::with_freestanding(3), - }; - for q in [ - // "SELECT foo FROM table2", - // "SELECT foo FROM table2 WHERE bar < 3", - // "SELECT foo, * FROM table2", - "SELECT AVG(foo) FROM table2 WHERE block BETWEEN 43 and 68", - // "SELECT foo, bar FROM table2 ORDER BY bar", - // "SELECT foo, bar FROM table2 ORDER BY foo, bar", - ] { - parse_and_validate(q, &settings)?; - } - Ok(()) -} - #[test] fn ref_query() -> Result<()> { - let settings = ParsilSettings { - context: FileContextProvider::from_file("tests/context.json")?, - placeholders: PlaceholderSettings::with_freestanding(2), - }; + let settings = ParsilSettingsBuilder::default() + .context(TestFileContextProvider::from_file("tests/context.json").unwrap()) + .placeholders(PlaceholderSettings::with_freestanding(2)) + .build() + .unwrap(); let q = "SELECT AVG(C1+C2/(C2*C3)), SUM(C1+C2), MIN(C1+$1), MAX(C4-2), AVG(C5) FROM T WHERE (C5 > 5 AND C1*C3 <= C4+C5 OR C3 == $2) AND C2 >= 75 AND C2 < 99"; - let query = parse_and_validate(q, &settings)?; + let _query = parse_and_validate(q, &settings)?; Ok(()) } #[test] fn test_serde_circuit_pis() { - let settings = ParsilSettings { - context: FileContextProvider::from_file("tests/context.json").unwrap(), - placeholders: PlaceholderSettings::with_freestanding(3), - }; + let settings = ParsilSettingsBuilder::default() + .context(TestFileContextProvider::from_file("tests/context.json").unwrap()) + .placeholders(PlaceholderSettings::with_freestanding(3)) + .build() + .unwrap(); let q = "SELECT AVG(foo) FROM table2"; let query = parse_and_validate(q, &settings).unwrap(); @@ -136,13 +156,13 @@ fn test_serde_circuit_pis() { } #[test] -#[ignore = "wait for non-aggregation SELECT to come back"] fn isolation() { fn isolated_to_string(q: &str, lo_sec: bool, hi_sec: bool) -> String { - let settings = ParsilSettings { - context: FileContextProvider::from_file("tests/context.json").unwrap(), - placeholders: PlaceholderSettings::with_freestanding(3), - }; + let settings = ParsilSettingsBuilder::default() + .context(TestFileContextProvider::from_file("tests/context.json").unwrap()) + .placeholders(PlaceholderSettings::with_freestanding(3)) + .build() + .unwrap(); let mut query = parse_and_validate(q, &settings).unwrap(); isolator::isolate_with(&mut query, &settings, lo_sec, hi_sec) @@ -156,7 +176,7 @@ fn isolation() { false, false ), - "SELECT * FROM table1 WHERE (block >= 1 AND block <= 5)" + format!("SELECT * FROM table1 WHERE (block >= 1 AND block <= 5) LIMIT {MAX_NUM_OUTPUTS}") ); // Drop references to other columns @@ -166,7 +186,7 @@ fn isolation() { false, false ), - "SELECT * FROM table2 WHERE (block >= 1 AND block <= 5)" + format!("SELECT * FROM table2 WHERE (block >= 1 AND block <= 5) LIMIT {MAX_NUM_OUTPUTS}") ); // Drop sec. ind. references if it has no known bounds. @@ -176,7 +196,7 @@ fn isolation() { false, false ), - "SELECT * FROM table2 WHERE (block >= $MIN_BLOCK AND block <= $MAX_BLOCK)" + format!("SELECT * FROM table2 WHERE (block >= $MIN_BLOCK AND block <= $MAX_BLOCK) LIMIT {MAX_NUM_OUTPUTS}") ); // Drop sec.ind. < [...] if it has a defined higher bound @@ -186,7 +206,7 @@ fn isolation() { true, false ), - "SELECT * FROM table2 WHERE (block >= $MIN_BLOCK AND block <= $MAX_BLOCK)" + format!("SELECT * FROM table2 WHERE (block >= $MIN_BLOCK AND block <= $MAX_BLOCK) LIMIT {MAX_NUM_OUTPUTS}") ); // Keep sec.ind. < [...]
if it has a defined higher bound @@ -196,7 +216,7 @@ fn isolation() { false, true ), - "SELECT * FROM table2 WHERE (block >= $MIN_BLOCK AND block <= $MAX_BLOCK) AND foo < 5" + format!("SELECT * FROM table2 WHERE (block >= $MIN_BLOCK AND block <= $MAX_BLOCK) AND foo < 5 LIMIT {MAX_NUM_OUTPUTS}") ); // Nicholas's example assert_eq!( isolated_to_string( "SELECT * FROM table2 WHERE block BETWEEN 5 AND 10 AND (foo = 4 OR foo = 15) AND bar = 12", false, false), - "SELECT * FROM table2 WHERE (block >= 5 AND block <= 10)"); + format!("SELECT * FROM table2 WHERE (block >= 5 AND block <= 10) LIMIT {MAX_NUM_OUTPUTS}") + ); } diff --git a/parsil/src/utils.rs index 47430c1e7..a45022b55 100644 --- a/parsil/src/utils.rs +++ b/parsil/src/utils.rs @@ -1,5 +1,5 @@ use alloy::primitives::U256; -use anyhow::*; +use anyhow::{bail, ensure}; use sqlparser::ast::{BinaryOperator, Expr, Query, UnaryOperator, Value}; use std::str::FromStr; use verifiable_db::query::computational_hash_ids::PlaceholderIdentifier; @@ -13,12 +13,100 @@ use crate::{ validate::{self}, }; -#[derive(Debug)] pub struct ParsilSettings<C: ContextProvider> { /// A handle to an object providing a register of the existing virtual /// tables and their columns. pub context: C, pub placeholders: PlaceholderSettings, + pub limit: Option<u32>, + pub offset: Option<u32>, +} +impl<C: ContextProvider> ParsilSettings<C> { + pub fn builder() -> ParsilSettingsBuilder<C> { + Default::default() + } + + pub fn max_num_outputs() -> usize { + C::MAX_NUM_OUTPUTS + } + + pub fn limit(&self) -> u32 { + self.limit.unwrap_or(C::MAX_NUM_OUTPUTS.try_into().unwrap()) + } + + pub fn offset(&self) -> u32 { + self.offset.unwrap_or(0) + } +} + +pub struct ParsilSettingsBuilder<C: ContextProvider> { + context: Option<C>, + placeholders_settings: Option<PlaceholderSettings>, + limit: Option<u32>, + offset: Option<u32>, +} +impl<C: ContextProvider> std::default::Default for ParsilSettingsBuilder<C> { + fn default() -> Self { + ParsilSettingsBuilder { + context: None, + placeholders_settings: None, + limit: None, + offset: None, + } + } +} +impl<C: ContextProvider> ParsilSettingsBuilder<C> { + pub fn context(mut self, context: C) -> Self { + self.context = Some(context); + self + } + + pub fn placeholders(mut self, placeholders_settings: PlaceholderSettings) -> Self { + self.placeholders_settings = Some(placeholders_settings); + self + } + + pub fn maybe_limit(mut self, limit: Option<u32>) -> Self { + self.limit = limit; + self + } + + pub fn maybe_offset(mut self, offset: Option<u32>) -> Self { + self.offset = offset; + self + } + + pub fn limit(mut self, limit: u32) -> Self { + self.limit = Some(limit); + self + } + + pub fn offset(mut self, offset: u32) -> Self { + self.offset = Some(offset); + self + } + + pub fn build(mut self) -> anyhow::Result<ParsilSettings<C>> { + anyhow::ensure!( + self.limit + .map(|l| l <= C::MAX_NUM_OUTPUTS.try_into().unwrap()) + .unwrap_or(true), + anyhow::anyhow!("limit cannot be greater than `{}`", C::MAX_NUM_OUTPUTS) + ); + + Ok(ParsilSettings { + context: self + .context + .take() + .ok_or_else(|| anyhow::anyhow!("context is not set"))?, + placeholders: self + .placeholders_settings + .take() + .ok_or_else(|| anyhow::anyhow!("placeholder settings are not set"))?, + limit: self.limit, + offset: self.offset, }) } } #[derive(Debug)] @@ -52,7 +140,7 @@ impl PlaceholderSettings { min_block: &str, max_block: &str, n: usize, - ) -> Result<Self> { + ) -> anyhow::Result<Self> { ensure!( min_block.starts_with('$'), "placeholders must start with '$'" @@ -71,7 +159,7 @@ impl PlaceholderSettings { /// Ensure that the given placeholder is valid, and update the validator /// internal state accordingly.
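///
/// For instance (illustrative): after `with_block_placeholders("$MIN_BLOCK", "$MAX_BLOCK", 2)`,
/// `$MIN_BLOCK` resolves to `PlaceholderIdentifier::MinQueryOnIdx1`, while a
/// freestanding placeholder such as `$1` maps to a generic
/// `PlaceholderIdentifier` as consumed by the universal query circuit.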
- pub fn resolve_placeholder(&self, name: &str) -> Result<PlaceholderIdentifier> { + pub fn resolve_placeholder(&self, name: &str) -> anyhow::Result<PlaceholderIdentifier> { if self.min_block_placeholder == name { return Ok(PlaceholderIdentifier::MinQueryOnIdx1); } @@ -102,9 +190,9 @@ impl PlaceholderSettings { pub fn parse_and_validate<C: ContextProvider>( query: &str, settings: &ParsilSettings<C>, -) -> Result<Query> { - let mut query = parser::parse(settings, query)?; - expand::expand(&mut query); +) -> anyhow::Result<Query> { + let mut query = parser::parse(&settings, query)?; + expand::expand(&settings, &mut query)?; placeholders::validate(settings, &query)?; validate::validate(settings, &query)?; @@ -114,12 +202,12 @@ pub fn parse_and_validate( /// Convert a string to a U256. Case is not conserved, and the string may be /// prefixed by a radix indicator. -pub fn str_to_u256(s: &str) -> Result<U256> { +pub fn str_to_u256(s: &str) -> anyhow::Result<U256> { let s = s.to_lowercase(); - U256::from_str(&s).map_err(|e| anyhow!("{s}: invalid U256: {e}")) + U256::from_str(&s).map_err(|e| anyhow::anyhow!("{s}: invalid U256: {e}")) } -fn val_to_expr(x: U256) -> Expr { +pub(crate) fn u256_to_expr(x: U256) -> Expr { if let Result::Ok(x_int) = TryInto::<u64>::try_into(x) { Expr::Value(Value::Number(x_int.to_string(), false)) } else { @@ -127,6 +215,14 @@ fn val_to_expr(x: U256) -> Expr { } } +pub(crate) fn int_to_expr<X: TryInto<u64> + ToString>(x: X) -> Expr { + if let Result::Ok(x_int) = TryInto::<u64>::try_into(x) { + Expr::Value(Value::Number(x_int.to_string(), false)) + } else { + Expr::Value(Value::SingleQuotedString(x.to_string())) + } +} + /// Reduce all the parts of an expression that can be computed at compile-time. pub(crate) fn const_reduce(expr: &mut Expr) { #[allow(non_snake_case)] @@ -145,14 +241,14 @@ pub(crate) fn const_reduce(expr: &mut Expr) { } (None, Some(new_right)) => { const_reduce(left); - *right = Box::new(val_to_expr(new_right)); + *right = Box::new(u256_to_expr(new_right)); } (Some(new_left), None) => { const_reduce(right); - *left = Box::new(val_to_expr(new_left)); + *left = Box::new(u256_to_expr(new_left)); } (Some(new_left), Some(new_right)) => { - *expr = val_to_expr(match op { + *expr = u256_to_expr(match op { BinaryOperator::Plus => new_left + new_right, BinaryOperator::Minus => new_left - new_right, BinaryOperator::Multiply => new_left * new_right, @@ -229,9 +325,9 @@ pub(crate) fn const_reduce(expr: &mut Expr) { Expr::UnaryOp { op, expr } => { if let Result::Ok(new_e) = const_eval(expr) { match op { - UnaryOperator::Plus => *expr = Box::new(val_to_expr(new_e)), + UnaryOperator::Plus => *expr = Box::new(u256_to_expr(new_e)), UnaryOperator::Not => { - *expr = Box::new(val_to_expr(if new_e.is_zero() { ONE } else { ZERO })); + *expr = Box::new(u256_to_expr(if new_e.is_zero() { ONE } else { ZERO })); } _ => unreachable!(), } @@ -251,7 +347,7 @@ pub(crate) fn const_reduce(expr: &mut Expr) { /// /// NOTE: this will be used (i) in optimization and (ii) when boundaries /// will accept more complex expressions.
-fn const_eval(expr: &Expr) -> Result<U256> { +pub(crate) fn const_eval(expr: &Expr) -> anyhow::Result<U256> { #[allow(non_snake_case)] let ONE = U256::from_str_radix("1", 2).unwrap(); const ZERO: U256 = U256::ZERO; diff --git a/parsil/src/validate.rs index 8a93fee09..75cdb7df4 100644 --- a/parsil/src/validate.rs +++ b/parsil/src/validate.rs @@ -385,9 +385,41 @@ impl<C: ContextProvider> AstVisitor for SqlValidator<'_, C> { q.fetch.is_none(), ValidationError::NonStandardSql("FETCH".into()) ); + ensure!( + q.order_by.is_none(), + ValidationError::UnsupportedFeature("ORDER BY".into()) + ); Ok(()) } } + +// Determine if the query does not aggregate values across different matching rows +pub(crate) fn is_query_with_no_aggregation(select: &Select) -> bool { + select.projection.iter().all(|s| { + !matches!( + s, + SelectItem::UnnamedExpr(Expr::Function(_)) + | SelectItem::ExprWithAlias { + expr: Expr::Function(_), + .. + } + ) + }) +} +// Determine if the query aggregates values across different matching rows +pub(crate) fn is_query_with_aggregation(select: &Select) -> bool { + select.projection.iter().all(|s| { + matches!( + s, + SelectItem::UnnamedExpr(Expr::Function(_)) + | SelectItem::ExprWithAlias { + expr: Expr::Function(_), + .. + } + ) + }) +} + /// Instantiate a new [`Validator`] and validate this query with it. pub fn validate<C: ContextProvider>( settings: &ParsilSettings<C>, query: &Query, ) -> Result<(), ValidationError> { if let SetExpr::Select(ref select) = *query.body { ensure!( - select.projection.iter().all(|s| matches!( - s, - SelectItem::UnnamedExpr(Expr::Function(_)) - | SelectItem::ExprWithAlias { - expr: Expr::Function(_), - .. - } - )), - ValidationError::TabularQuery + is_query_with_aggregation(select) || is_query_with_no_aggregation(select), + ValidationError::MixedQuery ); } else { return Err(ValidationError::NotASelect); diff --git a/verifiable-db/Cargo.toml index c7d8b87b5..284a77dde 100644 --- a/verifiable-db/Cargo.toml +++ b/verifiable-db/Cargo.toml @@ -21,6 +21,7 @@ derive_more = "0.99.18" itertools.workspace = true [dev-dependencies] +futures = "0.3.31" mp2_test = { path = "../mp2-test" } rand.workspace = true serial_test.workspace = true diff --git a/verifiable-db/src/api.rs index cad63daba..852fac874 100644 --- a/verifiable-db/src/api.rs +++ b/verifiable-db/src/api.rs @@ -192,6 +192,8 @@ where #[derive(Serialize, Deserialize)] pub struct QueryParameters< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, const MAX_NUM_COLUMNS: usize, const MAX_NUM_PREDICATE_OPS: usize, const MAX_NUM_RESULT_OPS: usize, @@ -203,6 +205,9 @@ pub struct QueryParameters< [(); MAX_NUM_ITEMS_PER_OUTPUT - 1]:, [(); NUM_QUERY_IO::]:, [(); 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS)]:, + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); MAX_NUM_ITEMS_PER_OUTPUT * MAX_NUM_OUTPUTS]:, { query_params: QueryParams< MAX_NUM_COLUMNS, MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, MAX_NUM_ITEMS_PER_OUTPUT, >, revelation_params: RevelationParams< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - { 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS) }, >, wrap_circuit: WrapCircuitParams, } #[derive(Serialize, Deserialize)] pub enum QueryCircuitInput< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, const
MAX_NUM_COLUMNS: usize, const MAX_NUM_PREDICATE_OPS: usize, const MAX_NUM_RESULT_OPS: usize, @@ -230,6 +241,9 @@ pub enum QueryCircuitInput< const MAX_NUM_PLACEHOLDERS: usize, > where [(); 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS)]:, + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); MAX_NUM_ITEMS_PER_OUTPUT * MAX_NUM_OUTPUTS]:, { Query( query::api::CircuitInput< @@ -241,15 +255,21 @@ pub enum QueryCircuitInput< ), Revelation( revelation::api::CircuitInput< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - { 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS) }, >, ), } impl< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, const MAX_NUM_COLUMNS: usize, const MAX_NUM_PREDICATE_OPS: usize, const MAX_NUM_RESULT_OPS: usize, @@ -258,6 +278,8 @@ impl< const MAX_NUM_PLACEHOLDERS: usize, > QueryParameters< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, MAX_NUM_COLUMNS, MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, @@ -272,6 +294,9 @@ where [(); 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS)]:, [(); QUERY_PI_LEN::]:, [(); REVELATION_PI_LEN::]:, + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); MAX_NUM_ITEMS_PER_OUTPUT * MAX_NUM_OUTPUTS]:, { /// Build `QueryParameters` from serialized `ParamsInfo` of `PublicParameters` pub fn build_params(preprocessing_params_info: &[u8]) -> Result<Self> { @@ -296,6 +321,8 @@ where pub fn generate_proof( &self, input: QueryCircuitInput< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, MAX_NUM_COLUMNS, MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, @@ -307,9 +334,11 @@ where match input { QueryCircuitInput::Query(input) => self.query_params.generate_proof(input), QueryCircuitInput::Revelation(input) => { - let proof = self - .revelation_params - .generate_proof(input, self.query_params.get_circuit_set())?; + let proof = self.revelation_params.generate_proof( + input, + self.query_params.get_circuit_set(), + Some(&self.query_params), + )?; self.wrap_circuit.generate_proof( self.revelation_params.get_circuit_set(), &ProofWithVK::deserialize(&proof)?, @@ -336,6 +365,8 @@ mod tests { const MAX_NUM_OUTPUTS: usize = 3; const MAX_NUM_ITEMS_PER_OUTPUT: usize = 5; const MAX_NUM_PLACEHOLDERS: usize = 10; + const ROW_TREE_MAX_DEPTH: usize = 10; + const INDEX_TREE_MAX_DEPTH: usize = 15; // This is only used for testing locally.
#[ignore] @@ -348,6 +379,8 @@ mod tests { let file = File::open(QUERY_PARAMS_FILE_PATH).unwrap(); let reader = BufReader::new(file); let query_params: QueryParameters< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, MAX_NUM_COLUMNS, MAX_NUM_PREDICATE_OPS, MAX_NUM_RESULT_OPS, diff --git a/verifiable-db/src/query/aggregation/mod.rs b/verifiable-db/src/query/aggregation/mod.rs index b5fd8bfca..6a36be605 100644 --- a/verifiable-db/src/query/aggregation/mod.rs +++ b/verifiable-db/src/query/aggregation/mod.rs @@ -1,13 +1,21 @@ +use std::iter::once; + use alloy::primitives::U256; use anyhow::Result; +use itertools::Itertools; use mp2_common::{ - poseidon::empty_poseidon_hash, + poseidon::{empty_poseidon_hash, HashPermutation}, proof::ProofWithVK, serialization::{deserialize_long_array, serialize_long_array}, types::HashOutput, + utils::{Fieldable, ToFields}, F, }; -use plonky2::{hash::hash_types::HashOut, plonk::config::GenericHashOut}; +use plonky2::{ + field::types::Field, + hash::{hash_types::HashOut, hashing::hash_n_to_hash_no_pad}, + plonk::config::GenericHashOut, +}; use serde::{Deserialize, Serialize}; pub(crate) mod child_proven_single_path_node; @@ -140,7 +148,7 @@ impl QueryBounds { } /// Data structure containing all the information needed as input by aggregation circuits for a single node of the tree -#[derive(Clone, Debug, Default, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, Serialize, Deserialize)] pub struct NodeInfo { /// The hash of the embedded tree at this node. It can be the hash of the row tree if this node is a node in /// the index tree, or it can be a hash of the cells tree if this node is a node in a rows tree @@ -157,6 +165,8 @@ pub struct NodeInfo { /// minimum value associated to the current node. 
It can be a primary index value if the node is a node in the index tree, /// a secondary index value if the node is a node in a rows tree pub(crate) max: U256, + /// Flag specifying whether this is a leaf node or not + pub(crate) is_leaf: bool, } impl NodeInfo { @@ -184,10 +194,30 @@ impl NodeInfo { value, min, max, + is_leaf: left_child_hash.is_none() && right_child_hash.is_none(), } } + + pub fn node_hash(&self, index_id: u64) -> HashOutput { + HashOutput::try_from(self.compute_node_hash(index_id.to_field()).to_bytes()).unwrap() + } + + pub(crate) fn compute_node_hash(&self, index_id: F) -> HashOut { + hash_n_to_hash_no_pad::( + &self + .child_hashes + .into_iter() + .flat_map(|h| h.to_vec()) + .chain(self.min.to_fields()) + .chain(self.max.to_fields()) + .chain(once(index_id)) + .chain(self.value.to_fields()) + .chain(self.embedded_tree_hash.to_vec()) + .collect_vec(), + ) + } } -#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)] +#[derive(Clone, Copy, Debug, PartialEq, Eq, Serialize, Deserialize)] /// enum to specify whether a node is the left or right child of another node pub enum ChildPosition { Left, diff --git a/verifiable-db/src/query/api.rs b/verifiable-db/src/query/api.rs index e87b068d9..96c77a8bf 100644 --- a/verifiable-db/src/query/api.rs +++ b/verifiable-db/src/query/api.rs @@ -71,7 +71,7 @@ use recursion_framework::{ }; use serde::{Deserialize, Serialize}; -#[derive(Debug, Serialize, Deserialize)] +#[derive(Clone, Debug, Serialize, Deserialize)] #[allow(clippy::large_enum_variant)] // we need to clone data if we fix by put variants inside a `Box` pub enum CircuitInput< const MAX_NUM_COLUMNS: usize, @@ -813,23 +813,6 @@ mod tests { }, }; - impl NodeInfo { - pub(crate) fn compute_node_hash(&self, index_id: F) -> HashOut { - hash_n_to_hash_no_pad::( - &self - .child_hashes - .into_iter() - .flat_map(|h| h.to_vec()) - .chain(self.min.to_fields()) - .chain(self.max.to_fields()) - .chain(once(index_id)) - .chain(self.value.to_fields()) - .chain(self.embedded_tree_hash.to_vec()) - .collect_vec(), - ) - } - } - #[test] fn test_api() { // Simple query for testing SELECT SUM(C1 + C3) FROM T WHERE C3 >= 5 AND C1 > 56 AND C1 <= 67 AND C2 > 34 AND C2 <= $1 @@ -903,7 +886,8 @@ mod tests { result_operations, output_items, aggregation_op_ids.clone(), - ); + ) + .unwrap(); let first_placeholder_id = PlaceholderIdentifier::Generic(0); let placeholders = Placeholders::from(( vec![(first_placeholder_id, U256::from(max_query_secondary))], diff --git a/verifiable-db/src/query/computational_hash_ids.rs b/verifiable-db/src/query/computational_hash_ids.rs index 95422063c..b1d78c4a2 100644 --- a/verifiable-db/src/query/computational_hash_ids.rs +++ b/verifiable-db/src/query/computational_hash_ids.rs @@ -201,8 +201,15 @@ impl Identifiers { //ToDo: add ORDER BY info and DISTINCT info for queries without the results tree, when adding results tree // circuits APIs + let computational_hash = match results.output_variant { + Output::Aggregation => ComputationalHash::from_bytes((&hash).into()), + Output::NoAggregation => ResultIdentifier::result_id_hash( + ComputationalHash::from_bytes((&hash).into()), + results.distinct.unwrap_or(false), + ), + }; - let inputs = ComputationalHash::from_bytes((&hash).into()) + let inputs = computational_hash .to_vec() .into_iter() .chain(placeholder_id_hash.to_vec()) @@ -227,7 +234,7 @@ impl ToField for Identifiers { } } /// Data structure to provide identifiers of columns of a table to compute computational hash -#[derive(Clone, Debug)] +#[derive(Clone, Debug, 
Serialize, Deserialize)] pub struct ColumnIDs { pub(crate) primary: F, pub(crate) secondary: F, @@ -712,3 +719,40 @@ impl ToField for ResultIdentifier { Identifiers::ResultIdentifiers(*self).to_field() } } + +impl ResultIdentifier { + pub(crate) fn result_id_hash( + computational_hash: ComputationalHash, + distinct: bool, + ) -> ComputationalHash { + let res_id = if distinct { + ResultIdentifier::ResultWithDistinct + } else { + ResultIdentifier::ResultNoDistinct + }; + let input = once(res_id.to_field()) + .chain(computational_hash.to_fields()) + .collect_vec(); + hash_n_to_hash_no_pad::<_, HashPermutation>(&input) + } + + pub(crate) fn result_id_hash_circuit( + b: &mut CBuilder, + computational_hash: ComputationalHashTarget, + distinct: &BoolTarget, + ) -> ComputationalHashTarget { + let [res_no_distinct, res_with_distinct] = [ + ResultIdentifier::ResultNoDistinct, + ResultIdentifier::ResultWithDistinct, + ] + .map(|id| b.constant(id.to_field())); + let res_id = b.select(*distinct, res_with_distinct, res_no_distinct); + + // Compute the computational hash: + // H(res_id || pQ.C) + let inputs = once(res_id) + .chain(computational_hash.to_targets()) + .collect(); + b.hash_n_to_hash_no_pad::(inputs) + } +} diff --git a/verifiable-db/src/query/merkle_path.rs new file mode 100644 index 000000000..050bbadc8 --- /dev/null +++ b/verifiable-db/src/query/merkle_path.rs @@ -0,0 +1,462 @@ +//! Gadget to reconstruct the Merkle root of a tree from a Merkle path + +use std::{array, iter::once}; + +use alloy::primitives::U256; +use anyhow::{ensure, Result}; +use itertools::Itertools; +use mp2_common::{ + hash::hash_maybe_first, + poseidon::empty_poseidon_hash, + serialization::{ + deserialize_array, deserialize_long_array, serialize_array, serialize_long_array, + }, + types::HashOutput, + u256::{CircuitBuilderU256, UInt256Target, WitnessWriteU256}, + utils::{SelectHashBuilder, ToTargets}, + D, F, +}; +use plonky2::{ + hash::hash_types::{HashOut, HashOutTarget}, + iop::{ + target::{BoolTarget, Target}, + witness::{PartialWitness, WitnessWrite}, + }, + plonk::{circuit_builder::CircuitBuilder, config::GenericHashOut}, +}; +use serde::{Deserialize, Serialize}; + +use super::aggregation::{ChildPosition, NodeInfo}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Input wires for Merkle path verification gadget +pub struct MerklePathTargetInputs<const MAX_DEPTH: usize> +where + [(); MAX_DEPTH - 1]:, +{ + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + is_left_child: [BoolTarget; MAX_DEPTH - 1], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + sibling_hash: [HashOutTarget; MAX_DEPTH - 1], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + node_min: [UInt256Target; MAX_DEPTH - 1], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + node_max: [UInt256Target; MAX_DEPTH - 1], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + node_value: [UInt256Target; MAX_DEPTH - 1], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + embedded_tree_hash: [HashOutTarget; MAX_DEPTH - 1], + /// Array of MAX_DEPTH-1 flags specifying whether the current node is a real node in the path or a dummy one.
+ /// That is, if the path being proven has depth d <= MAX_DEPTH, then the first d-1 entries of this array + /// are true, while the remaining MAX_DEPTH-d ones are false + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + is_real_node: [BoolTarget; MAX_DEPTH - 1], +} + +#[derive(Clone, Debug)] +/// Set of input/output wires built by merkle path verification gadget +pub struct MerklePathTarget<const MAX_DEPTH: usize> +where + [(); MAX_DEPTH - 1]:, +{ + pub(crate) inputs: MerklePathTargetInputs<MAX_DEPTH>, + /// Recomputed root for the Merkle path + pub(crate) root: HashOutTarget, +} + +#[derive(Clone, Copy, Debug, Serialize, Deserialize)] +pub struct MerklePathGadget<const MAX_DEPTH: usize> +where + [(); MAX_DEPTH - 1]:, +{ + /// Array of MAX_DEPTH-1 flags, each specifying whether the previous node in the path + /// is the left child of a given node in the path + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + is_left_child: [bool; MAX_DEPTH - 1], + /// Hash of the sibling of the previous node in the path (empty hash if there is no sibling) + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + sibling_hash: [HashOut<F>; MAX_DEPTH - 1], + /// Minimum value associated to each node in the path + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + node_min: [U256; MAX_DEPTH - 1], + /// Maximum value associated to each node in the path + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + node_max: [U256; MAX_DEPTH - 1], + /// Value stored in each node in the path + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + node_value: [U256; MAX_DEPTH - 1], + /// Hash of the embedded tree stored in each node in the path + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + embedded_tree_hash: [HashOut<F>; MAX_DEPTH - 1], + /// Number of real nodes in the path + num_real_nodes: usize, +} + +impl<const MAX_DEPTH: usize> Default for MerklePathGadget<MAX_DEPTH> +where + [(); MAX_DEPTH - 1]:, +{ + fn default() -> Self { + Self { + is_left_child: [Default::default(); MAX_DEPTH - 1], + sibling_hash: [Default::default(); MAX_DEPTH - 1], + node_min: [Default::default(); MAX_DEPTH - 1], + node_max: [Default::default(); MAX_DEPTH - 1], + node_value: [Default::default(); MAX_DEPTH - 1], + embedded_tree_hash: [Default::default(); MAX_DEPTH - 1], + num_real_nodes: Default::default(), + } + } +} + +impl<const MAX_DEPTH: usize> MerklePathGadget<MAX_DEPTH> +where + [(); MAX_DEPTH - 1]:, +{ + /// Build a new instance of `Self`, representing the `path` provided as input.
The `siblings` + /// input provides the siblings of the nodes in the path, if any + pub fn new( + path: &[(NodeInfo, ChildPosition)], + siblings: &[Option<HashOutput>], + index_id: u64, + ) -> Result<Self> { + let num_real_nodes = path.len(); + ensure!( + siblings.len() == num_real_nodes, + "Number of siblings must be the same as the nodes in the path" + ); + + let mut is_left_child = [false; MAX_DEPTH - 1]; + let mut embedded_tree_hash = [HashOut::default(); MAX_DEPTH - 1]; + let mut node_min = [U256::default(); MAX_DEPTH - 1]; + let mut node_max = [U256::default(); MAX_DEPTH - 1]; + let mut node_value = [U256::default(); MAX_DEPTH - 1]; + + path.iter().enumerate().for_each(|(i, (node, position))| { + is_left_child[i] = match position { + ChildPosition::Left => true, + ChildPosition::Right => false, + }; + embedded_tree_hash[i] = node.embedded_tree_hash; + node_min[i] = node.min; + node_max[i] = node.max; + node_value[i] = node.value; + }); + + let sibling_hash = array::from_fn(|i| { + siblings + .get(i) + .and_then(|sibling| { + sibling + .clone() + .and_then(|node_hash| Some(HashOut::from_bytes((&node_hash).into()))) + }) + .unwrap_or(*empty_poseidon_hash()) + }); + + Ok(Self { + is_left_child, + sibling_hash, + node_min, + node_max, + node_value, + embedded_tree_hash, + num_real_nodes, + }) + } + + /// Build wires for `MerklePathGadget`. The requested inputs are: + /// - `start_node`: The hash of the first node in the path + /// - `index_id`: Integer identifier of the index column to be placed in the hash + /// of the nodes of the path + pub fn build( + b: &mut CircuitBuilder<F, D>, + start_node: HashOutTarget, + index_id: Target, + ) -> MerklePathTarget<MAX_DEPTH> { + let is_left_child = array::from_fn(|_| b.add_virtual_bool_target_unsafe()); + let [sibling_hash, embedded_tree_hash] = + [0, 1].map(|_| array::from_fn(|_| b.add_virtual_hash())); + let [node_min, node_max, node_value] = [0, 1, 2].map( + |_| b.add_virtual_u256_arr_unsafe(), // unsafe should be ok since we just need to hash them + ); + let is_real_node = array::from_fn(|_| b.add_virtual_bool_target_safe()); + + let mut final_hash = start_node; + for i in 0..MAX_DEPTH - 1 { + let rest = node_min[i] + .to_targets() + .into_iter() + .chain(node_max[i].to_targets()) + .chain(once(index_id)) + .chain(node_value[i].to_targets()) + .chain(embedded_tree_hash[i].to_targets()) + .collect_vec(); + let node_hash = HashOutTarget::from_vec(hash_maybe_first( + b, + is_left_child[i], + sibling_hash[i].elements, + final_hash.elements, + rest.as_slice(), + )); + final_hash = b.select_hash(is_real_node[i], &node_hash, &final_hash); + } + + MerklePathTarget { + inputs: MerklePathTargetInputs { + is_left_child, + sibling_hash, + node_min, + node_max, + node_value, + embedded_tree_hash, + is_real_node, + }, + root: final_hash, + } + } + + pub fn assign(&self, pw: &mut PartialWitness<F>, wires: &MerklePathTargetInputs<MAX_DEPTH>) { + self.is_left_child + .iter() + .zip(wires.is_left_child) + .for_each(|(&value, target)| pw.set_bool_target(target, value)); + [ + (self.sibling_hash, wires.sibling_hash), + (self.embedded_tree_hash, wires.embedded_tree_hash), + ] + .into_iter() + .for_each(|(value_hash, target_hash)| { + value_hash + .iter() + .zip(target_hash) + .for_each(|(&value, target)| pw.set_hash_target(target, value)) + }); + [ + (self.node_min, &wires.node_min), + (self.node_max, &wires.node_max), + (self.node_value, &wires.node_value), + ] + .into_iter() + .for_each(|(values, targets)| { + values + .iter() + .zip(targets) + .for_each(|(&value, target)| pw.set_u256_target(target, value))
+ }); + wires + .is_real_node + .iter() + .enumerate() + .for_each(|(i, &target)| pw.set_bool_target(target, i < self.num_real_nodes)); + } +} + +#[cfg(test)] +mod tests { + use std::array; + + use mp2_common::{types::HashOutput, utils::ToTargets, C, D, F}; + use mp2_test::{ + circuit::{run_circuit, UserCircuit}, + utils::{gen_random_field_hash, gen_random_u256}, + }; + use plonky2::{ + field::types::{PrimeField64, Sample}, + hash::hash_types::{HashOut, HashOutTarget}, + iop::{ + target::Target, + witness::{PartialWitness, WitnessWrite}, + }, + plonk::{circuit_builder::CircuitBuilder, config::GenericHashOut}, + }; + use rand::thread_rng; + + use crate::query::aggregation::{ChildPosition, NodeInfo}; + + use super::{MerklePathGadget, MerklePathTargetInputs}; + + #[derive(Clone, Debug)] + struct TestMerklePathGadget<const MAX_DEPTH: usize> + where + [(); MAX_DEPTH - 1]:, + { + merkle_path_inputs: MerklePathGadget<MAX_DEPTH>, + start_node: NodeInfo, + index_id: F, + } + + impl<const MAX_DEPTH: usize> UserCircuit<F, D> for TestMerklePathGadget<MAX_DEPTH> + where + [(); MAX_DEPTH - 1]:, + { + type Wires = (MerklePathTargetInputs<MAX_DEPTH>, HashOutTarget, Target); + + fn build(c: &mut CircuitBuilder<F, D>) -> Self::Wires { + let index_id = c.add_virtual_target(); + let start_node = c.add_virtual_hash(); + let merkle_path_wires = MerklePathGadget::build(c, start_node, index_id); + + c.register_public_inputs(&merkle_path_wires.root.to_targets()); + + (merkle_path_wires.inputs, start_node, index_id) + } + + fn prove(&self, pw: &mut PartialWitness<F>, wires: &Self::Wires) { + self.merkle_path_inputs.assign(pw, &wires.0); + pw.set_hash_target(wires.1, self.start_node.compute_node_hash(self.index_id)); + pw.set_target(wires.2, self.index_id); + } + } + + #[test] + fn test_merkle_path() { + // Test a Merkle-path on the following Merkle-tree + // A + // B C + // D G + // E F + + // first, build the Merkle-tree + let rng = &mut thread_rng(); + let index_id = F::rand(); + // closure to generate a random node of the tree from the 2 children, if any + let mut random_node = + |left_child: Option<&HashOutput>, right_child: Option<&HashOutput>| -> NodeInfo { + let embedded_tree_hash = + HashOutput::try_from(gen_random_field_hash::<F>().to_bytes()).unwrap(); + let [node_min, node_max, node_value] = array::from_fn(|_| gen_random_u256(rng)); + NodeInfo::new( + &embedded_tree_hash, + left_child, + right_child, + node_value, + node_min, + node_max, + ) + }; + + let node_E = random_node(None, None); // it's a leaf node, so no children + let node_F = random_node(None, None); + let node_G = random_node(None, None); + let node_E_hash = + HashOutput::try_from(node_E.compute_node_hash(index_id).to_bytes()).unwrap(); + let node_D = random_node( + Some(&node_E_hash), + Some(&HashOutput::try_from(node_F.compute_node_hash(index_id).to_bytes()).unwrap()), + ); + let node_B = random_node( + Some(&HashOutput::try_from(node_D.compute_node_hash(index_id).to_bytes()).unwrap()), + None, + ); + let node_C = random_node( + None, + Some(&HashOutput::try_from(node_G.compute_node_hash(index_id).to_bytes()).unwrap()), + ); + let node_B_hash = + HashOutput::try_from(node_B.compute_node_hash(index_id).to_bytes()).unwrap(); + let node_C_hash = + HashOutput::try_from(node_C.compute_node_hash(index_id).to_bytes()).unwrap(); + let node_A = random_node(Some(&node_B_hash), Some(&node_C_hash)); + let root = node_A.compute_node_hash(index_id); + + // verify Merkle-path related to leaf F + const MAX_DEPTH: usize = 10; + let path = vec![ + (node_D.clone(), ChildPosition::Right), // we start from the ancestor of the start node of the path
(node_B.clone(), ChildPosition::Left), + (node_A.clone(), ChildPosition::Left), + ]; + let siblings = vec![Some(node_E_hash), None, Some(node_C_hash.clone())]; + let merkle_path_inputs = + MerklePathGadget::<MAX_DEPTH>::new(&path, &siblings, index_id.to_canonical_u64()) + .unwrap(); + + let circuit = TestMerklePathGadget::<MAX_DEPTH> { + merkle_path_inputs, + start_node: node_F.clone(), + index_id, + }; + + let proof = run_circuit::<F, C, D, _>(circuit); + // check that the re-computed root is correct + assert_eq!(proof.public_inputs, root.to_vec()); + + // verify Merkle-path related to leaf G + let path = vec![ + (node_C.clone(), ChildPosition::Right), + (node_A.clone(), ChildPosition::Right), + ]; + let siblings = vec![None, Some(node_B_hash)]; + let merkle_path_inputs = + MerklePathGadget::<MAX_DEPTH>::new(&path, &siblings, index_id.to_canonical_u64()) + .unwrap(); + let circuit = TestMerklePathGadget::<MAX_DEPTH> { + merkle_path_inputs, + start_node: node_G.clone(), + index_id, + }; + + let proof = run_circuit::<F, C, D, _>(circuit); + // check that the re-computed root is correct + assert_eq!(proof.public_inputs, root.to_vec()); + + // Verify Merkle-path related to node D + let path = vec![ + (node_B.clone(), ChildPosition::Left), + (node_A.clone(), ChildPosition::Left), + ]; + let siblings = vec![None, Some(node_C_hash)]; + let merkle_path_inputs = + MerklePathGadget::<MAX_DEPTH>::new(&path, &siblings, index_id.to_canonical_u64()) + .unwrap(); + let circuit = TestMerklePathGadget::<MAX_DEPTH> { + merkle_path_inputs, + start_node: node_D.clone(), + index_id, + }; + + let proof = run_circuit::<F, C, D, _>(circuit); + // check that the re-computed root is correct + assert_eq!(proof.public_inputs, root.to_vec()); + } +} diff --git a/verifiable-db/src/query/mod.rs index 9d3e6d9d1..2366b4ae8 100644 --- a/verifiable-db/src/query/mod.rs +++ b/verifiable-db/src/query/mod.rs @@ -4,6 +4,7 @@ use public_inputs::PublicInputs; pub mod aggregation; pub mod api; pub mod computational_hash_ids; +pub mod merkle_path; pub mod public_inputs; pub mod universal_circuit; diff --git a/verifiable-db/src/query/universal_circuit/universal_circuit_inputs.rs index 4f245566a..ee314ae59 100644 --- a/verifiable-db/src/query/universal_circuit/universal_circuit_inputs.rs +++ b/verifiable-db/src/query/universal_circuit/universal_circuit_inputs.rs @@ -337,6 +337,7 @@ pub struct ResultStructure { pub output_items: Vec, pub output_ids: Vec, pub output_variant: Output, + pub distinct: Option<bool>, } impl ResultStructure { @@ -369,8 +370,12 @@ impl ResultStructure { result_operations: Vec, output_items: Vec, aggregation_op_ids: Vec, - ) -> Self { - Self { + ) -> Result<Self> { + ensure!( + output_items.len() == aggregation_op_ids.len(), + "output items and aggregation operation identifiers have different lengths" + ); + Ok(Self { result_operations, output_items, output_ids: aggregation_op_ids @@ -378,20 +383,55 @@ impl ResultStructure { .map(|id| id.to_field()) .collect_vec(), output_variant: Output::Aggregation, - } + distinct: None, + }) } pub fn new_for_query_no_aggregation( result_operations: Vec, output_items: Vec, output_ids: Vec, - ) -> Self { - Self { + distinct: bool, + ) -> Result<Self> { + ensure!( + output_items.len() == output_ids.len(), + "output items and output ids have different lengths" + ); + Ok(Self { result_operations, output_items, output_ids: output_ids.into_iter().map(|id| id.to_field()).collect_vec(), output_variant: Output::NoAggregation, - } + distinct: Some(distinct), + }) + } + + pub fn query_variant(&self)
-> Output {
+        self.output_variant
+    }
+
+    /// Validate an instance of `self` with respect to the upper bounds provided as input, which are:
+    /// - The upper bound `max_num_result_ops` on the number of basic operations allowed to
+    ///   compute the results
+    /// - The upper bound `max_num_results` on the number of results returned for each row
+    pub fn validate(&self, max_num_result_ops: usize, max_num_results: usize) -> Result<()> {
+        ensure!(
+            self.result_operations.len() <= max_num_result_ops,
+            format!(
+                "too many basic operations found in SELECT clause: found {}, maximum allowed is {}",
+                self.result_operations.len(),
+                max_num_result_ops,
+            )
+        );
+        ensure!(
+            self.output_items.len() <= max_num_results,
+            format!(
+                "too many result items specified in SELECT clause: found {}, maximum allowed is {}",
+                self.output_items.len(),
+                max_num_results,
+            )
+        );
+        Ok(())
+    }
 }
diff --git a/verifiable-db/src/query/universal_circuit/universal_query_circuit.rs b/verifiable-db/src/query/universal_circuit/universal_query_circuit.rs
index df619ba35..7cd8ed9c0 100644
--- a/verifiable-db/src/query/universal_circuit/universal_query_circuit.rs
+++ b/verifiable-db/src/query/universal_circuit/universal_query_circuit.rs
@@ -1062,7 +1062,7 @@ where
     }
 }
 
-#[derive(Debug, Serialize, Deserialize)]
+#[derive(Clone, Debug, Serialize, Deserialize)]
 /// Inputs for the 2 variants of the universal query circuit
 pub enum UniversalCircuitInput<
     const MAX_NUM_COLUMNS: usize,
@@ -1434,7 +1434,8 @@ mod tests {
             .iter()
             .map(|op| op.to_canonical_u64())
             .collect_vec(),
-        );
+        )
+        .unwrap();
 
         let query_bounds = QueryBounds::new(
             &placeholders,
@@ -1812,7 +1813,9 @@ mod tests {
             .iter()
             .map(|id| id.to_canonical_u64())
             .collect_vec(),
-        );
+            false,
+        )
+        .unwrap();
         let query_bounds = QueryBounds::new(
             &placeholders,
             Some(QueryBoundSource::Placeholder(third_placeholder_id)),
diff --git a/verifiable-db/src/results_tree/binding/binding_results.rs b/verifiable-db/src/results_tree/binding/binding_results.rs
index 179505028..00454fc16 100644
--- a/verifiable-db/src/results_tree/binding/binding_results.rs
+++ b/verifiable-db/src/results_tree/binding/binding_results.rs
@@ -4,6 +4,7 @@ use crate::{
     query::{
         computational_hash_ids::{AggregationOperation, ResultIdentifier},
         public_inputs::PublicInputs as QueryProofPI,
+        universal_circuit::ComputationalHashTarget,
     },
     results_tree::{
         binding::public_inputs::PublicInputs,
@@ -16,7 +17,7 @@ use mp2_common::{
 };
 use plonky2::iop::target::Target;
 use serde::{Deserialize, Serialize};
-use std::{iter::once, slice};
+use std::slice;
 
 #[derive(Clone, Debug, Serialize, Deserialize)]
 pub struct BindingResultsWires;
@@ -56,25 +57,12 @@ impl BindingResultsCircuit {
         //      res_id = "RESULT_DISTINCT"
         // else:
         //      res_id = "RESULT"
-        let [res_no_distinct, res_with_distinct] = [
-            ResultIdentifier::ResultNoDistinct,
-            ResultIdentifier::ResultWithDistinct,
-        ]
-        .map(|id| b.constant(id.to_field()));
-        let res_id = b.select(
-            results_construction_proof.no_duplicates_flag_target(),
-            res_with_distinct,
-            res_no_distinct,
+        let computational_hash = ResultIdentifier::result_id_hash_circuit(
+            b,
+            ComputationalHashTarget::from_vec(query_proof.to_computational_hash_raw().to_vec()),
+            &results_construction_proof.no_duplicates_flag_target(),
         );
 
-        // Compute the computational hash:
-        // H(res_id || pQ.C)
-        let inputs = once(&res_id)
-            .chain(query_proof.to_computational_hash_raw())
-            .cloned()
-            .collect();
-        let computational_hash = b.hash_n_to_hash_no_pad::<H>(inputs);
-
         // Compute the placeholder hash:
         // H(pQ.H_p || pQ.MIN_I
|| pQ.MAX_I) let inputs = query_proof @@ -211,7 +199,7 @@ mod tests { }; // H(res_id || pQ.C) - let inputs = once(&res_id.to_field()) + let inputs = std::iter::once(&res_id.to_field()) .chain(query_pi.to_computational_hash_raw()) .cloned() .collect_vec(); diff --git a/verifiable-db/src/revelation/api.rs b/verifiable-db/src/revelation/api.rs index 90fc5f373..21837dd7f 100644 --- a/verifiable-db/src/revelation/api.rs +++ b/verifiable-db/src/revelation/api.rs @@ -1,17 +1,19 @@ +use std::{array, cmp::Ordering, collections::BTreeSet, fmt::Debug, iter::repeat}; + +use alloy::primitives::U256; use anyhow::{ensure, Result}; -use std::iter::repeat; use itertools::Itertools; use mp2_common::{ - array::ToField, default_config, poseidon::H, proof::{deserialize_proof, ProofWithVK}, - utils::FromFields, + u256::is_less_than_or_equal_to_u256_arr, C, D, F, }; -use plonky2::plonk::{ - circuit_data::VerifierOnlyCircuitData, config::Hasher, proof::ProofWithPublicInputs, +use plonky2::{ + field::types::PrimeField64, + plonk::{circuit_data::VerifierOnlyCircuitData, config::Hasher, proof::ProofWithPublicInputs}, }; use recursion_framework::{ circuit_builder::{CircuitWithUniversalVerifier, CircuitWithUniversalVerifierBuilder}, @@ -23,39 +25,133 @@ use serde::{Deserialize, Serialize}; use crate::{ query::{ + self, aggregation::QueryBounds, - computational_hash_ids::PlaceholderIdentifier, - universal_circuit::{ - universal_circuit_inputs::{PlaceholderId, Placeholders}, - universal_query_circuit::QueryBound, + api::{CircuitInput as QueryCircuitInput, Parameters as QueryParams}, + computational_hash_ids::ColumnIDs, + universal_circuit::universal_circuit_inputs::{ + BasicOperation, Placeholders, ResultStructure, + }, + PI_LEN as QUERY_PI_LEN, + }, + revelation::{ + placeholders_check::CheckPlaceholderGadget, + revelation_unproven_offset::{ + generate_dummy_row_proof_inputs, + RecursiveCircuitWires as RecursiveCircuitWiresUnprovenOffset, }, }, - revelation::placeholders_check::CheckedPlaceholder, }; use super::{ + revelation_unproven_offset::{ + RecursiveCircuitInputs as RecursiveCircuitInputsUnporvenOffset, + RevelationCircuit as RevelationCircuitUnprovenOffset, RowPath, + }, revelation_without_results_tree::{ CircuitBuilderParams, RecursiveCircuitInputs, RecursiveCircuitWires, RevelationWithoutResultsTreeCircuit, }, NUM_QUERY_IO, PI_LEN, }; +#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, Eq)] +/// Data structure employed to provide input data related to a matching row +/// for the revelation circuit with unproven offset +pub struct MatchingRow { + proof: Vec, + path: RowPath, + result: Vec, +} + +impl MatchingRow { + /// Instantiate a new `MatchingRow` from the following inputs: + /// - `proof`: proof for the matching row, generated with the universal query circuit + /// - `path`: Data employed to verify the membership of the row in the tree + /// - `result`: Set of results associated to this row, to be exposed as outputs of the query + pub fn new(proof: Vec, path: RowPath, result: Vec) -> Self { + Self { + proof, + path, + result, + } + } +} + +impl PartialOrd for MatchingRow { + fn partial_cmp(&self, other: &Self) -> Option { + Some(self.cmp(other)) + } +} + +impl Ord for MatchingRow { + fn cmp(&self, other: &Self) -> Ordering { + let (left, right) = match self.result.len().cmp(&other.result.len()) { + Ordering::Less => { + let target_len = other.result.len(); + ( + self.result + .iter() + .chain(repeat(&U256::default())) + .take(target_len) + .cloned() + .collect_vec(), + other.result.clone(), + ) + } 
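+            // a shorter result vector is padded with zero `U256`s so that both
+            // sides are compared lexicographically over the same number of items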
+ Ordering::Equal => (self.result.clone(), other.result.clone()), + Ordering::Greater => { + let target_len = self.result.len(); + ( + self.result.clone(), + other + .result + .iter() + .chain(repeat(&U256::default())) + .take(target_len) + .cloned() + .collect_vec(), + ) + } + }; + let (is_smaller, is_eq) = is_less_than_or_equal_to_u256_arr(&left, &right); + if is_smaller { + return Ordering::Less; + } + if is_eq { + return Ordering::Equal; + } + Ordering::Greater + } +} #[derive(Debug, Serialize, Deserialize)] /// Parameters for revelation circuits. The following const generic values need to be specified: +/// - `ROW_TREE_MAX_DEPTH`: upper bound on the depth of a rows tree for Lagrange DB tables +/// - `INDEX_TREE_MAX_DEPTH`: upper bound on the depth of an index tree for Lagrange DB tables +/// - `MAX_NUM_COLUMNS`: upper bound on the number of columns of a table +/// - `MAX_NUM_PREDICATE_OPS`: upper bound on the number of basic operations allowed in the `WHERE` clause of a query +/// - `MAX_NUM_RESULT_OPS`: upper bound on the number of basic operations allowed in the `SELECT` statement of a query /// - `MAX_NUM_OUTPUTS`: upper bound on the number of output rows which can be exposed as public outputs of the circuit /// - `MAX_NUM_ITEMS_PER_OUTPUT`: upper bound on the number of items per output row; should correspond to the -/// upper bound on the number of items being found in `SELECT` statement of the query +/// upper bound on the number of items being found in `SELECT` statement of a query /// - `MAX_NUM_PLACEHOLDERS`: upper bound on the number of placeholders we allow in a query /// - `NUM_PLACEHOLDERS_HASHED`: number of placeholders being hashed in the placeholder hash pub struct Parameters< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const MAX_NUM_COLUMNS: usize, + const MAX_NUM_PREDICATE_OPS: usize, + const MAX_NUM_RESULT_OPS: usize, const MAX_NUM_OUTPUTS: usize, const MAX_NUM_ITEMS_PER_OUTPUT: usize, const MAX_NUM_PLACEHOLDERS: usize, - const NUM_PLACEHOLDERS_HASHED: usize, > where [(); MAX_NUM_ITEMS_PER_OUTPUT - 1]:, [(); NUM_QUERY_IO::]:, + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); MAX_NUM_ITEMS_PER_OUTPUT * MAX_NUM_OUTPUTS]:, + [(); 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS)]:, { revelation_no_results_tree: CircuitWithUniversalVerifier< F, @@ -66,7 +162,21 @@ pub struct Parameters< MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - NUM_PLACEHOLDERS_HASHED, + { 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS) }, + >, + >, + revelation_unproven_offset: CircuitWithUniversalVerifier< + F, + C, + D, + 0, + RecursiveCircuitWiresUnprovenOffset< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_OUTPUTS, + MAX_NUM_ITEMS_PER_OUTPUT, + MAX_NUM_PLACEHOLDERS, + { 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS) }, >, >, //ToDo: add revelation circuit with results tree @@ -75,17 +185,31 @@ pub struct Parameters< #[derive(Clone, Debug, Serialize, Deserialize)] /// Circuit inputs for revelation circuits. 
The following const generic values need to be specified: +/// - `ROW_TREE_MAX_DEPTH`: upper bound on the depth of a rows tree for Lagrange DB tables +/// - `INDEX_TREE_MAX_DEPTH`: upper bound on the depth of an index tree for Lagrange DB tables +/// - `MAX_NUM_COLUMNS`: upper bound on the number of columns of a table +/// - `MAX_NUM_PREDICATE_OPS`: upper bound on the number of basic operations allowed in the `WHERE` clause of a query +/// - `MAX_NUM_RESULT_OPS`: upper bound on the number of basic operations allowed in the `SELECT` statement of a query /// - `MAX_NUM_OUTPUTS`: upper bound on the number of output rows which can be exposed as public outputs of the circuit /// - `MAX_NUM_ITEMS_PER_OUTPUT`: upper bound on the number of items per output row; should correspond to the /// upper bound on the number of items being found in `SELECT` statement of the query /// - `MAX_NUM_PLACEHOLDERS`: upper bound on the number of placeholders we allow in a query /// - `NUM_PLACEHOLDERS_HASHED`: number of placeholders being hashed in the placeholder hash pub enum CircuitInput< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const MAX_NUM_COLUMNS: usize, + const MAX_NUM_PREDICATE_OPS: usize, + const MAX_NUM_RESULT_OPS: usize, const MAX_NUM_OUTPUTS: usize, const MAX_NUM_ITEMS_PER_OUTPUT: usize, const MAX_NUM_PLACEHOLDERS: usize, - const NUM_PLACEHOLDERS_HASHED: usize, -> { +> where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); MAX_NUM_ITEMS_PER_OUTPUT * MAX_NUM_OUTPUTS]:, + [(); 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS)]:, +{ NoResultsTree { query_proof: ProofWithVK, preprocessing_proof: ProofWithPublicInputs, @@ -93,24 +217,59 @@ pub enum CircuitInput< MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - NUM_PLACEHOLDERS_HASHED, + { 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS) }, >, }, - //ToDo: add circuit input for revelation circuit with results tree + UnprovenOffset { + row_proofs: Vec, + preprocessing_proof: ProofWithPublicInputs, + revelation_circuit: RevelationCircuitUnprovenOffset< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_OUTPUTS, + MAX_NUM_ITEMS_PER_OUTPUT, + MAX_NUM_PLACEHOLDERS, + { 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS) }, + >, + dummy_row_proof_input: Option< + QueryCircuitInput< + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + MAX_NUM_ITEMS_PER_OUTPUT, + >, + >, + }, //ToDo: add circuit input for revelation circuit with results tree } impl< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const MAX_NUM_COLUMNS: usize, + const MAX_NUM_PREDICATE_OPS: usize, + const MAX_NUM_RESULT_OPS: usize, const MAX_NUM_OUTPUTS: usize, const MAX_NUM_ITEMS_PER_OUTPUT: usize, const MAX_NUM_PLACEHOLDERS: usize, - const NUM_PLACEHOLDERS_HASHED: usize, > CircuitInput< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - NUM_PLACEHOLDERS_HASHED, > +where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); MAX_NUM_ITEMS_PER_OUTPUT * MAX_NUM_OUTPUTS]:, + [(); MAX_NUM_ITEMS_PER_OUTPUT - 1]:, + [(); QUERY_PI_LEN::]:, + [(); 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS)]:, + [(); MAX_NUM_COLUMNS + MAX_NUM_RESULT_OPS]:, { /// Initialize circuit inputs for the revelation circuit for queries without a results tree. 
/// The method requires the following inputs: @@ -119,124 +278,175 @@ impl< /// IVC set of circuit /// - `query_bounds`: bounds on values of primary and secondary indexes specified in the query /// - `placeholders`: set of placeholders employed in the query. They must be less than `MAX_NUM_PLACEHOLDERS` - /// - `placeholder_hash_ids`: Identifiers of the placeholders employed to compute the placeholder hash; they can be - /// obtained by the method `ids_for_placeholder_hash` of `query::api::Parameters` - pub fn new_revelation_no_results_tree( + /// - `predicate_operations`: Operations employed in the query to compute the filtering predicate in the `WHERE` clause + /// - `results_structure`: Data about the operations and items returned in the `SELECT` clause of the query + pub fn new_revelation_aggregated( query_proof: Vec, preprocessing_proof: Vec, query_bounds: &QueryBounds, placeholders: &Placeholders, - placeholder_hash_ids: [PlaceholderId; NUM_PLACEHOLDERS_HASHED], + predicate_operations: &[BasicOperation], + results_structure: &ResultStructure, ) -> Result { let query_proof = ProofWithVK::deserialize(&query_proof)?; let preprocessing_proof = deserialize_proof(&preprocessing_proof)?; - let num_placeholders = placeholders.len(); + let placeholder_hash_ids = query::api::CircuitInput::< + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + MAX_NUM_ITEMS_PER_OUTPUT, + >::ids_for_placeholder_hash( + predicate_operations, + results_structure, + placeholders, + query_bounds, + )?; + let revelation_circuit = RevelationWithoutResultsTreeCircuit { + check_placeholder: CheckPlaceholderGadget::new( + query_bounds, + placeholders, + placeholder_hash_ids, + )?, + }; + + Ok(CircuitInput::NoResultsTree { + query_proof, + preprocessing_proof, + revelation_circuit, + }) + } + + /// Initialize circuit inputs for the revelation circuit for queries with unproven offset. + /// The method requires the following inputs: + /// - `preprocessing_proof`: Proof of construction of the tree over which the query was performed, generated with the + /// IVC set of circuit + /// - `matching_rows`: Data about the matching rows employed to compute the results of the query; they have to be at + /// most `MAX_NUM_OUTPUTS` + /// - `query_bounds`: bounds on values of primary and secondary indexes specified in the query + /// - `placeholders`: set of placeholders employed in the query. 
They must be less than `MAX_NUM_PLACEHOLDERS` + /// - `placeholder_hash_ids`: Identifiers of the placeholders employed to compute the placeholder hash; they can be + /// obtained by the method `ids_for_placeholder_hash` of `query::api::Parameters` + /// - `column_ids`: Ids of the columns of the original table + /// - `predicate_operations`: Operations employed in the query to compute the filtering predicate in the `WHERE` clause + /// - `results_structure`: Data about the operations and items returned in the `SELECT` clause of the query + /// - `limit, offset`: limit and offset values specified in the query + /// - `distinct`: Flag specifying whether the DISTINCT keyword was specified in the query + pub fn new_revelation_tabular( + preprocessing_proof: Vec, + matching_rows: Vec, + query_bounds: &QueryBounds, + placeholders: &Placeholders, + column_ids: &ColumnIDs, + predicate_operations: &[BasicOperation], + results_structure: &ResultStructure, + limit: u32, + offset: u32, + ) -> Result + where + [(); MAX_NUM_COLUMNS + MAX_NUM_RESULT_OPS]:, + [(); 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS)]:, + { + let preprocessing_proof = deserialize_proof(&preprocessing_proof)?; ensure!( - num_placeholders <= MAX_NUM_PLACEHOLDERS, - "number of placeholders provided is more than the maximum number of placeholders" + matching_rows.len() <= MAX_NUM_OUTPUTS, + "Number of matching rows bigger than the maximum number of outputs" ); - // get placeholder ids from `placeholders` in the order expected by the circuit - let placeholder_ids = placeholders.ids(); - let (padded_placeholder_ids, padded_placeholder_values): (Vec, Vec<_>) = placeholder_ids - .iter() - .map(|id| (*id, placeholders.get(id).unwrap())) - // pad placeholder ids and values with the first items in the arrays, as expected by the circuit - .chain(repeat(( - PlaceholderIdentifier::MinQueryOnIdx1, - placeholders - .get(&PlaceholderIdentifier::MinQueryOnIdx1) - .unwrap(), - ))) - .take(MAX_NUM_PLACEHOLDERS) - .map(|(id, value)| { - let id: F = id.to_field(); - (id, value) - }) - .unzip(); - let compute_checked_placeholder_for_id = |placeholder_id: PlaceholderIdentifier| { - let value = placeholders.get(&placeholder_id)?; - // locate placeholder with id `placeholder_id` in `padded_placeholder_ids` - let pos = padded_placeholder_ids - .iter() - .find_position(|&&id| id == placeholder_id.to_field()); - ensure!( - pos.is_some(), - "placeholder with id {:?} not found in padded placeholder ids", - placeholder_id - ); - // sanity check: `padded_placeholder_values[pos] = value` - assert_eq!( - padded_placeholder_values[pos.unwrap().0], - value, - "placehoder values doesn't match for id {:?}", - placeholder_id - ); - Ok(CheckedPlaceholder { - id: placeholder_id.to_field(), - value, - pos: pos.unwrap().0.to_field(), - }) + let dummy_row_proof_input = if matching_rows.len() < MAX_NUM_OUTPUTS { + // we need to generate inputs to prove a dummy row, employed to pad the matching rows provided as input + // to `MAX_NUM_OUTPUTS` + Some(generate_dummy_row_proof_inputs( + column_ids, + predicate_operations, + results_structure, + placeholders, + query_bounds, + )?) 
+ } else { + None }; - let to_be_checked_placeholders = placeholder_hash_ids - .into_iter() - .map(&compute_checked_placeholder_for_id) - .collect::>>()?; - // compute placeholders data to be hashed for secondary query bounds - let min_query_secondary = - QueryBound::new_secondary_index_bound(placeholders, query_bounds.min_query_secondary()) - .unwrap(); - let max_query_secondary = - QueryBound::new_secondary_index_bound(placeholders, query_bounds.max_query_secondary()) - .unwrap(); - let secondary_query_bound_placeholders = [min_query_secondary, max_query_secondary] - .into_iter() - .flat_map(|query_bound| { - [ - compute_checked_placeholder_for_id(PlaceholderId::from_fields(&[query_bound - .operation - .placeholder_ids[0]])), - compute_checked_placeholder_for_id(PlaceholderId::from_fields(&[query_bound - .operation - .placeholder_ids[1]])), - ] + // sort matching rows according to result values, which is needed to enforce DISTINCT + let matching_rows = matching_rows.into_iter().collect::>(); + let mut row_paths = array::from_fn(|_| RowPath::default()); + let mut result_values = + array::from_fn(|_| vec![U256::default(); results_structure.output_ids.len()]); + let row_proofs = matching_rows + .iter() + .enumerate() + .map(|(i, row)| { + row_paths[i] = row.path.clone(); + result_values[i] = row.result.clone(); + ProofWithVK::deserialize(&row.proof) }) .collect::>>()?; - let revelation_circuit = RevelationWithoutResultsTreeCircuit { - num_placeholders, - placeholder_ids: padded_placeholder_ids.try_into().unwrap(), - placeholder_values: padded_placeholder_values.try_into().unwrap(), - to_be_checked_placeholders: to_be_checked_placeholders.try_into().unwrap(), - secondary_query_bound_placeholders: secondary_query_bound_placeholders - .try_into() - .unwrap(), - }; + let placeholder_hash_ids = query::api::CircuitInput::< + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + MAX_NUM_ITEMS_PER_OUTPUT, + >::ids_for_placeholder_hash( + predicate_operations, + results_structure, + placeholders, + query_bounds, + )?; + let placeholder_inputs = + CheckPlaceholderGadget::new(query_bounds, placeholders, placeholder_hash_ids)?; + let index_ids = [ + column_ids.primary.to_canonical_u64(), + column_ids.secondary.to_canonical_u64(), + ]; + let revelation_circuit = RevelationCircuitUnprovenOffset::new( + row_paths, + index_ids, + &results_structure.output_ids, + result_values, + limit, + offset, + results_structure.distinct.unwrap_or(false), + placeholder_inputs, + )?; - Ok(CircuitInput::NoResultsTree { - query_proof, + Ok(Self::UnprovenOffset { + row_proofs, preprocessing_proof, revelation_circuit, + dummy_row_proof_input, }) } } -const REVELATION_CIRCUIT_SET_SIZE: usize = 1; +const REVELATION_CIRCUIT_SET_SIZE: usize = 2; impl< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const MAX_NUM_COLUMNS: usize, + const MAX_NUM_PREDICATE_OPS: usize, + const MAX_NUM_RESULT_OPS: usize, const MAX_NUM_OUTPUTS: usize, const MAX_NUM_ITEMS_PER_OUTPUT: usize, const MAX_NUM_PLACEHOLDERS: usize, - const NUM_PLACEHOLDERS_HASHED: usize, > Parameters< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - NUM_PLACEHOLDERS_HASHED, > where [(); MAX_NUM_ITEMS_PER_OUTPUT - 1]:, [(); NUM_QUERY_IO::]:, [(); >::HASH_SIZE]:, [(); PI_LEN::]:, + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); MAX_NUM_ITEMS_PER_OUTPUT * MAX_NUM_OUTPUTS]:, + [(); 
MAX_NUM_COLUMNS + MAX_NUM_RESULT_OPS]:, + [(); QUERY_PI_LEN::]:, + [(); 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS)]:, { pub fn build( query_circuit_set: &RecursiveCircuits, @@ -253,16 +463,19 @@ where preprocessing_circuit_set: preprocessing_circuit_set.clone(), preprocessing_vk: preprocessing_vk.clone(), }; - let revelation_no_results_tree = builder.build_circuit(build_parameters); + let revelation_no_results_tree = builder.build_circuit(build_parameters.clone()); + let revelation_unproven_offset = builder.build_circuit(build_parameters); - let circuits = vec![prepare_recursive_circuit_for_circuit_set( - &revelation_no_results_tree, - )]; + let circuits = vec![ + prepare_recursive_circuit_for_circuit_set(&revelation_no_results_tree), + prepare_recursive_circuit_for_circuit_set(&revelation_unproven_offset), + ]; let circuit_set = RecursiveCircuits::new(circuits); Self { revelation_no_results_tree, + revelation_unproven_offset, circuit_set, } } @@ -270,12 +483,24 @@ where pub fn generate_proof( &self, input: CircuitInput< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - NUM_PLACEHOLDERS_HASHED, >, query_circuit_set: &RecursiveCircuits, + query_params: Option< + &QueryParams< + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + MAX_NUM_ITEMS_PER_OUTPUT, + >, + >, ) -> Result> { let proof = ProofWithVK::from(match input { CircuitInput::NoResultsTree { @@ -299,6 +524,41 @@ where self.revelation_no_results_tree.get_verifier_data().clone(), ) } + CircuitInput::UnprovenOffset { + row_proofs, + preprocessing_proof, + revelation_circuit, + dummy_row_proof_input, + } => { + let row_proofs = if let Some(input) = dummy_row_proof_input { + let proof = query_params.unwrap().generate_proof(input)?; + let proof = ProofWithVK::deserialize(&proof)?; + row_proofs + .into_iter() + .chain(repeat(proof)) + .take(MAX_NUM_OUTPUTS) + .collect_vec() + .try_into() + .unwrap() + } else { + row_proofs.try_into().unwrap() + }; + let input = RecursiveCircuitInputsUnporvenOffset { + inputs: revelation_circuit, + row_proofs, + preprocessing_proof, + query_circuit_set: query_circuit_set.clone(), + }; + ( + self.circuit_set.generate_proof( + &self.revelation_unproven_offset, + [], + [], + input, + )?, + self.revelation_unproven_offset.get_verifier_data().clone(), + ) + } }); proof.serialize() } @@ -345,6 +605,9 @@ mod tests { fn test_api() { init_logging(); + const ROW_TREE_MAX_DEPTH: usize = 10; + const INDEX_TREE_MAX_DEPTH: usize = 15; + let query_circuits = TestingRecursiveCircuits::< F, C, @@ -355,10 +618,14 @@ mod tests { TestingRecursiveCircuits::::default(); println!("building params"); let params = Parameters::< + ROW_TREE_MAX_DEPTH, + INDEX_TREE_MAX_DEPTH, + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, MAX_NUM_OUTPUTS, MAX_NUM_ITEMS_PER_OUTPUT, MAX_NUM_PLACEHOLDERS, - { 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS) }, >::build( query_circuits.get_recursive_circuit_set(), preprocessing_circuits.get_recursive_circuit_set(), @@ -371,19 +638,6 @@ mod tests { // Generate the testing data for revalation circuit. 
let test_data = TestRevelationData::sample(42, 76); - let placeholder_hash_ids = QueryInput::< - MAX_NUM_COLUMNS, - MAX_NUM_PREDICATE_OPS, - MAX_NUM_RESULT_OPS, - MAX_NUM_ITEMS_PER_OUTPUT, - >::ids_for_placeholder_hash( - test_data.predicate_operations(), - test_data.results(), - test_data.placeholders(), - test_data.query_bounds(), - ) - .unwrap(); - let query_pi = QueryPI::::from_slice(test_data.query_pi_raw()); // generate query proof @@ -401,16 +655,17 @@ mod tests { let preprocessing_pi = PreprocessingPI::from_slice(&preprocessing_proof.public_inputs); let preprocessing_proof = serialize_proof(&preprocessing_proof).unwrap(); - let input = CircuitInput::new_revelation_no_results_tree( + let input = CircuitInput::new_revelation_aggregated( query_proof, preprocessing_proof, test_data.query_bounds(), test_data.placeholders(), - placeholder_hash_ids, + test_data.predicate_operations(), + test_data.results(), ) .unwrap(); let proof = params - .generate_proof(input, query_circuits.get_recursive_circuit_set()) + .generate_proof(input, query_circuits.get_recursive_circuit_set(), None) .unwrap(); let (proof, _) = ProofWithVK::deserialize(&proof).unwrap().into(); let pi = PublicInputs::::from_slice(&proof.public_inputs); diff --git a/verifiable-db/src/revelation/mod.rs b/verifiable-db/src/revelation/mod.rs index 799a58683..5253b4ca3 100644 --- a/verifiable-db/src/revelation/mod.rs +++ b/verifiable-db/src/revelation/mod.rs @@ -6,9 +6,11 @@ use mp2_common::F; pub mod api; pub(crate) mod placeholders_check; mod public_inputs; +mod revelation_unproven_offset; mod revelation_without_results_tree; pub use public_inputs::PublicInputs; +pub use revelation_unproven_offset::RowPath; // L: maximum number of results // S: maximum number of items in each result @@ -32,8 +34,10 @@ pub(crate) mod tests { use alloy::primitives::U256; use itertools::Itertools; use mp2_common::{array::ToField, poseidon::H, utils::ToFields, F}; + use mp2_test::utils::gen_random_u256; use placeholders_check::{ - placeholder_ids_hash, CheckedPlaceholder, NUM_SECONDARY_INDEX_PLACEHOLDERS, + placeholder_ids_hash, CheckPlaceholderGadget, CheckedPlaceholder, + NUM_SECONDARY_INDEX_PLACEHOLDERS, }; use plonky2::{field::types::PrimeField64, hash::hash_types::HashOut, plonk::config::Hasher}; use rand::{thread_rng, Rng}; @@ -45,12 +49,7 @@ pub(crate) mod tests { #[derive(Clone, Debug)] pub(crate) struct TestPlaceholders { // Input arguments for `check_placeholders` function - pub(crate) num_placeholders: usize, - pub(crate) placeholder_ids: [F; PH], - pub(crate) placeholder_values: [U256; PH], - pub(crate) to_be_checked_placeholders: [CheckedPlaceholder; PP], - pub(crate) secondary_query_bound_placeholders: - [CheckedPlaceholder; NUM_SECONDARY_INDEX_PLACEHOLDERS], + pub(crate) check_placeholder_inputs: CheckPlaceholderGadget, pub(crate) final_placeholder_hash: HashOut, // Output result for `check_placeholders` function pub(crate) placeholder_ids_hash: HashOut, @@ -72,6 +71,12 @@ pub(crate) mod tests { array::from_fn(|_| PlaceholderIdentifier::Generic(rng.gen())); let mut placeholder_values = array::from_fn(|_| U256::from_limbs(rng.gen())); + // ensure that min_query <= max_query + while placeholder_values[0] > placeholder_values[1] { + placeholder_values[0] = gen_random_u256(rng); + placeholder_values[1] = gen_random_u256(rng); + } + // Set the first 2 placeholder identifiers as below constants. 
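         // (these leading slots are reserved for the query-bound placeholders: the
         // circuit expects the MIN/MAX bounds of the indexed columns in the first
         // positions of the placeholder arrays)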
         [
             PlaceholderIdentifier::MinQueryOnIdx1,
@@ -132,10 +137,18 @@
             }
         });
 
+        let check_placeholder_inputs = CheckPlaceholderGadget::<PH, PP> {
+            num_placeholders,
+            placeholder_ids,
+            placeholder_values,
+            to_be_checked_placeholders,
+            secondary_query_bound_placeholders,
+        };
+
         // Re-compute the placeholder hash from placeholder_pairs and minimum,
         // maximum query bounds. Then check that it matches the specified
         // final placeholder hash.
-        let [min_i1, max_i1] = array::from_fn(|i| &placeholder_values[i]);
+        let (min_i1, max_i1) = check_placeholder_inputs.primary_query_bounds();
         let placeholder_hash = H::hash_no_pad(&placeholder_hash_payload);
         // query_placeholder_hash = H(placeholder_hash || min_i2 || max_i2)
         let inputs = placeholder_hash
@@ -161,14 +174,10 @@
             .collect_vec();
         let final_placeholder_hash = H::hash_no_pad(&inputs);
 
-        let [min_query, max_query] = [*min_i1, *max_i1];
+        let [min_query, max_query] = [min_i1, max_i1];
 
         Self {
-            num_placeholders,
-            placeholder_ids,
-            placeholder_values,
-            to_be_checked_placeholders,
-            secondary_query_bound_placeholders,
+            check_placeholder_inputs,
             final_placeholder_hash,
             placeholder_ids_hash,
             query_placeholder_hash,
@@ -187,8 +196,7 @@
     {
         // Convert the entry count to a Uint256.
        let entry_count = U256::from(query_pi.num_matching_rows().to_canonical_u64());
-        let mut overflow = false;
-
+
         let [op_avg, op_count] =
             [AggregationOperation::AvgOp, AggregationOperation::CountOp].map(|op| op.to_field());
@@ -199,14 +207,7 @@
             let op = ops[i];
 
             if op == op_avg {
-                match value.checked_div(entry_count) {
-                    Some(dividend) => dividend,
-                    None => {
-                        // Set the overflow flag to true if the divisor is zero.
-                        overflow = true;
-                        U256::ZERO
-                    }
-                }
+                value.checked_div(entry_count).unwrap_or(U256::ZERO)
             } else if op == op_count {
                 entry_count
             } else {
@@ -214,6 +215,6 @@
             }
         });
 
-        (result, query_pi.overflow_flag() || overflow)
+        (result, query_pi.overflow_flag())
     }
 }
diff --git a/verifiable-db/src/revelation/placeholders_check.rs b/verifiable-db/src/revelation/placeholders_check.rs
index 61e41a8b9..91bb02ca0 100644
--- a/verifiable-db/src/revelation/placeholders_check.rs
+++ b/verifiable-db/src/revelation/placeholders_check.rs
@@ -1,15 +1,26 @@
 //! Check the placeholder identifiers and values with the specified `final_placeholder_hash`,
 //! compute and return the `num_placeholders` and the `placeholder_ids_hash`.
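+//
+// For intuition, the padding convention checked here can be mirrored out of
+// circuit roughly as follows (an illustrative sketch only, with `u64` stand-ins
+// for field elements and U256 values; `pad_placeholders` is a hypothetical
+// helper, not part of this patch):
+//
+//     fn pad_placeholders<const PH: usize>(
+//         ids: &[u64],
+//         values: &[u64],
+//     ) -> ([u64; PH], [u64; PH]) {
+//         assert!(!ids.is_empty() && ids.len() == values.len() && ids.len() <= PH);
+//         // slots beyond the real placeholders repeat the first id/value pair
+//         let padded_ids = std::array::from_fn(|i| *ids.get(i).unwrap_or(&ids[0]));
+//         let padded_values = std::array::from_fn(|i| *values.get(i).unwrap_or(&values[0]));
+//         (padded_ids, padded_values)
+//     }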
-use crate::query::computational_hash_ids::PlaceholderIdentifier; +use crate::query::{ + aggregation::QueryBounds, + computational_hash_ids::PlaceholderIdentifier, + universal_circuit::{ + universal_circuit_inputs::{PlaceholderId, Placeholders}, + universal_query_circuit::QueryBound, + }, +}; use alloy::primitives::U256; +use anyhow::{ensure, Result}; use itertools::Itertools; use mp2_common::{ array::ToField, poseidon::{empty_poseidon_hash, H}, + serialization::{ + deserialize_array, deserialize_long_array, serialize_array, serialize_long_array, + }, types::CBuilder, u256::{CircuitBuilderU256, UInt256Target, WitnessWriteU256}, - utils::{SelectHashBuilder, ToFields, ToTargets}, + utils::{FromFields, SelectHashBuilder, ToFields, ToTargets}, F, }; use plonky2::{ @@ -21,7 +32,10 @@ use plonky2::{ plonk::config::Hasher, }; use serde::{Deserialize, Serialize}; -use std::{array, iter::once}; +use std::{ + array, + iter::{once, repeat}, +}; #[derive(Debug, Clone, Serialize, Deserialize)] /// Data structure representing a placeholder target to be checked in the `check_placeholders` gadget @@ -57,10 +71,247 @@ impl CheckedPlaceholder { pw.set_u256_target(&wires.value, self.value); } } +#[derive(Clone, Debug, Serialize, Deserialize)] +pub(crate) struct CheckPlaceholderInputWires { + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + pub(crate) is_placeholder_valid: [BoolTarget; PH], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + pub(crate) placeholder_ids: [Target; PH], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + pub(crate) placeholder_values: [UInt256Target; PH], + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + pub(crate) to_be_checked_placeholders: [CheckedPlaceholderTarget; PP], + pub(crate) secondary_query_bound_placeholders: + [CheckedPlaceholderTarget; NUM_SECONDARY_INDEX_PLACEHOLDERS], +} + +pub(crate) struct CheckPlaceholderWires { + pub(crate) input_wires: CheckPlaceholderInputWires, + pub(crate) num_placeholders: Target, + pub(crate) placeholder_id_hash: HashOutTarget, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub(crate) struct CheckPlaceholderGadget { + /// Real number of the valid placeholders + pub(crate) num_placeholders: usize, + /// Array of the placeholder identifiers that can be employed in the query: + /// - The first 4 items are expected to be constant identifiers of the query + /// bounds `MIN_I1, MAX_I1` and `MIN_I2, MAX_I2` + /// - The following `num_placeholders - 4` values are expected to be the + /// identifiers of the placeholders employed in the query + /// - The remaining `PH - num_placeholders` items are expected to be the + /// same as `placeholders_ids[0]` + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + pub(crate) placeholder_ids: [F; PH], + /// Array of the placeholder values that can be employed in the query: + /// - The first 4 values are expected to be the bounds `MIN_I1, MAX_I1` and + /// `MIN_I2, MAX_I2` found in the query for the primary and secondary + /// indexed columns + /// - The following `num_placeholders - 4` values are expected to be the + /// values for the placeholders employed in the query + /// - The remaining `PH - num_placeholders` values are expected to be the + /// same as `placeholder_values[0]` + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = 
"deserialize_long_array" + )] + pub(crate) placeholder_values: [U256; PH], + /// Placeholders data to be provided to `check_placeholder` gadget to + /// check that placeholders employed in universal query circuit matches + /// with the `placeholder_values` exposed as public input by this proof + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + pub(crate) to_be_checked_placeholders: [CheckedPlaceholder; PP], + /// Placeholders data related to the placeholders employed in the + /// universal query circuit to hash the query bounds for the secondary + /// index; they are provided as well to `check_placeholder` gadget to + /// check the correctness of the placeholders employed for query bounds + pub(crate) secondary_query_bound_placeholders: + [CheckedPlaceholder; NUM_SECONDARY_INDEX_PLACEHOLDERS], +} /// Number of placeholders being hashed to include query bounds in the placeholder hash pub(crate) const NUM_SECONDARY_INDEX_PLACEHOLDERS: usize = 4; +impl CheckPlaceholderGadget { + pub(crate) fn new( + query_bounds: &QueryBounds, + placeholders: &Placeholders, + placeholder_hash_ids: [PlaceholderId; PP], + ) -> Result { + let num_placeholders = placeholders.len(); + ensure!( + num_placeholders <= PH, + "number of placeholders provided is more than the maximum number of placeholders" + ); + // get placeholder ids from `placeholders` in the order expected by the circuit + let placeholder_ids = placeholders.ids(); + let (padded_placeholder_ids, padded_placeholder_values): (Vec, Vec<_>) = placeholder_ids + .iter() + .map(|id| (*id, placeholders.get(id).unwrap())) + // pad placeholder ids and values with the first items in the arrays, as expected by the circuit + .chain(repeat(( + PlaceholderIdentifier::MinQueryOnIdx1, + placeholders + .get(&PlaceholderIdentifier::MinQueryOnIdx1) + .unwrap(), + ))) + .take(PH) + .map(|(id, value)| { + let id: F = id.to_field(); + (id, value) + }) + .unzip(); + let compute_checked_placeholder_for_id = |placeholder_id: PlaceholderIdentifier| { + let value = placeholders.get(&placeholder_id)?; + // locate placeholder with id `placeholder_id` in `padded_placeholder_ids` + let pos = padded_placeholder_ids + .iter() + .find_position(|&&id| id == placeholder_id.to_field()); + ensure!( + pos.is_some(), + "placeholder with id {:?} not found in padded placeholder ids", + placeholder_id + ); + // sanity check: `padded_placeholder_values[pos] = value` + assert_eq!( + padded_placeholder_values[pos.unwrap().0], + value, + "placehoder values doesn't match for id {:?}", + placeholder_id + ); + Ok(CheckedPlaceholder { + id: placeholder_id.to_field(), + value, + pos: pos.unwrap().0.to_field(), + }) + }; + let to_be_checked_placeholders = placeholder_hash_ids + .into_iter() + .map(|placeholder_id| compute_checked_placeholder_for_id(placeholder_id)) + .collect::>>()?; + // compute placeholders data to be hashed for secondary query bounds + let min_query_secondary = QueryBound::new_secondary_index_bound( + &placeholders, + &query_bounds.min_query_secondary(), + ) + .unwrap(); + let max_query_secondary = QueryBound::new_secondary_index_bound( + &placeholders, + &query_bounds.max_query_secondary(), + ) + .unwrap(); + let secondary_query_bound_placeholders = [min_query_secondary, max_query_secondary] + .into_iter() + .flat_map(|query_bound| { + [ + compute_checked_placeholder_for_id(PlaceholderId::from_fields(&[query_bound + .operation + .placeholder_ids[0]])), + 
compute_checked_placeholder_for_id(PlaceholderId::from_fields(&[query_bound + .operation + .placeholder_ids[1]])), + ] + }) + .collect::>>()?; + + Ok(Self { + num_placeholders, + placeholder_ids: padded_placeholder_ids.try_into().unwrap(), + placeholder_values: padded_placeholder_values.try_into().unwrap(), + to_be_checked_placeholders: to_be_checked_placeholders.try_into().unwrap(), + secondary_query_bound_placeholders: secondary_query_bound_placeholders + .try_into() + .unwrap(), + }) + } + + pub(crate) fn build( + b: &mut CBuilder, + final_placeholder_hash: &HashOutTarget, + ) -> CheckPlaceholderWires { + let is_placeholder_valid = array::from_fn(|_| b.add_virtual_bool_target_safe()); + let placeholder_ids = b.add_virtual_target_arr(); + // `placeholder_values` are exposed as public inputs to the Solidity contract + // which will not do range-check. + let placeholder_values = array::from_fn(|_| b.add_virtual_u256()); + let to_be_checked_placeholders = array::from_fn(|_| CheckedPlaceholderTarget::new(b)); + let secondary_query_bound_placeholders = + array::from_fn(|_| CheckedPlaceholderTarget::new(b)); + let (num_placeholders, placeholder_id_hash) = check_placeholders( + b, + &is_placeholder_valid, + &placeholder_ids, + &placeholder_values, + &to_be_checked_placeholders, + &secondary_query_bound_placeholders, + final_placeholder_hash, + ); + + CheckPlaceholderWires:: { + input_wires: CheckPlaceholderInputWires:: { + is_placeholder_valid, + placeholder_ids, + placeholder_values, + to_be_checked_placeholders, + secondary_query_bound_placeholders, + }, + num_placeholders, + placeholder_id_hash, + } + } + + pub(crate) fn assign( + &self, + pw: &mut PartialWitness, + wires: &CheckPlaceholderInputWires, + ) { + wires + .is_placeholder_valid + .iter() + .enumerate() + .for_each(|(i, t)| pw.set_bool_target(*t, i < self.num_placeholders)); + pw.set_target_arr(&wires.placeholder_ids, &self.placeholder_ids); + wires + .placeholder_values + .iter() + .zip(self.placeholder_values) + .for_each(|(t, v)| pw.set_u256_target(t, v)); + wires + .to_be_checked_placeholders + .iter() + .zip(&self.to_be_checked_placeholders) + .for_each(|(t, v)| v.assign(pw, t)); + wires + .secondary_query_bound_placeholders + .iter() + .zip(&self.secondary_query_bound_placeholders) + .for_each(|(t, v)| v.assign(pw, t)); + } + // Return the query bounds on the primary index, taken from the placeholder values + pub(crate) fn primary_query_bounds(&self) -> (U256, U256) { + (self.placeholder_values[0], self.placeholder_values[1]) + } +} + /// This gadget checks that the placeholders identifiers and values employed to /// compute the `final_placeholder_hash` are found in placeholder_ids and /// placeholder_values arrays respectively. 
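In plain Rust, the consistency check this gadget performs can be sketched as follows (illustrative only, assuming the semantics described above; `CheckedValue` and `placeholders_are_consistent` are hypothetical stand-ins for `CheckedPlaceholder` and the in-circuit logic, with `u64` ids and values):

    struct CheckedValue { id: u64, value: u64, pos: usize }

    // every checked placeholder must appear at its claimed position in the
    // padded ids/values arrays
    fn placeholders_are_consistent(ids: &[u64], values: &[u64], checked: &[CheckedValue]) -> bool {
        checked.iter().all(|c| ids[c.pos] == c.id && values[c.pos] == c.value)
    }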
@@ -225,12 +476,7 @@ mod tests { #[derive(Clone, Debug)] pub(crate) struct TestPlaceholdersWires { - is_placeholder_valid: [BoolTarget; PH], - placeholder_ids: [Target; PH], - placeholder_values: [UInt256Target; PH], - to_be_checked_placeholders: [CheckedPlaceholderTarget; PP], - secondary_query_bound_placeholders: - [CheckedPlaceholderTarget; NUM_SECONDARY_INDEX_PLACEHOLDERS], + input_wires: CheckPlaceholderInputWires, final_placeholder_hash: HashOutTarget, exp_placeholder_ids_hash: HashOutTarget, exp_num_placeholders: Target, @@ -240,44 +486,25 @@ mod tests { type Wires = TestPlaceholdersWires; fn build(b: &mut CBuilder) -> Self::Wires { - let is_placeholder_valid = array::from_fn(|_| b.add_virtual_bool_target_unsafe()); - let placeholder_ids = b.add_virtual_target_arr(); - let placeholder_values = array::from_fn(|_| b.add_virtual_u256_unsafe()); - let to_be_checked_placeholders = array::from_fn(|_| CheckedPlaceholderTarget { - id: b.add_virtual_target(), - value: b.add_virtual_u256_unsafe(), - pos: b.add_virtual_target(), - }); - let secondary_query_bound_placeholders = array::from_fn(|_| CheckedPlaceholderTarget { - id: b.add_virtual_target(), - value: b.add_virtual_u256_unsafe(), - pos: b.add_virtual_target(), - }); let [final_placeholder_hash, exp_placeholder_ids_hash] = array::from_fn(|_| b.add_virtual_hash()); let exp_num_placeholders = b.add_virtual_target(); // Invoke the `check_placeholders` function. - let (num_placeholders, placeholder_ids_hash) = check_placeholders( - b, - &is_placeholder_valid, - &placeholder_ids, - &placeholder_values, - &to_be_checked_placeholders, - &secondary_query_bound_placeholders, - &final_placeholder_hash, - ); + let check_placeholder_wires = CheckPlaceholderGadget::build(b, &final_placeholder_hash); // Check the returned `num_placeholders` and `placeholder_ids_hash`. 
-        b.connect(num_placeholders, exp_num_placeholders);
-        b.connect_hashes(placeholder_ids_hash, exp_placeholder_ids_hash);
+        b.connect(
+            check_placeholder_wires.num_placeholders,
+            exp_num_placeholders,
+        );
+        b.connect_hashes(
+            check_placeholder_wires.placeholder_id_hash,
+            exp_placeholder_ids_hash,
+        );
 
         Self::Wires {
-            is_placeholder_valid,
-            placeholder_ids,
-            placeholder_values,
-            to_be_checked_placeholders,
-            secondary_query_bound_placeholders,
+            input_wires: check_placeholder_wires.input_wires,
             final_placeholder_hash,
             exp_placeholder_ids_hash,
             exp_num_placeholders,
@@ -285,27 +512,7 @@
     }
 
     fn prove(&self, pw: &mut PartialWitness, wires: &Self::Wires) {
-        wires
-            .is_placeholder_valid
-            .iter()
-            .enumerate()
-            .for_each(|(i, t)| pw.set_bool_target(*t, i < self.num_placeholders));
-        pw.set_target_arr(&wires.placeholder_ids, &self.placeholder_ids);
-        wires
-            .placeholder_values
-            .iter()
-            .zip(self.placeholder_values)
-            .for_each(|(t, v)| pw.set_u256_target(t, v));
-        wires
-            .to_be_checked_placeholders
-            .iter()
-            .zip(&self.to_be_checked_placeholders)
-            .for_each(|(t, v)| v.assign(pw, t));
-        wires
-            .secondary_query_bound_placeholders
-            .iter()
-            .zip(&self.secondary_query_bound_placeholders)
-            .for_each(|(t, v)| v.assign(pw, t));
+        self.check_placeholder_inputs.assign(pw, &wires.input_wires);
         [
             (wires.final_placeholder_hash, self.final_placeholder_hash),
             (wires.exp_placeholder_ids_hash, self.placeholder_ids_hash),
@@ -314,7 +521,7 @@
         .for_each(|(t, v)| pw.set_hash_target(*t, *v));
         pw.set_target(
             wires.exp_num_placeholders,
-            F::from_canonical_usize(self.num_placeholders),
+            F::from_canonical_usize(self.check_placeholder_inputs.num_placeholders),
         );
     }
 }
diff --git a/verifiable-db/src/revelation/revelation_unproven_offset.rs b/verifiable-db/src/revelation/revelation_unproven_offset.rs
new file mode 100644
index 000000000..15241c814
--- /dev/null
+++ b/verifiable-db/src/revelation/revelation_unproven_offset.rs
@@ -0,0 +1,1296 @@
+//! This module contains the final revelation circuit for SELECT queries without
+//! aggregation functions, where we just return at most `LIMIT` results, without
+//! proving the `OFFSET` in the set of results. Note that this means that the
+//! prover could censor some actual results of the query, but they cannot be
faked + +use anyhow::{ensure, Result}; +use std::{ + array, + iter::{once, repeat}, +}; + +use alloy::primitives::U256; +use itertools::Itertools; +use mp2_common::{ + default_config, + group_hashing::CircuitBuilderGroupHashing, + poseidon::{flatten_poseidon_hash_target, H}, + proof::ProofWithVK, + public_inputs::PublicInputCommon, + serialization::{ + deserialize, deserialize_array, deserialize_long_array, serialize, serialize_array, + serialize_long_array, + }, + types::{CBuilder, HashOutput}, + u256::{CircuitBuilderU256, UInt256Target, WitnessWriteU256}, + utils::{Fieldable, SelectHashBuilder, ToTargets}, + C, D, F, +}; +use plonky2::{ + field::types::PrimeField64, + hash::hash_types::HashOutTarget, + iop::{ + target::{BoolTarget, Target}, + witness::{PartialWitness, WitnessWrite}, + }, + plonk::{ + config::Hasher, + proof::{ProofWithPublicInputs, ProofWithPublicInputsTarget}, + }, +}; +use plonky2_ecgfp5::gadgets::curve::CircuitBuilderEcGFp5; +use recursion_framework::{ + circuit_builder::CircuitLogicWires, + framework::{ + RecursiveCircuits, RecursiveCircuitsVerifierGagdet, RecursiveCircuitsVerifierTarget, + }, +}; +use serde::{Deserialize, Serialize}; + +use crate::{ + ivc::PublicInputs as OriginalTreePublicInputs, + query::{ + aggregation::{ChildPosition, NodeInfo, QueryBounds, QueryHashNonExistenceCircuits}, + api::CircuitInput as QueryCircuitInput, + computational_hash_ids::{AggregationOperation, ColumnIDs, ResultIdentifier}, + merkle_path::{MerklePathGadget, MerklePathTargetInputs}, + public_inputs::PublicInputs as QueryProofPublicInputs, + universal_circuit::{ + build_cells_tree, + universal_circuit_inputs::{BasicOperation, Placeholders, ResultStructure}, + }, + PI_LEN, + }, +}; + +use super::{ + placeholders_check::{CheckPlaceholderGadget, CheckPlaceholderInputWires}, + revelation_without_results_tree::CircuitBuilderParams, + PublicInputs, NUM_PREPROCESSING_IO, NUM_QUERY_IO, PI_LEN as REVELATION_PI_LEN, +}; + +#[derive(Clone, Debug, Serialize, Deserialize)] +/// Target for all the information about nodes in the path needed by this revelation circuit +struct NodeInfoTarget { + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + child_hashes: [HashOutTarget; 2], + node_min: UInt256Target, + node_max: UInt256Target, +} + +impl NodeInfoTarget { + fn build(b: &mut CBuilder) -> Self { + let child_hashes = b.add_virtual_hashes(2); + let [node_min, node_max] = b.add_virtual_u256_arr_unsafe(); + + Self { + child_hashes: child_hashes.try_into().unwrap(), + node_min, + node_max, + } + } + + fn set_target(&self, pw: &mut PartialWitness, inputs: &NodeInfo) { + self.child_hashes + .iter() + .zip(inputs.child_hashes) + .for_each(|(&target, value)| pw.set_hash_target(target, value)); + pw.set_u256_target(&self.node_min, inputs.min); + pw.set_u256_target(&self.node_max, inputs.max); + } +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub(crate) struct RevelationWires< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const L: usize, + const S: usize, + const PH: usize, + const PP: usize, +> where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); S * L]:, +{ + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + row_tree_paths: [MerklePathTargetInputs; L], + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + index_tree_paths: [MerklePathTargetInputs; L], + #[serde( + serialize_with = 
"serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + row_node_info: [NodeInfoTarget; L], + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + index_node_info: [NodeInfoTarget; L], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + is_row_node_leaf: [BoolTarget; L], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + is_item_included: [BoolTarget; S], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + ids: [Target; S], + #[serde( + serialize_with = "serialize_array", + deserialize_with = "deserialize_array" + )] + results: [UInt256Target; S * L], + limit: Target, + offset: Target, + #[serde(serialize_with = "serialize", deserialize_with = "deserialize")] + distinct: BoolTarget, + check_placeholder_wires: CheckPlaceholderInputWires, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct RevelationCircuit< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const L: usize, + const S: usize, + const PH: usize, + const PP: usize, +> where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); S * L]:, +{ + /// Path to verify each of the L rows in the rows tree + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + row_tree_paths: [MerklePathGadget; L], + /// Path to verify each of the L rows in the index tree + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + index_tree_paths: [MerklePathGadget; L], + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + /// Info about the nodes of the rows tree storing each of the L rows being proven + row_node_info: [NodeInfo; L], + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + /// Info about the nodes of the index tree that stores the rows trees where each of + /// the L rows being proven are located + index_node_info: [NodeInfo; L], + /// Actual number of items per-row included in the results. + num_actual_items_per_row: usize, + /// Ids of the output items included in the results for each row + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + ids: [F; S], + /// Output results of the query. 
They must be provided as input as they are checked against the + /// one accumulated by the query circuits + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + results: [U256; S * L], + limit: u32, + offset: u32, + /// Boolean flag specifying whether DISTINCT keyword must be applied to results + distinct: bool, + /// Input values employed by the `CheckPlaceholderGadget` + check_placeholder_inputs: CheckPlaceholderGadget, +} + +#[derive(Clone, Debug, Serialize, Deserialize, Default, PartialEq, Eq)] +/// Data structure containing all the information needed to verify the membership of +/// a row in a tree representing a table +pub struct RowPath { + /// Info about the node of the row tree storing the row + row_node_info: NodeInfo, + /// Info about the nodes in the path of the rows tree for the node storing the row; The `ChildPosition` refers to + /// the position of the previous node in the path as a child of the current node + row_tree_path: Vec<(NodeInfo, ChildPosition)>, + /// Hash of the siblings of the node in the rows tree path (except for the root) + row_path_siblings: Vec>, + /// Info about the node of the index tree storing the rows tree containing the row + index_node_info: NodeInfo, + /// Info about the nodes in the path of the index tree for the index_node; The `ChildPosition` refers to + /// the position of the previous node in the path as a child of the current node + index_tree_path: Vec<(NodeInfo, ChildPosition)>, + /// Hash of the siblings of the nodes in the index tree path (except for the root) + index_path_siblings: Vec>, +} + +impl RowPath { + /// Instantiate a new instance of `RowPath` for a given proven row from the following input data: + /// - `row_node_info`: data about the node of the row tree storing the row + /// - `row_tree_path`: data about the nodes in the path of the rows tree for the node storing the row; + /// The `ChildPosition` refers to the position of the previous node in the path as a child of the current node + /// - `row_path_siblings`: hash of the siblings of the node in the rows tree path (except for the root) + /// - `index_node_info`: data about the node of the index tree storing the rows tree containing the row + /// - `index_tree_path`: data about the nodes in the path of the index tree for the index_node; + /// The `ChildPosition` refers to the position of the previous node in the path as a child of the current node + /// - `index_path_siblings`: hash of the siblings of the nodes in the index tree path (except for the root) + pub fn new( + row_node_info: NodeInfo, + row_tree_path: Vec<(NodeInfo, ChildPosition)>, + row_path_siblings: Vec>, + index_node_info: NodeInfo, + index_tree_path: Vec<(NodeInfo, ChildPosition)>, + index_path_siblings: Vec>, + ) -> Self { + Self { + row_node_info, + row_tree_path, + row_path_siblings, + index_node_info, + index_tree_path, + index_path_siblings, + } + } +} + +impl< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const L: usize, + const S: usize, + const PH: usize, + const PP: usize, + > RevelationCircuit +where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); S * L]:, +{ + pub(crate) fn new( + row_paths: [RowPath; L], + index_ids: [u64; 2], + item_ids: &[F], + results: [Vec; L], + limit: u32, + offset: u32, + distinct: bool, + placeholder_inputs: CheckPlaceholderGadget, + ) -> Result { + let mut row_tree_paths = [MerklePathGadget::::default(); L]; + let mut index_tree_paths = 
[MerklePathGadget::::default(); L]; + let mut row_node_info = [NodeInfo::default(); L]; + let mut index_node_info = [NodeInfo::default(); L]; + for (i, row) in row_paths.into_iter().enumerate() { + row_tree_paths[i] = + MerklePathGadget::new(&row.row_tree_path, &row.row_path_siblings, index_ids[1])?; + index_tree_paths[i] = MerklePathGadget::new( + &row.index_tree_path, + &row.index_path_siblings, + index_ids[0], + )?; + row_node_info[i] = row.row_node_info; + index_node_info[i] = row.index_node_info; + } + + let num_actual_items_per_row = item_ids.len(); + ensure!( + num_actual_items_per_row <= S, + format!("number of results per row is bigger than {}", S) + ); + let padded_ids = item_ids + .into_iter() + .chain(repeat(&F::default())) + .take(S) + .cloned() + .collect_vec(); + let results = results + .iter() + .flat_map(|res| { + assert!(res.len() >= num_actual_items_per_row); + res.into_iter() + .cloned() + .take(num_actual_items_per_row) + .chain(repeat(U256::default())) + .take(S) + .collect_vec() + }) + .collect_vec(); + + Ok(Self { + row_tree_paths, + index_tree_paths, + row_node_info, + index_node_info, + num_actual_items_per_row, + ids: padded_ids.try_into().unwrap(), + results: results.try_into().unwrap(), + limit, + offset, + distinct, + check_placeholder_inputs: placeholder_inputs, + }) + } + + pub(crate) fn build( + b: &mut CBuilder, + // Proofs of the L rows computed by the universal query circuit + row_proofs: &[QueryProofPublicInputs; L], + // proof of construction of the original tree in the pre-processing stage (IVC proof) + original_tree_proof: &OriginalTreePublicInputs, + ) -> RevelationWires { + // allocate input values + let [row_node_info, index_node_info] = + array::from_fn(|_| array::from_fn(|_| NodeInfoTarget::build(b))); + let is_row_node_leaf = array::from_fn(|_| b.add_virtual_bool_target_safe()); + let is_item_included = array::from_fn(|_| b.add_virtual_bool_target_safe()); + let distinct = b.add_virtual_bool_target_safe(); + let ids = b.add_virtual_target_arr(); + let results = b.add_virtual_u256_arr_unsafe(); // unsafe should be ok since they are matched against the order-agnostic digest + // computed by the universal query circuit + // closure to access the output items of the i-th result + let get_result = |i| &results[S * i..S * (i + 1)]; + let [min_query, max_query] = b.add_virtual_u256_arr_unsafe(); // unsafe should be ok since they are later included in placeholder hash + let [limit, offset] = b.add_virtual_target_arr(); + let tree_hash = original_tree_proof.merkle_hash(); + let zero = b.zero(); + let one = b.one(); + let zero_u256 = b.zero_u256(); + let _true = b._true(); + let _false = b._false(); + let mut num_results = zero; + let placeholder_hash = row_proofs[0].placeholder_hash_target(); + let computational_hash = row_proofs[0].computational_hash_target(); + let mut overflow = _false; + let mut row_paths = vec![]; + let mut index_paths = vec![]; + let mut max_result = None; + // Flag employed to enforce that the matching rows are all placed in the initial slots; + // this is a requirement to ensure that the check for DISTINCT is sound + let mut only_matching_rows = _true; + row_proofs + .into_iter() + .enumerate() + .for_each(|(i, row_proof)| { + let index_ids = row_proof.index_ids_target(); + let is_matching_row = b.is_equal(row_proof.num_matching_rows_target(), one); + // ensure that once `is_matching_row = false`, then it will be false for all + // subsequent iterations + only_matching_rows = b.and(only_matching_rows, is_matching_row); + 
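+                // connecting the running AND to the current flag makes the flags
+                // monotone: once a row is non-matching, every subsequent row must be
+                // non-matching too, so matching rows occupy a contiguous prefix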
b.connect(only_matching_rows.target, is_matching_row.target); + let row_node_hash = { + // if the node storing the current row is a leaf node in rows tree, then + // the hash of such node is already computed by `row_proof`; otherwise, + // we need to compute it + let inputs = row_node_info[i] + .child_hashes + .into_iter() + .flat_map(|hash| hash.to_targets()) + .chain(row_node_info[i].node_min.to_targets()) + .chain(row_node_info[i].node_max.to_targets()) + .chain(once(index_ids[1])) + .chain(row_proof.min_value_target().to_targets()) + .chain(row_proof.tree_hash_target().to_targets()) + .collect_vec(); + let row_node_hash = b.hash_n_to_hash_no_pad::(inputs); + b.select_hash( + is_row_node_leaf[i], + &row_proof.tree_hash_target(), + &row_node_hash, + ) + }; + let row_path_wires = MerklePathGadget::build(b, row_node_hash, index_ids[1]); + let row_tree_root = row_path_wires.root; + // compute hash of the index node storing the rows tree containing the current row + let index_node_hash = { + let inputs = index_node_info[i] + .child_hashes + .into_iter() + .flat_map(|hash| hash.to_targets()) + .chain(index_node_info[i].node_min.to_targets()) + .chain(index_node_info[i].node_max.to_targets()) + .chain(once(index_ids[0])) + .chain(row_proof.index_value_target().to_targets()) + .chain(row_tree_root.to_targets()) + .collect_vec(); + b.hash_n_to_hash_no_pad::(inputs) + }; + let index_path_wires = MerklePathGadget::build(b, index_node_hash, index_ids[0]); + // if the current row is valid, check that the root is the same of the original tree, completing + // membership proof for the current row; otherwise, we don't care + let root = b.select_hash(is_matching_row, &index_path_wires.root, &tree_hash); + b.connect_hashes(tree_hash, root); + + row_paths.push(row_path_wires.inputs); + index_paths.push(index_path_wires.inputs); + // check that the primary index value for the current row is within the query + // bounds (only if the row is valid) + let index_value = row_proof.index_value_target(); + let greater_than_min = b.is_less_or_equal_than_u256(&min_query, &index_value); + let smaller_than_max = b.is_less_or_equal_than_u256(&index_value, &max_query); + let in_range = b.and(greater_than_min, smaller_than_max); + let in_range = b.and(is_matching_row, in_range); + b.connect(in_range.target, is_matching_row.target); + + // enforce DISTINCT only for actual results: we enforce the i-th actual result is strictly smaller + // than the (i+1)-th actual result + max_result = if let Some(res) = &max_result { + let current_result: [UInt256Target; S] = + get_result(i).to_vec().try_into().unwrap(); + let is_smaller = b.is_less_than_or_equal_to_u256_arr(res, ¤t_result).0; + // flag specifying whether we must enforce DISTINCT for the current result or not + let must_be_enforced = b.and(is_matching_row, distinct); + let is_smaller = b.and(must_be_enforced, is_smaller); + b.connect(is_smaller.target, must_be_enforced.target); + Some(current_result) + } else { + Some(get_result(i).to_vec().try_into().unwrap()) + }; + + // Expose results for this row. + // First, we compute the digest of the results corresponding to this row, as computed in the universal + // query circuit, to check that the results correspond to the one computed by that circuit. + // To recompute the digest of the results, we first need to build the cells tree that is constructed + // in the universal query circuit to store the results computed for each row. 
Note that the + // universal query circuit stores results in a cells tree since to prove some queries a results tree + // needs to be built + let cells_tree_hash = + build_cells_tree(b, &get_result(i)[2..], &ids[2..], &is_item_included[2..]); + let second_item = b.select_u256(is_item_included[1], &get_result(i)[1], &zero_u256); + // digest = D(ids[0]||result[0]||ids[1]||second_item||cells_tree_hash) + let digest = { + let inputs = once(ids[0]) + .chain(get_result(i)[0].to_targets()) + .chain(once(ids[1])) + .chain(second_item.to_targets()) + .chain(cells_tree_hash.to_targets()) + .collect_vec(); + b.map_to_curve_point(&inputs) + }; + // we need to check that the digests are equal only if the current row is valid + let digest_equal = b.curve_eq(digest, row_proof.first_value_as_curve_target()); + let digest_equal = b.and(digest_equal, is_matching_row); + b.connect(is_matching_row.target, digest_equal.target); + num_results = b.add(num_results, is_matching_row.target); + + // check that placeholder hash and computational hash are the same for all + // the proofs + b.connect_hashes(row_proof.computational_hash_target(), computational_hash); + b.connect_hashes(row_proof.placeholder_hash_target(), placeholder_hash); + + overflow = b.or(overflow, row_proof.overflow_flag_target()); + }); + + // finally, check placeholders + // First, compute the final placeholder hash, adding the primary index query bounds + let final_placeholder_hash = { + let inputs = placeholder_hash + .to_targets() + .into_iter() + .chain(min_query.to_targets()) + .chain(max_query.to_targets()) + .collect_vec(); + b.hash_n_to_hash_no_pad::(inputs) + }; + let check_placeholder_wires = CheckPlaceholderGadget::build(b, &final_placeholder_hash); + + b.enforce_equal_u256( + &min_query, + &check_placeholder_wires.input_wires.placeholder_values[0], + ); + b.enforce_equal_u256( + &max_query, + &check_placeholder_wires.input_wires.placeholder_values[1], + ); + + // Add the information about DISTINCT keyword being used or not to the computational hash + let computational_hash = + ResultIdentifier::result_id_hash_circuit(b, computational_hash, &distinct); + + // Add the hash of placeholder identifiers and pre-processing metadata + // hash to the computational hash: + // H(pQ.C || placeholder_ids_hash || pQ.M) + let inputs = computational_hash + .to_targets() + .iter() + .chain(&check_placeholder_wires.placeholder_id_hash.to_targets()) + .chain(original_tree_proof.metadata_hash()) + .cloned() + .collect(); + let computational_hash = b.hash_n_to_hash_no_pad::(inputs); + + let flat_computational_hash = flatten_poseidon_hash_target(b, computational_hash); + + let placeholder_values_slice = check_placeholder_wires + .input_wires + .placeholder_values + .iter() + .flat_map(ToTargets::to_targets) + .collect_vec(); + + let results_slice = results.iter().flat_map(ToTargets::to_targets).collect_vec(); + + // Register the public innputs. + PublicInputs::<_, L, S, PH>::new( + &original_tree_proof.block_hash(), + &flat_computational_hash, + &placeholder_values_slice, + &results_slice, + &[check_placeholder_wires.num_placeholders], + // The aggregation query proof only has one result. 
+ &[num_results], + &[num_results], + &[overflow.target], + // Query limit + &[zero], + // Query offset + &[zero], + ) + .register(b); + + RevelationWires { + row_tree_paths: row_paths.try_into().unwrap(), + index_tree_paths: index_paths.try_into().unwrap(), + row_node_info, + index_node_info, + is_row_node_leaf, + is_item_included, + ids, + results, + limit, + offset, + distinct, + check_placeholder_wires: check_placeholder_wires.input_wires, + } + } + + pub(crate) fn assign( + &self, + pw: &mut PartialWitness, + wires: &RevelationWires, + ) { + self.row_tree_paths + .iter() + .zip(wires.row_tree_paths.iter()) + .for_each(|(value, target)| value.assign(pw, target)); + self.index_tree_paths + .iter() + .zip(wires.index_tree_paths.iter()) + .for_each(|(value, target)| value.assign(pw, target)); + [ + (self.row_node_info, &wires.row_node_info), + (self.index_node_info, &wires.index_node_info), + ] + .into_iter() + .for_each(|(nodes, target_nodes)| { + nodes + .iter() + .zip(target_nodes) + .for_each(|(&value, target)| target.set_target(pw, &value)) + }); + wires + .is_item_included + .iter() + .enumerate() + .for_each(|(i, &target)| pw.set_bool_target(target, i < self.num_actual_items_per_row)); + self.row_node_info + .iter() + .zip(wires.is_row_node_leaf) + .for_each(|(&node_info, target)| pw.set_bool_target(target, node_info.is_leaf)); + self.results + .iter() + .zip(wires.results.iter()) + .for_each(|(&value, target)| pw.set_u256_target(target, value)); + pw.set_target_arr(&wires.ids, &self.ids); + pw.set_target(wires.limit, self.limit.to_field()); + pw.set_target(wires.offset, self.offset.to_field()); + pw.set_bool_target(wires.distinct, self.distinct); + self.check_placeholder_inputs + .assign(pw, &wires.check_placeholder_wires); + } +} + +/// Compute the inputs for the dummy proof to be employed to pad up to L the number of +/// proofs provided as input to the revelation circuit. 
The proof is generated by +/// running the non-existence circuit over a fake index-tree node +pub(crate) fn generate_dummy_row_proof_inputs< + const MAX_NUM_COLUMNS: usize, + const MAX_NUM_PREDICATE_OPS: usize, + const MAX_NUM_RESULT_OPS: usize, + const MAX_NUM_ITEMS_PER_OUTPUT: usize, +>( + column_ids: &ColumnIDs, + predicate_operations: &[BasicOperation], + results: &ResultStructure, + placeholders: &Placeholders, + query_bounds: &QueryBounds, +) -> Result< + QueryCircuitInput< + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + MAX_NUM_ITEMS_PER_OUTPUT, + >, +> +where + [(); MAX_NUM_COLUMNS + MAX_NUM_RESULT_OPS]:, + [(); 2 * (MAX_NUM_PREDICATE_OPS + MAX_NUM_RESULT_OPS)]:, + [(); MAX_NUM_ITEMS_PER_OUTPUT - 1]:, + [(); PI_LEN::]:, + [(); >::HASH_SIZE]:, +{ + // we generate a dummy proof for a dummy node of the index tree with an index value out of range + let query_hashes = QueryHashNonExistenceCircuits::new::< + MAX_NUM_COLUMNS, + MAX_NUM_PREDICATE_OPS, + MAX_NUM_RESULT_OPS, + MAX_NUM_ITEMS_PER_OUTPUT, + >( + column_ids, + predicate_operations, + results, + placeholders, + query_bounds, + false, + )?; + // we generate info about the proven index-tree node; we can use all dummy values, except for the + // node value which must be out of the query range + let node_value = query_bounds.max_query_primary() + U256::from(1); + let node_info = NodeInfo::new( + &HashOutput::default(), + None, // no children, for simplicity + None, + node_value, + U256::default(), + U256::default(), + ); + // The query has no aggregation operations, so by construction of the circuits we + // know that the first aggregate operation is ID, while the remaining ones are dummies + let aggregation_ops = once(AggregationOperation::IdOp) + .chain(repeat(AggregationOperation::default())) + .take(MAX_NUM_ITEMS_PER_OUTPUT) + .collect_vec(); + QueryCircuitInput::new_non_existence_input( + node_info, + None, + None, + node_value, + &[ + column_ids.primary.to_canonical_u64(), + column_ids.secondary.to_canonical_u64(), + ], + &aggregation_ops, + query_hashes, + false, + query_bounds, + placeholders, + ) +} + +#[derive(Serialize, Deserialize, Clone, Debug)] +pub struct RecursiveCircuitWires< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const L: usize, + const S: usize, + const PH: usize, + const PP: usize, +> where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); S * L]:, +{ + revelation_circuit: RevelationWires, + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + row_verifiers: [RecursiveCircuitsVerifierTarget; L], + #[serde(serialize_with = "serialize", deserialize_with = "deserialize")] + preprocessing_proof: ProofWithPublicInputsTarget, +} + +#[derive(Clone, Debug, Serialize, Deserialize)] +pub struct RecursiveCircuitInputs< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const L: usize, + const S: usize, + const PH: usize, + const PP: usize, +> where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); S * L]:, +{ + pub(crate) inputs: RevelationCircuit, + #[serde( + serialize_with = "serialize_long_array", + deserialize_with = "deserialize_long_array" + )] + pub(crate) row_proofs: [ProofWithVK; L], + pub(crate) preprocessing_proof: ProofWithPublicInputs, + pub(crate) query_circuit_set: RecursiveCircuits, +} + +impl< + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const L: usize, + const S: usize, + const PH: usize, + const PP: 
usize, + > CircuitLogicWires + for RecursiveCircuitWires +where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); S * L]:, + [(); NUM_QUERY_IO::]:, + [(); >::HASH_SIZE]:, +{ + type CircuitBuilderParams = CircuitBuilderParams; + + type Inputs = RecursiveCircuitInputs; + + const NUM_PUBLIC_INPUTS: usize = REVELATION_PI_LEN::; + + fn circuit_logic( + builder: &mut CBuilder, + _verified_proofs: [&ProofWithPublicInputsTarget; 0], + builder_parameters: Self::CircuitBuilderParams, + ) -> Self { + let row_verifier = RecursiveCircuitsVerifierGagdet:: }>::new( + default_config(), + &builder_parameters.query_circuit_set, + ); + let row_verifiers = [0; L].map(|_| row_verifier.verify_proof_in_circuit_set(builder)); + let preprocessing_verifier = + RecursiveCircuitsVerifierGagdet::::new( + default_config(), + &builder_parameters.preprocessing_circuit_set, + ); + let preprocessing_proof = preprocessing_verifier.verify_proof_fixed_circuit_in_circuit_set( + builder, + &builder_parameters.preprocessing_vk, + ); + let row_pis = row_verifiers + .iter() + .map(|verifier| { + QueryProofPublicInputs::from_slice( + verifier.get_public_input_targets:: }>(), + ) + }) + .collect_vec(); + let preprocessing_pi = + OriginalTreePublicInputs::from_slice(&preprocessing_proof.public_inputs); + let revelation_circuit = + RevelationCircuit::build(builder, &row_pis.try_into().unwrap(), &preprocessing_pi); + + Self { + revelation_circuit, + row_verifiers, + preprocessing_proof, + } + } + + fn assign_input(&self, inputs: Self::Inputs, pw: &mut PartialWitness) -> Result<()> { + for (verifier_target, row_proof) in self.row_verifiers.iter().zip(inputs.row_proofs) { + let (proof, verifier_data) = (&row_proof).into(); + verifier_target.set_target(pw, &inputs.query_circuit_set, proof, verifier_data)?; + } + pw.set_proof_with_pis_target(&self.preprocessing_proof, &inputs.preprocessing_proof); + inputs.inputs.assign(pw, &self.revelation_circuit); + Ok(()) + } +} + +#[cfg(test)] +mod tests { + + use std::{array, cmp::Ordering, iter::once}; + + use alloy::primitives::U256; + use futures::{stream, StreamExt}; + use itertools::Itertools; + use mp2_common::{ + group_hashing::map_to_curve_point, + types::{HashOutput, CURVE_TARGET_LEN}, + u256::is_less_than_or_equal_to_u256_arr, + utils::ToFields, + C, D, F, + }; + use mp2_test::{ + cells_tree::{compute_cells_tree_hash, TestCell}, + circuit::{run_circuit, UserCircuit}, + utils::{gen_random_field_hash, gen_random_u256}, + }; + use plonky2::{ + field::types::{Field, PrimeField64, Sample}, + iop::{ + target::Target, + witness::{PartialWitness, WitnessWrite}, + }, + plonk::{circuit_builder::CircuitBuilder, config::GenericHashOut}, + }; + use rand::{thread_rng, Rng}; + + use crate::{ + ivc::{ + public_inputs::H_RANGE as ORIGINAL_TREE_H_RANGE, + PublicInputs as OriginalTreePublicInputs, + }, + query::{ + aggregation::{ChildPosition, NodeInfo}, + public_inputs::{PublicInputs as QueryProofPublicInputs, QueryPublicInputs}, + }, + revelation::{ + revelation_unproven_offset::RowPath, tests::TestPlaceholders, NUM_PREPROCESSING_IO, + NUM_QUERY_IO, + }, + test_utils::{random_aggregation_operations, random_aggregation_public_inputs}, + }; + + use super::{RevelationCircuit, RevelationWires}; + + #[derive(Clone, Debug)] + struct TestRevelationCircuit< + 'a, + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const L: usize, + const S: usize, + const PH: usize, + const PP: usize, + > + where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + 
[(); S * L]:, + { + circuit: RevelationCircuit, + row_pis: &'a [Vec; L], + original_tree_pis: &'a [F], + } + + impl< + 'a, + const ROW_TREE_MAX_DEPTH: usize, + const INDEX_TREE_MAX_DEPTH: usize, + const L: usize, + const S: usize, + const PH: usize, + const PP: usize, + > UserCircuit + for TestRevelationCircuit<'a, ROW_TREE_MAX_DEPTH, INDEX_TREE_MAX_DEPTH, L, S, PH, PP> + where + [(); ROW_TREE_MAX_DEPTH - 1]:, + [(); INDEX_TREE_MAX_DEPTH - 1]:, + [(); S * L]:, + { + type Wires = ( + RevelationWires, + [Vec; L], + Vec, + ); + + fn build(c: &mut CircuitBuilder) -> Self::Wires { + let row_pis_raw: [Vec; L] = (0..L) + .map(|_| c.add_virtual_targets(NUM_QUERY_IO::)) + .collect_vec() + .try_into() + .unwrap(); + let original_pis_raw = c.add_virtual_targets(NUM_PREPROCESSING_IO); + let row_pis = row_pis_raw + .iter() + .map(|pis| QueryProofPublicInputs::from_slice(&pis)) + .collect_vec() + .try_into() + .unwrap(); + let original_pis = OriginalTreePublicInputs::from_slice(&original_pis_raw); + let revelation_wires = RevelationCircuit::build(c, &row_pis, &original_pis); + (revelation_wires, row_pis_raw, original_pis_raw) + } + + fn prove(&self, pw: &mut PartialWitness, wires: &Self::Wires) { + self.circuit.assign(pw, &wires.0); + self.row_pis + .iter() + .zip(&wires.1) + .for_each(|(pis, pis_target)| pw.set_target_arr(pis_target, pis)); + pw.set_target_arr(&wires.2, self.original_tree_pis); + } + } + + // test function for this revelation circuit. If `distinct` is true, then the + // results are enforced to be distinct + async fn test_revelation_unproven_offset_circuit(distinct: bool) { + const ROW_TREE_MAX_DEPTH: usize = 10; + const INDEX_TREE_MAX_DEPTH: usize = 10; + const L: usize = 5; + const S: usize = 7; + const PH: usize = 10; + const PP: usize = 30; + let ops = random_aggregation_operations::(); + let mut row_pis = random_aggregation_public_inputs(&ops); + let rng = &mut thread_rng(); + let mut original_tree_pis = (0..NUM_PREPROCESSING_IO) + .map(|_| rng.gen()) + .collect::>() + .to_fields(); + const NUM_PLACEHOLDERS: usize = 5; + let test_placeholders = TestPlaceholders::sample(NUM_PLACEHOLDERS); + let (index_ids, computational_hash) = { + let row_pi_0 = QueryProofPublicInputs::<_, S>::from_slice(&row_pis[0]); + let index_ids = row_pi_0.index_ids(); + let computational_hash = row_pi_0.computational_hash(); + + (index_ids, computational_hash) + }; + let placeholder_hash = test_placeholders.query_placeholder_hash; + // set same index_ids, computational hash and placeholder hash for all proofs; set also num matching rows to 1 + // for all proofs + row_pis.iter_mut().for_each(|pis| { + let [index_id_range, ch_range, ph_range, count_range] = [ + QueryPublicInputs::IndexIds, + QueryPublicInputs::ComputationalHash, + QueryPublicInputs::PlaceholderHash, + QueryPublicInputs::NumMatching, + ] + .map(QueryProofPublicInputs::::to_range); + pis[index_id_range].copy_from_slice(&index_ids); + pis[ch_range].copy_from_slice(&computational_hash.to_fields()); + pis[ph_range].copy_from_slice(&placeholder_hash.to_fields()); + pis[count_range].copy_from_slice(&[F::ONE]); + }); + let index_value_range = + QueryProofPublicInputs::::to_range(QueryPublicInputs::IndexValue); + let hash_range = QueryProofPublicInputs::::to_range(QueryPublicInputs::TreeHash); + let min_query = test_placeholders.min_query; + let max_query = test_placeholders.max_query; + // closure that modifies a set of row public inputs to ensure that the index value lies + // within the query bounds; the new index value set in the public inputs is 
returned by the closure + let enforce_index_value_in_query_range = |pis: &mut [F], index_value: U256| { + let query_range_size = max_query - min_query + U256::from(1); + let new_index_value = min_query + index_value % query_range_size; + pis[index_value_range.clone()].copy_from_slice(&new_index_value.to_fields()); + assert!(new_index_value >= min_query && new_index_value <= max_query); + new_index_value + }; + // build a test tree containing the rows 0..5 found in row_pis + // Index tree: + // A + // B C + // Rows tree A: + // 0 + // 1 + // Rows tree B: + // 2 + // Rows tree C: + // 3 + // 4 5 + let node_1 = { + let row_pi = QueryProofPublicInputs::<_, S>::from_slice(&row_pis[1]); + let embedded_tree_hash = + HashOutput::try_from(gen_random_field_hash::().to_bytes()).unwrap(); + let node_value = row_pi.min_value(); + NodeInfo::new( + &embedded_tree_hash, + None, + None, + node_value, + node_value, + node_value, + ) + }; + // set hash in row 1 proof to node 1 hash, given that node 1 is a leaf node + let node_1_hash = node_1.compute_node_hash(index_ids[1]); + row_pis[1][hash_range.clone()].copy_from_slice(&node_1_hash.to_fields()); + let node_0 = { + let row_pi = QueryProofPublicInputs::<_, S>::from_slice(&row_pis[0]); + let embedded_tree_hash = HashOutput::try_from(row_pi.tree_hash().to_bytes()).unwrap(); + let node_value = row_pi.min_value(); + // left child is node 1 + let left_child_hash = HashOutput::try_from(node_1_hash.to_bytes()).unwrap(); + NodeInfo::new( + &embedded_tree_hash, + Some(&left_child_hash), + None, + node_value, + node_1.min, + node_value, + ) + }; + let node_2 = { + let row_pi = QueryProofPublicInputs::<_, S>::from_slice(&row_pis[2]); + let embedded_tree_hash = + HashOutput::try_from(gen_random_field_hash::().to_bytes()).unwrap(); + let node_value = row_pi.min_value(); + NodeInfo::new( + &embedded_tree_hash, + None, + None, + node_value, + node_value, + node_value, + ) + }; + // set hash in row 2 proof to node 2 hash, given that node 2 is a leaf node + let node_2_hash = node_2.compute_node_hash(index_ids[1]); + row_pis[2][hash_range.clone()].copy_from_slice(&node_2_hash.to_fields()); + let node_4 = { + let row_pi = QueryProofPublicInputs::<_, S>::from_slice(&row_pis[4]); + let embedded_tree_hash = + HashOutput::try_from(gen_random_field_hash::().to_bytes()).unwrap(); + let node_value = row_pi.min_value(); + NodeInfo::new( + &embedded_tree_hash, + None, + None, + node_value, + node_value, + node_value, + ) + }; + // set hash in row 4 proof to node 4 hash, given that node 4 is a leaf node + let node_4_hash = node_4.compute_node_hash(index_ids[1]); + row_pis[4][hash_range.clone()].copy_from_slice(&node_4_hash.to_fields()); + let node_5 = { + // can use all dummy values for this node, since there is no proof associated to it + let embedded_tree_hash = + HashOutput::try_from(gen_random_field_hash::().to_bytes()).unwrap(); + let [node_value, node_min, node_max] = array::from_fn(|_| gen_random_u256(rng)); + NodeInfo::new( + &embedded_tree_hash, + None, + None, + node_value, + node_min, + node_max, + ) + }; + let node_4_hash = HashOutput::try_from(node_4_hash.to_bytes()).unwrap(); + let node_5_hash = + HashOutput::try_from(node_5.compute_node_hash(index_ids[1]).to_bytes()).unwrap(); + let node_3 = { + let row_pi = QueryProofPublicInputs::<_, S>::from_slice(&row_pis[3]); + let embedded_tree_hash = HashOutput::try_from(row_pi.tree_hash().to_bytes()).unwrap(); + let node_value = row_pi.min_value(); + NodeInfo::new( + &embedded_tree_hash, + Some(&node_4_hash), // left child is 
node 4 + Some(&node_5_hash), // right child is node 5 + node_value, + node_4.min, + node_5.max, + ) + }; + let node_B = { + let row_pi = QueryProofPublicInputs::<_, S>::from_slice(&row_pis[2]); + let embedded_tree_hash = + HashOutput::try_from(node_2.compute_node_hash(index_ids[1]).to_bytes()).unwrap(); + let index_value = row_pi.index_value(); + let node_value = enforce_index_value_in_query_range(&mut row_pis[2], index_value); + NodeInfo::new( + &embedded_tree_hash, + None, + None, + node_value, + node_value, + node_value, + ) + }; + let node_C = { + let row_pi = QueryProofPublicInputs::<_, S>::from_slice(&row_pis[4]); + let embedded_tree_hash = + HashOutput::try_from(node_3.compute_node_hash(index_ids[1]).to_bytes()).unwrap(); + let index_value = row_pi.index_value(); + let node_value = enforce_index_value_in_query_range(&mut row_pis[4], index_value); + // we need also to set index value PI in row_pis[3] to the same value of row_pis[4], as they are in the same index tree + row_pis[3][index_value_range.clone()].copy_from_slice(&node_value.to_fields()); + NodeInfo::new( + &embedded_tree_hash, + None, + None, + node_value, + node_value, + node_value, + ) + }; + let node_B_hash = + HashOutput::try_from(node_B.compute_node_hash(index_ids[0]).to_bytes()).unwrap(); + let node_C_hash = + HashOutput::try_from(node_C.compute_node_hash(index_ids[0]).to_bytes()).unwrap(); + let node_A = { + let row_pi = QueryProofPublicInputs::<_, S>::from_slice(&row_pis[0]); + let embedded_tree_hash = + HashOutput::try_from(node_0.compute_node_hash(index_ids[1]).to_bytes()).unwrap(); + let index_value = row_pi.index_value(); + let node_value = enforce_index_value_in_query_range(&mut row_pis[0], index_value); + // we need also to set index value PI in row_pis[1] to the same value of row_pis[0], as they are in the same index tree + row_pis[1][index_value_range].copy_from_slice(&node_value.to_fields()); + NodeInfo::new( + &embedded_tree_hash, + Some(&node_B_hash), // left child is node B + Some(&node_C_hash), // right child is node C + node_value, + node_B.min, + node_C.max, + ) + }; + // set original tree PI to the root of the tree + let root = node_A.compute_node_hash(index_ids[0]); + original_tree_pis[ORIGINAL_TREE_H_RANGE].copy_from_slice(&root.to_fields()); + + // sample final results and set order-agnostic digests in row_pis proofs accordingly + const NUM_ACTUAL_ITEMS_PER_OUTPUT: usize = 4; + let mut results: [[U256; NUM_ACTUAL_ITEMS_PER_OUTPUT]; L] = + array::from_fn(|_| array::from_fn(|_| gen_random_u256(rng))); + // sort them to ensure that DISTINCT constraints are satisfied + results.sort_by(|a, b| { + let (is_smaller, is_eq) = is_less_than_or_equal_to_u256_arr(a, b); + if is_smaller { + return Ordering::Less; + } + if is_eq { + return Ordering::Equal; + } + Ordering::Greater + }); + // random ids of output items + let ids: [F; NUM_ACTUAL_ITEMS_PER_OUTPUT] = F::rand_array(); + + let digests = stream::iter(results.iter()) + .then(|res| async { + // build set of cells for the cells tree + let cells = res + .iter() + .zip(ids.iter()) + .map(|(value, id)| TestCell::new(*value, *id)) + .collect_vec(); + map_to_curve_point( + &once(cells[0].id) + .chain(cells[0].value.to_fields()) + .chain(once(cells.get(1).map(|cell| cell.id).unwrap_or_default())) + .chain( + cells + .get(1) + .map(|cell| cell.value) + .unwrap_or_default() + .to_fields(), + ) + .chain( + compute_cells_tree_hash(cells.get(2..).unwrap_or_default().to_vec()) + .await + .to_vec(), + ) + .collect_vec(), + ) + }) + .collect::>() + .await; + + 
row_pis.iter_mut().zip(digests).for_each(|(pis, digest)| { + let values_range = + QueryProofPublicInputs::::to_range(QueryPublicInputs::OutputValues); + pis[values_range.start..values_range.start + CURVE_TARGET_LEN] + .copy_from_slice(&digest.to_fields()) + }); + + // prepare RowPath inputs for each row + let row_path_1 = RowPath { + row_node_info: node_1, + row_tree_path: vec![(node_0.clone(), ChildPosition::Left)], + row_path_siblings: vec![None], + index_node_info: node_A.clone(), + index_tree_path: vec![], + index_path_siblings: vec![], + }; + let row_path_0 = RowPath { + row_node_info: node_0, + row_tree_path: vec![], + row_path_siblings: vec![], + index_node_info: node_A.clone(), + index_tree_path: vec![], + index_path_siblings: vec![], + }; + let row_path_2 = RowPath { + row_node_info: node_2, + row_tree_path: vec![], + row_path_siblings: vec![], + index_node_info: node_B.clone(), + index_tree_path: vec![(node_A.clone(), ChildPosition::Left)], + index_path_siblings: vec![Some(node_C_hash)], + }; + let row_path_4 = RowPath { + row_node_info: node_4, + row_tree_path: vec![(node_3.clone(), ChildPosition::Left)], + row_path_siblings: vec![Some(node_5_hash)], + index_node_info: node_C.clone(), + index_tree_path: vec![(node_A.clone(), ChildPosition::Right)], + index_path_siblings: vec![Some(node_B_hash.clone())], + }; + let row_path_3 = RowPath { + row_node_info: node_3, + row_tree_path: vec![], + row_path_siblings: vec![], + index_node_info: node_C.clone(), + index_tree_path: vec![(node_A.clone(), ChildPosition::Right)], + index_path_siblings: vec![Some(node_B_hash)], + }; + + let circuit = + TestRevelationCircuit:: { + circuit: RevelationCircuit::new( + [row_path_0, row_path_1, row_path_2, row_path_3, row_path_4], + index_ids + .into_iter() + .map(|id| id.to_canonical_u64()) + .collect_vec() + .try_into() + .unwrap(), + &ids, + results.map(|res| res.to_vec()), + 0, + 0, + false, + test_placeholders.check_placeholder_inputs, + ) + .unwrap(), + row_pis: &row_pis, + original_tree_pis: &original_tree_pis, + }; + + let proof = run_circuit::(circuit); + } + + #[tokio::test] + async fn test_revelation_unproven_offset_circuit_no_distinct() { + test_revelation_unproven_offset_circuit(false).await + } + + #[tokio::test] + async fn test_revelation_unproven_offset_circuit_distinct() { + test_revelation_unproven_offset_circuit(true).await + } +} diff --git a/verifiable-db/src/revelation/revelation_without_results_tree.rs b/verifiable-db/src/revelation/revelation_without_results_tree.rs index 743d12d87..f041f0a02 100644 --- a/verifiable-db/src/revelation/revelation_without_results_tree.rs +++ b/verifiable-db/src/revelation/revelation_without_results_tree.rs @@ -6,9 +6,8 @@ use crate::{ computational_hash_ids::AggregationOperation, public_inputs::PublicInputs as QueryProofPublicInputs, }, - revelation::{placeholders_check::check_placeholders, PublicInputs}, + revelation::PublicInputs, }; -use alloy::primitives::U256; use anyhow::Result; use itertools::Itertools; use mp2_common::{ @@ -17,18 +16,15 @@ use mp2_common::{ poseidon::{flatten_poseidon_hash_target, H}, proof::ProofWithVK, public_inputs::PublicInputCommon, - serialization::{ - deserialize, deserialize_array, deserialize_long_array, serialize, serialize_array, - serialize_long_array, - }, + serialization::{deserialize, serialize}, types::CBuilder, - u256::{CircuitBuilderU256, UInt256Target, WitnessWriteU256}, + u256::{CircuitBuilderU256, UInt256Target}, utils::ToTargets, C, D, F, }; use plonky2::{ iop::{ - target::{BoolTarget, Target}, + 
target::Target, witness::{PartialWitness, WitnessWrite}, }, plonk::{ @@ -45,12 +41,9 @@ use recursion_framework::{ }, }; use serde::{Deserialize, Serialize}; -use std::array; use super::{ - placeholders_check::{ - CheckedPlaceholder, CheckedPlaceholderTarget, NUM_SECONDARY_INDEX_PLACEHOLDERS, - }, + placeholders_check::{CheckPlaceholderGadget, CheckPlaceholderInputWires}, NUM_PREPROCESSING_IO, NUM_QUERY_IO, PI_LEN as REVELATION_PI_LEN, }; @@ -65,28 +58,7 @@ pub struct RevelationWithoutResultsTreeWires< const PH: usize, const PP: usize, > { - #[serde( - serialize_with = "serialize_array", - deserialize_with = "deserialize_array" - )] - is_placeholder_valid: [BoolTarget; PH], - #[serde( - serialize_with = "serialize_array", - deserialize_with = "deserialize_array" - )] - placeholder_ids: [Target; PH], - #[serde( - serialize_with = "serialize_array", - deserialize_with = "deserialize_array" - )] - placeholder_values: [UInt256Target; PH], - #[serde( - serialize_with = "serialize_long_array", - deserialize_with = "deserialize_long_array" - )] - to_be_checked_placeholders: [CheckedPlaceholderTarget; PP], - secondary_query_bound_placeholders: - [CheckedPlaceholderTarget; NUM_SECONDARY_INDEX_PLACEHOLDERS], + check_placeholder: CheckPlaceholderInputWires, } #[derive(Clone, Debug, Serialize, Deserialize)] @@ -96,47 +68,7 @@ pub struct RevelationWithoutResultsTreeCircuit< const PH: usize, const PP: usize, > { - /// Real number of the valid placeholders - pub(crate) num_placeholders: usize, - /// Array of the placeholder identifiers that can be employed in the query: - /// - The first 4 items are expected to be constant identifiers of the query - /// bounds `MIN_I1, MAX_I1` and `MIN_I2, MAX_I2` - /// - The following `num_placeholders - 4` values are expected to be the - /// identifiers of the placeholders employed in the query - /// - The remaining `PH - num_placeholders` items are expected to be the - /// same as `placeholders_ids[0]` - #[serde( - serialize_with = "serialize_long_array", - deserialize_with = "deserialize_long_array" - )] - pub(crate) placeholder_ids: [F; PH], - /// Array of the placeholder values that can be employed in the query: - /// - The first 4 values are expected to be the bounds `MIN_I1, MAX_I1` and - /// `MIN_I2, MAX_I2` found in the query for the primary and secondary - /// indexed columns - /// - The following `num_placeholders - 4` values are expected to be the - /// values for the placeholders employed in the query - /// - The remaining `PH - num_placeholders` values are expected to be the - /// same as `placeholder_values[0]` - #[serde( - serialize_with = "serialize_long_array", - deserialize_with = "deserialize_long_array" - )] - pub(crate) placeholder_values: [U256; PH], - /// Placeholders data to be provided to `check_placeholder` gadget to - /// check that placeholders employed in universal query circuit matches - /// with the `placeholder_values` exposed as public input by this proof - #[serde( - serialize_with = "serialize_long_array", - deserialize_with = "deserialize_long_array" - )] - pub(crate) to_be_checked_placeholders: [CheckedPlaceholder; PP], - /// Placeholders data related to the placeholders employed in the - /// universal query circuit to hash the query bounds for the secondary - /// index; they are provided as well to `check_placeholder` gadget to - /// check the correctness of the placeholders employed for query bounds - pub(crate) secondary_query_bound_placeholders: - [CheckedPlaceholder; NUM_SECONDARY_INDEX_PLACEHOLDERS], + pub(crate) 
check_placeholder: CheckPlaceholderGadget, } impl @@ -154,15 +86,6 @@ where let zero = b.zero(); let u256_zero = b.zero_u256(); - let is_placeholder_valid = array::from_fn(|_| b.add_virtual_bool_target_safe()); - let placeholder_ids = b.add_virtual_target_arr(); - // `placeholder_values` are exposed as public inputs to the Solidity contract - // which will not do range-check. - let placeholder_values = array::from_fn(|_| b.add_virtual_u256()); - let to_be_checked_placeholders = array::from_fn(|_| CheckedPlaceholderTarget::new(b)); - let secondary_query_bound_placeholders = - array::from_fn(|_| CheckedPlaceholderTarget::new(b)); - // The operation cannot be ID for aggregation. let [op_avg, op_count] = [AggregationOperation::AvgOp, AggregationOperation::CountOp] .map(|op| b.constant(op.to_field())); @@ -207,15 +130,8 @@ where let final_placeholder_hash = b.hash_n_to_hash_no_pad::(inputs); // Check the placeholder data. - let (num_placeholders, placeholder_ids_hash) = check_placeholders( - b, - &is_placeholder_valid, - &placeholder_ids, - &placeholder_values, - &to_be_checked_placeholders, - &secondary_query_bound_placeholders, - &final_placeholder_hash, - ); + let check_placeholder_wires = + CheckPlaceholderGadget::::build(b, &final_placeholder_hash); // Check that the tree employed to build the queries is the same as the // tree constructed in pre-processing. @@ -230,13 +146,15 @@ where let inputs = query_proof .to_computational_hash_raw() .iter() - .chain(&placeholder_ids_hash.to_targets()) + .chain(&check_placeholder_wires.placeholder_id_hash.to_targets()) .chain(original_tree_proof.metadata_hash()) .cloned() .collect(); let computational_hash = b.hash_n_to_hash_no_pad::(inputs); - let placeholder_values_slice = placeholder_values + let placeholder_values_slice = check_placeholder_wires + .input_wires + .placeholder_values .iter() .flat_map(ToTargets::to_targets) .collect_vec(); @@ -252,7 +170,7 @@ where &flat_computational_hash, &placeholder_values_slice, &results_slice, - &[num_placeholders], + &[check_placeholder_wires.num_placeholders], // The aggregation query proof only has one result. 
&[num_results.target], &[query_proof.num_matching_rows_target()], @@ -265,11 +183,7 @@ where .register(b); RevelationWithoutResultsTreeWires { - is_placeholder_valid, - placeholder_ids, - placeholder_values, - to_be_checked_placeholders, - secondary_query_bound_placeholders, + check_placeholder: check_placeholder_wires.input_wires, } } @@ -278,30 +192,10 @@ where pw: &mut PartialWitness, wires: &RevelationWithoutResultsTreeWires, ) { - wires - .is_placeholder_valid - .iter() - .enumerate() - .for_each(|(i, t)| pw.set_bool_target(*t, i < self.num_placeholders)); - pw.set_target_arr(&wires.placeholder_ids, &self.placeholder_ids); - wires - .placeholder_values - .iter() - .zip(self.placeholder_values) - .for_each(|(t, v)| pw.set_u256_target(t, v)); - wires - .to_be_checked_placeholders - .iter() - .zip(&self.to_be_checked_placeholders) - .for_each(|(t, v)| v.assign(pw, t)); - wires - .secondary_query_bound_placeholders - .iter() - .zip(&self.secondary_query_bound_placeholders) - .for_each(|(t, v)| v.assign(pw, t)); + self.check_placeholder.assign(pw, &wires.check_placeholder); } } - +#[derive(Clone, Debug)] pub struct CircuitBuilderParams { pub(crate) query_circuit_set: RecursiveCircuits, pub(crate) preprocessing_circuit_set: RecursiveCircuits, @@ -393,6 +287,7 @@ mod tests { random_original_tree_proof, }, }; + use alloy::primitives::U256; use mp2_common::{poseidon::flatten_poseidon_hash_value, utils::ToFields, C, D}; use mp2_test::circuit::{run_circuit, UserCircuit}; use plonky2::{field::types::Field, plonk::config::Hasher}; @@ -415,12 +310,7 @@ mod tests { impl From<&TestPlaceholders> for RevelationWithoutResultsTreeCircuit { fn from(test_placeholders: &TestPlaceholders) -> Self { Self { - num_placeholders: test_placeholders.num_placeholders, - placeholder_ids: test_placeholders.placeholder_ids, - placeholder_values: test_placeholders.placeholder_values, - to_be_checked_placeholders: test_placeholders.to_be_checked_placeholders, - secondary_query_bound_placeholders: test_placeholders - .secondary_query_bound_placeholders, + check_placeholder: test_placeholders.check_placeholder_inputs.clone(), } } } @@ -547,12 +437,17 @@ mod tests { // Number of placeholders assert_eq!( pi.num_placeholders(), - test_placeholders.num_placeholders.to_field() + test_placeholders + .check_placeholder_inputs + .num_placeholders + .to_field() ); // Placeholder values assert_eq!( pi.placeholder_values(), - test_placeholders.placeholder_values + test_placeholders + .check_placeholder_inputs + .placeholder_values ); // Entry count assert_eq!(pi.entry_count(), entry_count); @@ -614,7 +509,7 @@ mod tests { fn test_revelation_without_results_tree_for_no_op_avg_with_no_entries() { // Initialize the all operations to SUM or COUNT (not AVG). 
let mut rng = thread_rng(); - let ops = array::from_fn(|_| { + let ops = std::array::from_fn(|_| { [AggregationOperation::SumOp, AggregationOperation::CountOp] .choose(&mut rng) .unwrap() diff --git a/verifiable-db/src/row_tree/full_node.rs b/verifiable-db/src/row_tree/full_node.rs index fd4ad588d..5a4ae96c0 100644 --- a/verifiable-db/src/row_tree/full_node.rs +++ b/verifiable-db/src/row_tree/full_node.rs @@ -1,12 +1,7 @@ use derive_more::{From, Into}; use mp2_common::{ - default_config, - poseidon::H, - proof::ProofWithVK, - public_inputs::PublicInputCommon, - u256::CircuitBuilderU256, - utils::ToTargets, - C, D, F, + default_config, poseidon::H, proof::ProofWithVK, public_inputs::PublicInputCommon, + u256::CircuitBuilderU256, utils::ToTargets, C, D, F, }; use plonky2::{ iop::{target::Target, witness::PartialWitness}, diff --git a/verifiable-db/src/row_tree/leaf.rs b/verifiable-db/src/row_tree/leaf.rs index d791c8c91..f28c23646 100644 --- a/verifiable-db/src/row_tree/leaf.rs +++ b/verifiable-db/src/row_tree/leaf.rs @@ -8,10 +8,7 @@ use mp2_common::{ C, D, F, }; use plonky2::{ - iop::{ - target::Target, - witness::PartialWitness, - }, + iop::{target::Target, witness::PartialWitness}, plonk::{circuit_builder::CircuitBuilder, proof::ProofWithPublicInputsTarget}, }; use recursion_framework::{ diff --git a/verifiable-db/src/row_tree/mod.rs b/verifiable-db/src/row_tree/mod.rs index bd209fe19..c76daa172 100644 --- a/verifiable-db/src/row_tree/mod.rs +++ b/verifiable-db/src/row_tree/mod.rs @@ -1,4 +1,3 @@ - mod api; mod full_node; mod leaf; diff --git a/verifiable-db/src/test_utils.rs b/verifiable-db/src/test_utils.rs index 4d5ae616a..881ba427b 100644 --- a/verifiable-db/src/test_utils.rs +++ b/verifiable-db/src/test_utils.rs @@ -47,6 +47,8 @@ pub const MAX_NUM_PLACEHOLDERS: usize = 14; pub const MAX_NUM_COLUMNS: usize = 20; pub const MAX_NUM_PREDICATE_OPS: usize = 20; pub const MAX_NUM_RESULT_OPS: usize = 20; +pub const ROW_TREE_MAX_DEPTH: usize = 10; +pub const INDEX_TREE_MAX_DEPTH: usize = 15; pub const NUM_COLUMNS: usize = 4; /// Generate a random original tree proof for testing. @@ -210,7 +212,8 @@ impl TestRevelationData { result_operations, output_items, aggregation_ops, - ); + ) + .unwrap(); let placeholders = Placeholders::from(( placeholder_ids .into_iter()