From 45fa3238d00ecbbbebaa382186cc767bb5c4cbeb Mon Sep 17 00:00:00 2001 From: lonerapier Date: Wed, 25 Sep 2024 16:59:30 +0530 Subject: [PATCH 01/10] remove slice --- circuits/test/utils/array.test.ts | 30 ------------------------------ circuits/utils/array.circom | 29 ----------------------------- circuits/utils/hash.circom | 10 ++++++++-- 3 files changed, 8 insertions(+), 61 deletions(-) diff --git a/circuits/test/utils/array.test.ts b/circuits/test/utils/array.test.ts index c951452..02b2fce 100644 --- a/circuits/test/utils/array.test.ts +++ b/circuits/test/utils/array.test.ts @@ -1,34 +1,4 @@ import { circomkit, WitnessTester } from "../common"; - -describe("array", () => { - describe("Slice", () => { - let circuit: WitnessTester<["in"], ["out"]>; - before(async () => { - circuit = await circomkit.WitnessTester(`Slice`, { - file: "utils/array", - template: "Slice", - params: [10, 2, 4], - }); - console.log("#constraints:", await circuit.getConstraintCount()); - }); - - it("witness: [random*10], start: 2, end: 4", async () => { - const input = Array.from({ length: 10 }, () => Math.floor(Math.random() * 256)); - await circuit.expectPass( - { in: input }, - { out: input.slice(2, 4) } - ); - }); - - it("witness: [random*9], start: 2, end: 4", async () => { - const input = Array.from({ length: 9 }, () => Math.floor(Math.random() * 256)); - await circuit.expectFail( - { in: input }, - ); - }); - }); -}); - describe("IsEqualArray", () => { let circuit: WitnessTester<["in"], ["out"]>; before(async () => { diff --git a/circuits/utils/array.circom b/circuits/utils/array.circom index 6a9ae48..ff45809 100644 --- a/circuits/utils/array.circom +++ b/circuits/utils/array.circom @@ -2,35 +2,6 @@ pragma circom 2.1.9; include "circomlib/circuits/comparators.circom"; -/// Extract a fixed portion of an array -/// -/// # Note -/// Unlike SelectSubArray, Slice uses compile-time known indices and doesn't pad the output. 
-/// Slice is more efficient for fixed ranges, while SelectSubArray offers runtime flexibility -/// -/// # Parameters -/// - `n`: The length of the input array -/// - `start`: The starting index of the slice (inclusive) -/// - `end`: The ending index of the slice (exclusive) -/// -/// # Inputs -/// - `in`: The input array of length n -/// -/// # Output -/// - `out`: The sliced array of length (end - start) -template Slice(n, start, end) { - assert(n >= end); - assert(start >= 0); - assert(end >= start); - - signal input in[n]; - signal output out[end - start]; - - for (var i = start; i < end; i++) { - out[i - start] <== in[i]; - } -} - /* This template is an indicator for two equal array inputs. diff --git a/circuits/utils/hash.circom b/circuits/utils/hash.circom index 3d0ef40..e2e1823 100644 --- a/circuits/utils/hash.circom +++ b/circuits/utils/hash.circom @@ -36,10 +36,16 @@ template PoseidonModular(numElements) { if (end > numElements) { // last chunk end = numElements; - var last_chunk[last_chunk_size] = Slice(numElements, start, end)(in); + var last_chunk[last_chunk_size]; + for (var i=start ; i Date: Wed, 25 Sep 2024 22:10:33 +0530 Subject: [PATCH 02/10] remove syntax --- circuits/json/parser/machine.circom | 19 ++++++++----------- src/codegen/http.rs | 5 +---- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/circuits/json/parser/machine.circom b/circuits/json/parser/machine.circom index 78a38c6..21c8a87 100644 --- a/circuits/json/parser/machine.circom +++ b/circuits/json/parser/machine.circom @@ -56,29 +56,28 @@ template StateUpdate(MAX_STACK_HEIGHT) { signal output next_parsing_string; signal output next_parsing_number; - component Syntax = Syntax(); component Command = Command(); //--------------------------------------------------------------------------------------------// // Break down what was read // * read in a start brace `{` * component readStartBrace = IsEqual(); - readStartBrace.in <== [byte, Syntax.START_BRACE]; + readStartBrace.in 
<== [byte, 123]; // * read in an end brace `}` * component readEndBrace = IsEqual(); - readEndBrace.in <== [byte, Syntax.END_BRACE]; + readEndBrace.in <== [byte, 125]; // * read in a start bracket `[` * component readStartBracket = IsEqual(); - readStartBracket.in <== [byte, Syntax.START_BRACKET]; + readStartBracket.in <== [byte, 91]; // * read in an end bracket `]` * component readEndBracket = IsEqual(); - readEndBracket.in <== [byte, Syntax.END_BRACKET]; + readEndBracket.in <== [byte, 93]; // * read in a colon `:` * component readColon = IsEqual(); - readColon.in <== [byte, Syntax.COLON]; + readColon.in <== [byte, 58]; // * read in a comma `,` * component readComma = IsEqual(); - readComma.in <== [byte, Syntax.COMMA]; + readComma.in <== [byte, 44]; // * read in some delimeter * signal readDelimeter <== readStartBrace.out + readEndBrace.out + readStartBracket.out + readEndBracket.out + readColon.out + readComma.out; @@ -88,7 +87,7 @@ template StateUpdate(MAX_STACK_HEIGHT) { readNumber.range <== [48, 57]; // This is the range where ASCII digits are // * read in a quote `"` * component readQuote = IsEqual(); - readQuote.in <== [byte, Syntax.QUOTE]; + readQuote.in <== [byte, 34]; component readOther = IsZero(); readOther.in <== readDelimeter + readNumber.out + readQuote.out; //--------------------------------------------------------------------------------------------// @@ -240,6 +239,7 @@ template GetTopOfStack(n) { atTop.vals[i] <== stack[i]; } atTop.case <== selector; + _ <== atTop.match; value <== atTop.out; pointer <== selector; } @@ -283,9 +283,6 @@ template RewriteStack(n) { signal pointer <== topOfStack.pointer; signal current_value[2] <== topOfStack.value; // * check if we are currently in a value of an object * - component inObjectValue = IsEqualArray(2); - inObjectValue.in[0] <== current_value; - inObjectValue.in[1] <== [1,1]; // * check if value indicates currently in an array * component inArray = IsEqual(); inArray.in[0] <== current_value[0]; diff --git 
a/src/codegen/http.rs b/src/codegen/http.rs index 53e9535..588ff3a 100644 --- a/src/codegen/http.rs +++ b/src/codegen/http.rs @@ -3,7 +3,7 @@ use regex::Regex; use serde::{Deserialize, Serialize}; use std::{ - collections::{BTreeMap, HashMap}, + collections::BTreeMap, error::Error, fs::{self, create_dir_all}, path::Path, @@ -118,10 +118,8 @@ impl HttpData { let mut headers_map = BTreeMap::::new(); let re = Regex::new(r":\s+").unwrap(); for &header in headers { - println!("header: {:?}", header); let key_value: Vec<&str> = re.split(header).collect(); assert_eq!(key_value.len(), 2); - println!("key: {:?}", key_value); headers_map.insert(key_value[0].to_string(), key_value[1].to_string()); } @@ -181,7 +179,6 @@ impl HttpData { input_file: &Path, codegen_filename: &str, ) -> Result> { - println!("input_ifle: {:?}", input_file); let input = FileType::Http.read_input(input_file)?; let circuit_template_name = match self { From 27e1384e7881b8aa2e6ff6a674153c0ecada372b Mon Sep 17 00:00:00 2001 From: lonerapier Date: Wed, 25 Sep 2024 23:14:42 +0530 Subject: [PATCH 03/10] feat: json improvements --- circuits/json/interpreter.circom | 40 ++--- .../test/json/extractor/interpreter.test.ts | 31 ++-- docs/json.md | 4 +- src/codegen/json.rs | 153 +++++++++++++----- 4 files changed, 152 insertions(+), 76 deletions(-) diff --git a/circuits/json/interpreter.circom b/circuits/json/interpreter.circom index 762a062..a1bfa20 100644 --- a/circuits/json/interpreter.circom +++ b/circuits/json/interpreter.circom @@ -34,6 +34,7 @@ template InsideKey(n) { component topOfStack = GetTopOfStack(n); topOfStack.stack <== stack; + _ <== topOfStack.pointer; signal currentVal[2] <== topOfStack.value; signal parsingStringAndNotNumber <== parsing_string * (1 - parsing_number); @@ -54,7 +55,7 @@ template InsideKey(n) { /// /// # Output /// - `out`: Returns `1` if current byte is inside a value -template InsideValue(n) { +template InsideValueAtTop(n) { signal input stack[n][2]; signal input parsing_string; 
signal input parsing_number; @@ -85,14 +86,14 @@ template InsideValue(n) { /// /// # Output /// - `out`: Returns `1` if current byte is inside a value -template InsideValueAtDepth(n, depth) { - signal input stack[n][2]; +template InsideValue() { + signal input stack[2]; signal input parsing_string; signal input parsing_number; signal output out; - signal ifParsingValue <== stack[depth][0] * stack[depth][1]; + signal ifParsingValue <== stack[0] * stack[1]; signal parsingStringXORNumber <== XOR()(parsing_string, parsing_number); out <== ifParsingValue * parsingStringXORNumber; @@ -111,7 +112,7 @@ template InsideValueAtDepth(n, depth) { /// /// # Output /// - `out`: Returns `1` if current byte represents an array element at `index` -template InsideArrayIndex(n, index) { +template InsideArrayIndexAtTop(n, index) { signal input stack[n][2]; signal input parsing_string; signal input parsing_number; @@ -144,15 +145,15 @@ template InsideArrayIndex(n, index) { /// /// # Output /// - `out`: Returns `1` if current byte is inside an array index -template InsideArrayIndexAtDepth(n, index, depth) { - signal input stack[n][2]; +template InsideArrayIndex(index) { + signal input stack[2]; signal input parsing_string; signal input parsing_number; signal output out; - signal insideArray <== IsEqual()([stack[depth][0], 2]); - signal insideIndex <== IsEqual()([stack[depth][1], index]); + signal insideArray <== IsEqual()([stack[0], 2]); + signal insideIndex <== IsEqual()([stack[1], index]); signal insideArrayIndex <== insideArray * insideIndex; out <== insideArrayIndex * (parsing_string + parsing_number); } @@ -215,10 +216,10 @@ template NextKVPairAtDepth(n, depth) { signal isNextPair <== IsEqualArray(2)([currentVal, [1, 0]]); - // `, -> 44` - component syntax = Syntax(); - signal isComma <== IsEqual()([currByte, syntax.COMMA]); + // `,` -> 44 + signal isComma <== IsEqual()([currByte, 44]); // pointer <= depth + // TODO: `LessThan` circuit warning signal atLessDepth <== 
LessEqThan(logMaxDepth)([pointer-1, depth]); // current depth is less than key depth signal isCommaAtDepthLessThanCurrent <== isComma * atLessDepth; @@ -248,13 +249,12 @@ template KeyMatch(dataLen, keyLen) { signal input index; signal input parsing_key; - component syntax = Syntax(); - + // `"` -> 34 signal end_of_key <== IndexSelector(dataLen)(data, index + keyLen); - signal is_end_of_key_equal_to_quote <== IsEqual()([end_of_key, syntax.QUOTE]); + signal is_end_of_key_equal_to_quote <== IsEqual()([end_of_key, 34]); signal start_of_key <== IndexSelector(dataLen)(data, index - 1); - signal is_start_of_key_equal_to_quote <== IsEqual()([start_of_key, syntax.QUOTE]); + signal is_start_of_key_equal_to_quote <== IsEqual()([start_of_key, 34]); signal substring_match <== SubstringMatchWithIndex(dataLen, keyLen)(data, key, r, index); @@ -293,16 +293,17 @@ template KeyMatchAtDepth(dataLen, n, keyLen, depth) { component topOfStack = GetTopOfStack(n); topOfStack.stack <== stack; signal pointer <== topOfStack.pointer; + _ <== topOfStack.value; - component syntax = Syntax(); + // `"` -> 34 // end of key equals `"` signal end_of_key <== IndexSelector(dataLen)(data, index + keyLen); - signal is_end_of_key_equal_to_quote <== IsEqual()([end_of_key, syntax.QUOTE]); + signal is_end_of_key_equal_to_quote <== IsEqual()([end_of_key, 34]); // start of key equals `"` signal start_of_key <== IndexSelector(dataLen)(data, index - 1); - signal is_start_of_key_equal_to_quote <== IsEqual()([start_of_key, syntax.QUOTE]); + signal is_start_of_key_equal_to_quote <== IsEqual()([start_of_key, 34]); // key matches signal substring_match <== SubstringMatchWithIndex(dataLen, keyLen)(data, key, r, index); @@ -316,7 +317,6 @@ template KeyMatchAtDepth(dataLen, n, keyLen, depth) { signal is_key_at_depth <== IsEqual()([pointer-1, depth]); signal is_parsing_correct_key_at_depth <== is_parsing_correct_key * is_key_at_depth; - // log("key match", index, end_of_key, is_end_of_key_equal_to_quote, 
substring_match); signal output out <== substring_match * is_parsing_correct_key_at_depth; } \ No newline at end of file diff --git a/circuits/test/json/extractor/interpreter.test.ts b/circuits/test/json/extractor/interpreter.test.ts index a4e1b1f..43e1762 100644 --- a/circuits/test/json/extractor/interpreter.test.ts +++ b/circuits/test/json/extractor/interpreter.test.ts @@ -41,13 +41,13 @@ describe("Interpreter", async () => { generatePassCase(input5, { out: 0 }, "parsing number as a key"); }); - describe("InsideValue", async () => { + describe("InsideValueAtTop", async () => { let circuit: WitnessTester<["stack", "parsing_string", "parsing_number"], ["out"]>; before(async () => { - circuit = await circomkit.WitnessTester(`InsideValue`, { + circuit = await circomkit.WitnessTester(`InsideValueAtTop`, { file: "json/interpreter", - template: "InsideValue", + template: "InsideValueAtTop", params: [4], }); console.log("#constraints:", await circuit.getConstraintCount()); @@ -80,20 +80,21 @@ describe("Interpreter", async () => { generatePassCase(input5, { out: 0 }, "parsing number and key both"); }); - describe("InsideValueAtDepth", async () => { + describe("InsideValue", async () => { let circuit: WitnessTester<["stack", "parsing_string", "parsing_number"], ["out"]>; function generatePassCase(input: any, expected: any, depth: number, desc: string) { const description = generateDescription(input); it(`(valid) witness: ${description} ${desc}`, async () => { - circuit = await circomkit.WitnessTester(`InsideValueAtDepth`, { + circuit = await circomkit.WitnessTester(`InsideValue`, { file: "json/interpreter", - template: "InsideValueAtDepth", - params: [4, depth], + template: "InsideValue", }); console.log("#constraints:", await circuit.getConstraintCount()); + input.stack = input.stack[depth]; + await circuit.expectPass(input, expected); }); } @@ -117,16 +118,16 @@ describe("Interpreter", async () => { generatePassCase(input5, { out: 0 }, 3, "parsing number and key both"); 
}); - describe("InsideArrayIndex", async () => { + describe("InsideArrayIndexAtTop", async () => { let circuit: WitnessTester<["stack", "parsing_string", "parsing_number"], ["out"]>; function generatePassCase(input: any, expected: any, index: number, desc: string) { const description = generateDescription(input); it(`(valid) witness: ${description} ${desc}`, async () => { - circuit = await circomkit.WitnessTester(`InsideArrayIndex`, { + circuit = await circomkit.WitnessTester(`InsideArrayIndexAtTop`, { file: "json/interpreter", - template: "InsideArrayIndex", + template: "InsideArrayIndexAtTop", params: [4, index], }); console.log("#constraints:", await circuit.getConstraintCount()); @@ -157,20 +158,22 @@ describe("Interpreter", async () => { generatePassCase(input6, { out: 0 }, 3, "incorrect index"); }); - describe("InsideArrayIndexAtDepth", async () => { + describe("InsideArrayIndex", async () => { let circuit: WitnessTester<["stack", "parsing_string", "parsing_number"], ["out"]>; function generatePassCase(input: any, expected: any, index: number, depth: number, desc: string) { const description = generateDescription(input); it(`(valid) witness: ${description} ${desc}`, async () => { - circuit = await circomkit.WitnessTester(`InsideArrayIndexAtDepth`, { + circuit = await circomkit.WitnessTester(`InsideArrayIndex`, { file: "json/interpreter", - template: "InsideArrayIndexAtDepth", - params: [4, index, depth], + template: "InsideArrayIndex", + params: [index], }); console.log("#constraints:", await circuit.getConstraintCount()); + input.stack = input.stack[depth] + await circuit.expectPass(input, expected); }); } diff --git a/docs/json.md b/docs/json.md index 3f77d7c..b793951 100644 --- a/docs/json.md +++ b/docs/json.md @@ -117,8 +117,8 @@ Let's deep dive into interpreter and extractor. ## Interpreter Interpreter builds high-level circuits on top of stack to understand state better. 
It provides following templates: - `InsideKey` -- `InsideValue` & `InsideValueAtDepth` -- `InsideArrayIndex` & `InsideArrayIndexAtDepth` +- `InsideValueAtTop` & `InsideValue` +- `InsideArrayIndexAtTop` & `InsideArrayIndex` - `NextKVPair` & `NextKVPairAtDepth` - `KeyMatch` & `KeyMatchAtDepth` diff --git a/src/codegen/json.rs b/src/codegen/json.rs index 5f99cad..19b9412 100644 --- a/src/codegen/json.rs +++ b/src/codegen/json.rs @@ -208,11 +208,11 @@ fn extract_string( } *circuit_buffer += r#" - value <== SelectSubArray(DATA_BYTES, maxValueLen)(data, value_starting_index[DATA_BYTES-2]+1, maxValueLen);"#; + value <== SelectSubArray(DATA_BYTES, maxValueLen)(data, value_starting_index[DATA_BYTES-1]+1, maxValueLen);"#; if debug { *circuit_buffer += r#" - log("value_starting_index", value_starting_index[DATA_BYTES-2]); + log("value_starting_index", value_starting_index[DATA_BYTES-1]+1); for (var i=0 ; i circuit_buffer += &format!(" signal is_key{}_match[DATA_BYTES];\n signal is_key{}_match_for_value[DATA_BYTES];\n is_key{}_match_for_value[0] <== 0;\n signal is_next_pair_at_depth{}[DATA_BYTES];\n", i+1, i+1, i+1, i+1), + Key::String(_) => circuit_buffer += &format!(" signal is_key{}_match[DATA_BYTES];\n signal is_key{}_match_for_value[DATA_BYTES+1];\n is_key{}_match_for_value[0] <== 0;\n signal is_next_pair_at_depth{}[DATA_BYTES];\n", i+1, i+1, i+1, i+1), Key::Num(_) => (), } } } + let mut num_objects = 0; + + // initialise first iteration + { + // parsing_key and parsing_object{i}_value + circuit_buffer += r#" + // initialise first iteration + parsing_key[0] <== InsideKey(MAX_STACK_HEIGHT)(State[0].next_stack, State[0].next_parsing_string, State[0].next_parsing_number); + +"#; + + for (i, key) in data.keys.iter().enumerate() { + match key { + Key::String(_) => { + circuit_buffer += &format!(" parsing_object{}_value[0] <== InsideValue()(State[0].next_stack[0], State[0].next_parsing_string, State[0].next_parsing_number);\n", i+1); + } + Key::Num(_) => { + circuit_buffer += 
&format!(" parsing_array{}[0] <== InsideArrayIndex(index{})(State[0].next_stack[0], State[0].next_parsing_string, State[0].next_parsing_number);\n", i+1, i+1); + } + } + } + + // parsing_value[0] <== MultiAND(5)([parsing_object1_value[0], parsing_object2_value[0], parsing_array3[0], parsing_object4_value[0], parsing_object5_value[0]]); + circuit_buffer += &format!( + " // parsing correct value = AND(all individual stack values)\n parsing_value[0] <== MultiAND({})([", + data.keys.len() + ); + for (i, key) in data.keys.iter().take(data.keys.len() - 1).enumerate() { + match key { + Key::String(_) => circuit_buffer += &format!("parsing_object{}_value[0], ", i + 1), + Key::Num(_) => circuit_buffer += &format!("parsing_array{}[0], ", i + 1), + } + } + match data.keys[data.keys.len() - 1] { + Key::String(_) => { + circuit_buffer += &format!("parsing_object{}_value[0]]);\n\n", data.keys.len()) + } + Key::Num(_) => circuit_buffer += &format!("parsing_array{}[0]]);\n\n", data.keys.len()), + } + + // is_key{i}_match_for_value + for (i, key) in data.keys.iter().enumerate() { + match key { + Key::String(_) => { + num_objects += 1; + circuit_buffer += &format!(" is_key{}_match[0] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen{}, depth{})(data, key{}, r, 0, parsing_key[0], State[0].next_stack);\n", i+1, i+1, i+1, i+1); + circuit_buffer += &format!(" is_next_pair_at_depth{}[0] <== NextKVPairAtDepth(MAX_STACK_HEIGHT, depth{})(State[0].next_stack, data[0]);\n", i+1, i+1); + circuit_buffer += &format!(" is_key{}_match_for_value[1] <== Mux1()([is_key{}_match_for_value[0] * (1-is_next_pair_at_depth{}[0]), is_key{}_match[0] * (1-is_next_pair_at_depth{}[0])], is_key{}_match[0]);\n", i+1, i+1, i+1, i+1, i+1, i+1); + if debug { + circuit_buffer += &format!(" // log(\"is_key{}_match_for_value\", is_key{}_match_for_value[1]);\n\n", i + 1, i + 1); + } + } + Key::Num(_) => (), + } + } + + // is_value_match[data_idx] <== MultiAND(2)([is_key1_match_for_value[data_idx], 
is_key3_match_for_value[data_idx]]); + { + circuit_buffer += &format!(" is_value_match[0] <== MultiAND({})([", num_objects); + for (i, key) in data.keys.iter().enumerate() { + match key { + Key::String(_) => { + circuit_buffer += &format!("is_key{}_match_for_value[1], ", i + 1) + } + Key::Num(_) => (), + } + } + + // remove last 2 chars `, ` from string buffer + circuit_buffer.pop(); + circuit_buffer.pop(); + circuit_buffer += "]);\n"; + } + + circuit_buffer += r#" + mask[0] <== parsing_value[0] * is_value_match[0]; +"#; + } + // debugging circuit_buffer += r#" - signal is_value_match[DATA_BYTES]; - is_value_match[0] <== 0; - signal value_mask[DATA_BYTES]; - for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) {"#; if debug { @@ -455,22 +530,22 @@ fn build_json_circuit( // - mask // check if inside key or not - parsing_key[data_idx-1] <== InsideKey(MAX_STACK_HEIGHT)(State[data_idx].stack, State[data_idx].parsing_string, State[data_idx].parsing_number); + parsing_key[data_idx] <== InsideKey(MAX_STACK_HEIGHT)(State[data_idx].next_stack, State[data_idx].next_parsing_string, State[data_idx].next_parsing_number); "#; /* Determining wheter parsing correct value and array index - parsing_object1_value[data_idx-1] <== InsideValueAtDepth(MAX_STACK_HEIGHT, depth1)(State[data_idx].stack, State[data_idx].parsing_string, State[data_idx].parsing_number); - parsing_array2[data_idx-1] <== InsideArrayIndexAtDepth(MAX_STACK_HEIGHT, index2, depth2)(State[data_idx].stack, State[data_idx].parsing_string, State[data_idx].parsing_number); + parsing_object1_value[data_idx-1] <== InsideValue(MAX_STACK_HEIGHT, depth1)(State[data_idx].stack, State[data_idx].parsing_string, State[data_idx].parsing_number); + parsing_array2[data_idx-1] <== InsideArrayIndex(MAX_STACK_HEIGHT, index2, depth2)(State[data_idx].stack, State[data_idx].parsing_string, State[data_idx].parsing_number); */ { for (i, key) in data.keys.iter().enumerate() { match key { Key::String(_) => { - circuit_buffer += &format!(" 
parsing_object{}_value[data_idx-1] <== InsideValueAtDepth(MAX_STACK_HEIGHT, depth{})(State[data_idx].stack, State[data_idx].parsing_string, State[data_idx].parsing_number);\n", i+1, i+1); + circuit_buffer += &format!(" parsing_object{}_value[data_idx] <== InsideValue()(State[data_idx].next_stack[depth{}], State[data_idx].next_parsing_string, State[data_idx].next_parsing_number);\n", i+1, i+1); } Key::Num(_) => { - circuit_buffer += &format!(" parsing_array{}[data_idx-1] <== InsideArrayIndexAtDepth(MAX_STACK_HEIGHT, index{}, depth{})(State[data_idx].stack, State[data_idx].parsing_string, State[data_idx].parsing_number);\n", i+1, i+1, i+1); + circuit_buffer += &format!(" parsing_array{}[data_idx] <== InsideArrayIndex(index{})(State[data_idx].next_stack[depth{}], State[data_idx].next_parsing_string, State[data_idx].next_parsing_number);\n", i+1, i+1, i+1); } } } @@ -480,25 +555,24 @@ fn build_json_circuit( // parsing_value[data_idx-1] <== MultiAND(4)([parsing_object1_value[data_idx-1], parsing_array2[data_idx-1], parsing_object3_value[data_idx-1], parsing_array4[data_idx-1]]); { circuit_buffer += &format!( - " // parsing correct value = AND(all individual stack values)\n parsing_value[data_idx-1] <== MultiAND({})([", - data.keys.len() - ); + " // parsing correct value = AND(all individual stack values)\n parsing_value[data_idx] <== MultiAND({})([", + data.keys.len() + ); for (i, key) in data.keys.iter().take(data.keys.len() - 1).enumerate() { match key { Key::String(_) => { - circuit_buffer += &format!("parsing_object{}_value[data_idx-1], ", i + 1) + circuit_buffer += &format!("parsing_object{}_value[data_idx], ", i + 1) } - Key::Num(_) => circuit_buffer += &format!("parsing_array{}[data_idx-1], ", i + 1), + Key::Num(_) => circuit_buffer += &format!("parsing_array{}[data_idx], ", i + 1), } } match data.keys[data.keys.len() - 1] { Key::String(_) => { - circuit_buffer += - &format!("parsing_object{}_value[data_idx-1]]);\n", data.keys.len()) + circuit_buffer += 
&format!("parsing_object{}_value[data_idx]]);\n", data.keys.len()) } Key::Num(_) => { - circuit_buffer += &format!("parsing_array{}[data_idx-1]]);\n", data.keys.len()) + circuit_buffer += &format!("parsing_array{}[data_idx]]);\n", data.keys.len()) } } @@ -508,14 +582,12 @@ fn build_json_circuit( for (i, key) in data.keys.iter().enumerate() { match key { Key::String(_) => { - circuit_buffer += &format!("parsing_object{}_value[data_idx-1], ", i + 1) - } - Key::Num(_) => { - circuit_buffer += &format!("parsing_array{}[data_idx-1], ", i + 1) + circuit_buffer += &format!("parsing_object{}_value[data_idx], ", i + 1) } + Key::Num(_) => circuit_buffer += &format!("parsing_array{}[data_idx], ", i + 1), } } - circuit_buffer += "parsing_value[data_idx-1]);\n\n"; + circuit_buffer += "parsing_value[data_idx]);\n\n"; } } @@ -542,11 +614,11 @@ fn build_json_circuit( match key { Key::String(_) => { num_objects += 1; - circuit_buffer += &format!(" is_key{}_match[data_idx-1] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen{}, depth{})(data, key{}, r, data_idx-1, parsing_key[data_idx-1], State[data_idx].stack);\n", i+1, i+1, i+1, i+1); - circuit_buffer += &format!(" is_next_pair_at_depth{}[data_idx-1] <== NextKVPairAtDepth(MAX_STACK_HEIGHT, depth{})(State[data_idx].stack, data[data_idx-1]);\n", i+1, i+1); - circuit_buffer += &format!(" is_key{}_match_for_value[data_idx] <== Mux1()([is_key{}_match_for_value[data_idx-1] * (1-is_next_pair_at_depth{}[data_idx-1]), is_key{}_match[data_idx-1] * (1-is_next_pair_at_depth{}[data_idx-1])], is_key{}_match[data_idx-1]);\n", i+1, i+1, i+1, i+1, i+1, i+1); + circuit_buffer += &format!(" is_key{}_match[data_idx] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen{}, depth{})(data, key{}, r, data_idx, parsing_key[data_idx], State[data_idx].next_stack);\n", i+1, i+1, i+1, i+1); + circuit_buffer += &format!(" is_next_pair_at_depth{}[data_idx] <== NextKVPairAtDepth(MAX_STACK_HEIGHT, depth{})(State[data_idx].next_stack, data[data_idx]);\n", 
i+1, i+1); + circuit_buffer += &format!(" is_key{}_match_for_value[data_idx+1] <== Mux1()([is_key{}_match_for_value[data_idx] * (1-is_next_pair_at_depth{}[data_idx]), is_key{}_match[data_idx] * (1-is_next_pair_at_depth{}[data_idx])], is_key{}_match[data_idx]);\n", i+1, i+1, i+1, i+1, i+1, i+1); if debug { - circuit_buffer += &format!(" // log(\"is_key{}_match_for_value\", is_key{}_match_for_value[data_idx]);\n\n", i + 1, i + 1); + circuit_buffer += &format!(" // log(\"is_key{}_match_for_value\", is_key{}_match_for_value[data_idx+1]);\n\n", i + 1, i + 1); } } Key::Num(_) => (), @@ -563,7 +635,7 @@ fn build_json_circuit( for (i, key) in data.keys.iter().enumerate() { match key { Key::String(_) => { - circuit_buffer += &format!("is_key{}_match_for_value[data_idx], ", i + 1) + circuit_buffer += &format!("is_key{}_match_for_value[data_idx+1], ", i + 1) } Key::Num(_) => (), } @@ -578,9 +650,8 @@ fn build_json_circuit( // debugging and output bytes { circuit_buffer += r#" - // mask[i] = data[i] * parsing_value[i] * is_key_match_for_value[i] - value_mask[data_idx-1] <== data[data_idx-1] * parsing_value[data_idx-1]; - mask[data_idx-1] <== value_mask[data_idx-1] * is_value_match[data_idx]; + // mask = currently parsing value and all subsequent keys matched + mask[data_idx] <== parsing_value[data_idx] * is_value_match[data_idx]; }"#; // Debugging @@ -597,11 +668,13 @@ fn build_json_circuit( circuit_buffer += r#" + // find starting index of value in data by matching mask signal is_zero_mask[DATA_BYTES]; signal is_prev_starting_index[DATA_BYTES]; value_starting_index[0] <== 0; + is_prev_starting_index[0] <== 0; is_zero_mask[0] <== IsZero()(mask[0]); - for (var i=1 ; i Date: Thu, 26 Sep 2024 10:24:38 +0530 Subject: [PATCH 04/10] feat(http): remove syntax use --- circuits/http/interpreter.circom | 19 ++++++------------- circuits/http/parser/machine.circom | 12 +++++------- 2 files changed, 11 insertions(+), 20 deletions(-) diff --git a/circuits/http/interpreter.circom 
b/circuits/http/interpreter.circom index 0cd6a2e..ddcd52b 100644 --- a/circuits/http/interpreter.circom +++ b/circuits/http/interpreter.circom @@ -34,10 +34,9 @@ template MethodMatch(dataLen, methodLen) { signal input data[dataLen]; signal input method[methodLen]; - signal input r; signal input index; - signal isMatch <== SubstringMatchWithIndex(dataLen, methodLen)(data, method, r, index); + signal isMatch <== SubstringMatchWithIndex(dataLen, methodLen)(data, method, index); isMatch === 1; } @@ -46,24 +45,21 @@ template HeaderFieldNameValueMatch(dataLen, nameLen, valueLen) { signal input data[dataLen]; signal input headerName[nameLen]; signal input headerValue[valueLen]; - signal input r; signal input index; - component syntax = HttpSyntax(); - // signal output value[valueLen]; // is name matches - signal headerNameMatch <== SubstringMatchWithIndex(dataLen, nameLen)(data, headerName, r, index); + signal headerNameMatch <== SubstringMatchWithIndex(dataLen, nameLen)(data, headerName, index); // next byte to name should be COLON signal endOfHeaderName <== IndexSelector(dataLen)(data, index + nameLen); - signal isNextByteColon <== IsEqual()([endOfHeaderName, syntax.COLON]); + signal isNextByteColon <== IsEqual()([endOfHeaderName, 58]); signal headerNameMatchAndNextByteColon <== headerNameMatch * isNextByteColon; // field-name: SP field-value - signal headerValueMatch <== SubstringMatchWithIndex(dataLen, valueLen)(data, headerValue, r, index + nameLen + 2); + signal headerValueMatch <== SubstringMatchWithIndex(dataLen, valueLen)(data, headerValue, index + nameLen + 2); // header name matches + header value matches signal output out <== headerNameMatchAndNextByteColon * headerValueMatch; @@ -73,17 +69,14 @@ template HeaderFieldNameValueMatch(dataLen, nameLen, valueLen) { template HeaderFieldNameMatch(dataLen, nameLen) { signal input data[dataLen]; signal input headerName[nameLen]; - signal input r; signal input index; - component syntax = HttpSyntax(); - // is name 
matches - signal headerNameMatch <== SubstringMatchWithIndex(dataLen, nameLen)(data, headerName, r, index); + signal headerNameMatch <== SubstringMatchWithIndex(dataLen, nameLen)(data, headerName, index); // next byte to name should be COLON signal endOfHeaderName <== IndexSelector(dataLen)(data, index + nameLen); - signal isNextByteColon <== IsEqual()([endOfHeaderName, syntax.COLON]); + signal isNextByteColon <== IsEqual()([endOfHeaderName, 58]); // header name matches signal output out; diff --git a/circuits/http/parser/machine.circom b/circuits/http/parser/machine.circom index 04a4bee..eca2207 100644 --- a/circuits/http/parser/machine.circom +++ b/circuits/http/parser/machine.circom @@ -19,20 +19,18 @@ template HttpStateUpdate() { signal output next_parsing_body; signal output next_line_status; - component HttpSyntax = HttpSyntax(); - //---------------------------------------------------------------------------------// - // check if we read space or colon + // check if we read space: 32 or colon: 58 component readSP = IsEqual(); - readSP.in <== [byte, HttpSyntax.SPACE]; + readSP.in <== [byte, 32]; component readColon = IsEqual(); - readColon.in <== [byte, HttpSyntax.COLON]; + readColon.in <== [byte, 58]; // Check if what we just read is a CR / LF component readCR = IsEqual(); - readCR.in <== [byte, HttpSyntax.CR]; + readCR.in <== [byte, 13]; component readLF = IsEqual(); - readLF.in <== [byte, HttpSyntax.LF]; + readLF.in <== [byte, 10]; signal notCRAndLF <== (1 - readCR.out) * (1 - readLF.out); //---------------------------------------------------------------------------------// From 7e1cd6975d1538918384c85a252402128beaa698 Mon Sep 17 00:00:00 2001 From: lonerapier Date: Thu, 26 Sep 2024 10:24:52 +0530 Subject: [PATCH 05/10] feat(http): remove hasher substring matching --- circuits/utils/search.circom | 30 ++++++++++++++++++++++++++---- src/codegen/http.rs | 14 +++++--------- 2 files changed, 31 insertions(+), 13 deletions(-) diff --git 
a/circuits/utils/search.circom b/circuits/utils/search.circom index b9bb86e..a257814 100644 --- a/circuits/utils/search.circom +++ b/circuits/utils/search.circom @@ -4,6 +4,7 @@ include "circomlib/circuits/mux1.circom"; include "./hash.circom"; include "./operators.circom"; include "./array.circom"; +include "@zk-email/circuits/utils/array.circom"; /* SubstringSearch @@ -84,8 +85,6 @@ template SubstringSearch(dataLen, keyLen) { } /* -SubstringMatchWithIndex - RLC algorithm for matching substring at index. - Creates a mask for `data` at `[start, start + keyLen]` - apply mask to data @@ -107,7 +106,7 @@ RLC algorithm for matching substring at index. NOTE: Modified from https://github.com/zkemail/zk-email-verify/tree/main/packages/circuits */ -template SubstringMatchWithIndex(dataLen, keyLen) { +template SubstringMatchWithHasher(dataLen, keyLen) { signal input data[dataLen]; signal input key[keyLen]; signal input r; @@ -203,6 +202,29 @@ template SubstringMatchWithIndex(dataLen, keyLen) { out <== IsZero()(hashMaskedData[dataLen-1]-hashMaskedKey[keyLen-1]); } +/* +SubstringMatchWithIndex + +matching substring at index by selecting a subarray and matching arrays + +# Parameters +- `dataLen`: The maximum length of the input string +- `keyLen`: The maximum length of the substring to be matched + +# Inputs +- `data`: Array of ASCII characters as input string +- `key`: Array of ASCII characters as substring to be searched in `data` +- `position`: Index of `key` in `data` +*/ +template SubstringMatchWithIndex(dataLen, keyLen) { + signal input data[dataLen]; + signal input key[keyLen]; + signal input start; + + signal subarray[keyLen] <== SelectSubArray(dataLen, keyLen)(data, start, keyLen); + signal output out <== IsEqualArray(keyLen)([key, subarray]); +} + /* SubstringMatch: Matches a substring with an input string and returns the position @@ -238,7 +260,7 @@ template SubstringMatch(dataLen, keyLen) { // matches a `key` in `data` at `pos` // NOTE: constrained verification 
assures correctness - signal isMatch <== SubstringMatchWithIndex(dataLen, keyLen)(data, key, r, start); + signal isMatch <== SubstringMatchWithHasher(dataLen, keyLen)(data, key, r, start); isMatch === 1; position <== start; diff --git a/src/codegen/http.rs b/src/codegen/http.rs index 588ff3a..d536100 100644 --- a/src/codegen/http.rs +++ b/src/codegen/http.rs @@ -483,7 +483,7 @@ fn build_http_circuit( // Header matches { for (i, _header) in data.headers().iter().enumerate() { - circuit_buffer += &format!(" headerNameValueMatch{}[data_idx] <== HeaderFieldNameValueMatch(DATA_BYTES, headerNameLen{}, headerValueLen{})(data, header{}, value{}, 100, data_idx);\n", i + 1,i + 1,i + 1,i + 1,i + 1); + circuit_buffer += &format!(" headerNameValueMatch{}[data_idx] <== HeaderFieldNameValueMatch(DATA_BYTES, headerNameLen{}, headerValueLen{})(data, header{}, value{}, data_idx);\n", i + 1,i + 1,i + 1,i + 1,i + 1); circuit_buffer += &format!( " hasMatchedHeaderValue{} += headerNameValueMatch{}[data_idx];\n", i + 1, @@ -560,14 +560,12 @@ fn build_http_circuit( methodLen === target_start_counter - 1; // Check target is correct by substring match and length check - // TODO: change r - signal targetMatch <== SubstringMatchWithIndex(DATA_BYTES, targetLen)(data, target, 100, target_start_counter); + signal targetMatch <== SubstringMatchWithIndex(DATA_BYTES, targetLen)(data, target, target_start_counter); targetMatch === 1; targetLen === target_end_counter - target_start_counter - 1; // Check version is correct by substring match and length check - // TODO: change r - signal versionMatch <== SubstringMatchWithIndex(DATA_BYTES, versionLen)(data, version, 100, target_end_counter); + signal versionMatch <== SubstringMatchWithIndex(DATA_BYTES, versionLen)(data, version, target_end_counter); versionMatch === 1; // -2 here for the CRLF versionLen === version_end_counter - target_end_counter - 2; @@ -579,14 +577,12 @@ fn build_http_circuit( versionLen === status_start_counter - 1; // Check status 
is correct by substring match and length check - // TODO: change r - signal statusMatch <== SubstringMatchWithIndex(DATA_BYTES, statusLen)(data, status, 100, status_start_counter); + signal statusMatch <== SubstringMatchWithIndex(DATA_BYTES, statusLen)(data, status, status_start_counter); statusMatch === 1; statusLen === status_end_counter - status_start_counter - 1; // Check message is correct by substring match and length check - // TODO: change r - signal messageMatch <== SubstringMatchWithIndex(DATA_BYTES, messageLen)(data, message, 100, status_end_counter); + signal messageMatch <== SubstringMatchWithIndex(DATA_BYTES, messageLen)(data, message, status_end_counter); messageMatch === 1; // -2 here for the CRLF messageLen === message_end_counter - status_end_counter - 2; From 960bbab8c10fcb4909258e671a9af87e9794e8ac Mon Sep 17 00:00:00 2001 From: lonerapier Date: Thu, 26 Sep 2024 15:41:37 +0530 Subject: [PATCH 06/10] fix: delete extractor --- circuits/http/extractor.circom | 165 --------------------------- circuits/http/parser/machine.circom | 10 +- circuits/test/http/extractor.test.ts | 81 ------------- src/codegen/http.rs | 100 +++++++++++++--- 4 files changed, 90 insertions(+), 266 deletions(-) delete mode 100644 circuits/http/extractor.circom delete mode 100644 circuits/test/http/extractor.test.ts diff --git a/circuits/http/extractor.circom b/circuits/http/extractor.circom deleted file mode 100644 index 160e8ee..0000000 --- a/circuits/http/extractor.circom +++ /dev/null @@ -1,165 +0,0 @@ -pragma circom 2.1.9; - -include "interpreter.circom"; -include "parser/machine.circom"; -include "../utils/bytes.circom"; -include "../utils/search.circom"; -include "circomlib/circuits/gates.circom"; -include "@zk-email/circuits/utils/array.circom"; - -// TODO: -// - handle CRLF in response data - -template ExtractResponse(DATA_BYTES, maxContentLength) { - signal input data[DATA_BYTES]; - signal output response[maxContentLength]; - - 
//--------------------------------------------------------------------------------------------// - //-CONSTRAINTS--------------------------------------------------------------------------------// - //--------------------------------------------------------------------------------------------// - component dataASCII = ASCII(DATA_BYTES); - dataASCII.in <== data; - //--------------------------------------------------------------------------------------------// - - // Initialze the parser - component State[DATA_BYTES]; - State[0] = HttpStateUpdate(); - State[0].byte <== data[0]; - State[0].parsing_start <== 1; - State[0].parsing_header <== 0; - State[0].parsing_field_name <== 0; - State[0].parsing_field_value <== 0; - State[0].parsing_body <== 0; - State[0].line_status <== 0; - - signal dataMask[DATA_BYTES]; - dataMask[0] <== 0; - - for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) { - State[data_idx] = HttpStateUpdate(); - State[data_idx].byte <== data[data_idx]; - State[data_idx].parsing_start <== State[data_idx - 1].next_parsing_start; - State[data_idx].parsing_header <== State[data_idx - 1].next_parsing_header; - State[data_idx].parsing_field_name <== State[data_idx-1].next_parsing_field_name; - State[data_idx].parsing_field_value <== State[data_idx-1].next_parsing_field_value; - State[data_idx].parsing_body <== State[data_idx - 1].next_parsing_body; - State[data_idx].line_status <== State[data_idx - 1].next_line_status; - - // apply body mask to data - dataMask[data_idx] <== data[data_idx] * State[data_idx].next_parsing_body; - - // Debugging - log("State[", data_idx, "].parsing_start ", "= ", State[data_idx].parsing_start); - log("State[", data_idx, "].parsing_header ", "= ", State[data_idx].parsing_header); - log("State[", data_idx, "].parsing_field_name ", "= ", State[data_idx].parsing_field_name); - log("State[", data_idx, "].parsing_field_value", "= ", State[data_idx].parsing_field_value); - log("State[", data_idx, "].parsing_body ", "= ", 
State[data_idx].parsing_body); - log("State[", data_idx, "].line_status ", "= ", State[data_idx].line_status); - log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); - } - - // Debugging - log("State[", DATA_BYTES, "].parsing_start ", "= ", State[DATA_BYTES-1].next_parsing_start); - log("State[", DATA_BYTES, "].parsing_header ", "= ", State[DATA_BYTES-1].next_parsing_header); - log("State[", DATA_BYTES, "].parsing_field_name ", "= ", State[DATA_BYTES-1].parsing_field_name); - log("State[", DATA_BYTES, "].parsing_field_value", "= ", State[DATA_BYTES-1].parsing_field_value); - log("State[", DATA_BYTES, "].parsing_body ", "= ", State[DATA_BYTES-1].next_parsing_body); - log("State[", DATA_BYTES, "].line_status ", "= ", State[DATA_BYTES-1].next_line_status); - log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); - - signal valueStartingIndex[DATA_BYTES]; - signal isZeroMask[DATA_BYTES]; - signal isPrevStartingIndex[DATA_BYTES]; - valueStartingIndex[0] <== 0; - isZeroMask[0] <== IsZero()(dataMask[0]); - for (var i=1 ; i < DATA_BYTES; i++) { - isZeroMask[i] <== IsZero()(dataMask[i]); - isPrevStartingIndex[i] <== IsZero()(valueStartingIndex[i-1]); - valueStartingIndex[i] <== valueStartingIndex[i-1] + i * (1-isZeroMask[i]) * isPrevStartingIndex[i]; - } - - response <== SelectSubArray(DATA_BYTES, maxContentLength)(dataMask, valueStartingIndex[DATA_BYTES-1]+1, maxContentLength); -} - -template ExtractHeaderValue(DATA_BYTES, headerNameLength, maxValueLength) { - signal input data[DATA_BYTES]; - signal input header[headerNameLength]; - - signal output value[maxValueLength]; - - //--------------------------------------------------------------------------------------------// - //-CONSTRAINTS--------------------------------------------------------------------------------// - //--------------------------------------------------------------------------------------------// - component dataASCII = ASCII(DATA_BYTES); - dataASCII.in <== data; - 
//--------------------------------------------------------------------------------------------// - - // Initialze the parser - component State[DATA_BYTES]; - State[0] = HttpStateUpdate(); - State[0].byte <== data[0]; - State[0].parsing_start <== 1; - State[0].parsing_header <== 0; - State[0].parsing_field_name <== 0; - State[0].parsing_field_value <== 0; - State[0].parsing_body <== 0; - State[0].line_status <== 0; - - signal headerMatch[DATA_BYTES]; - headerMatch[0] <== 0; - signal isHeaderNameMatch[DATA_BYTES]; - isHeaderNameMatch[0] <== 0; - signal readCRLF[DATA_BYTES]; - readCRLF[0] <== 0; - signal valueMask[DATA_BYTES]; - valueMask[0] <== 0; - - for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) { - State[data_idx] = HttpStateUpdate(); - State[data_idx].byte <== data[data_idx]; - State[data_idx].parsing_start <== State[data_idx - 1].next_parsing_start; - State[data_idx].parsing_header <== State[data_idx - 1].next_parsing_header; - State[data_idx].parsing_field_name <== State[data_idx-1].next_parsing_field_name; - State[data_idx].parsing_field_value <== State[data_idx-1].next_parsing_field_value; - State[data_idx].parsing_body <== State[data_idx - 1].next_parsing_body; - State[data_idx].line_status <== State[data_idx - 1].next_line_status; - - // apply value mask to data - // TODO: change r - headerMatch[data_idx] <== HeaderFieldNameMatch(DATA_BYTES, headerNameLength)(data, header, 100, data_idx); - readCRLF[data_idx] <== IsEqual()([State[data_idx].line_status, 2]); - isHeaderNameMatch[data_idx] <== Mux1()([isHeaderNameMatch[data_idx-1] * (1-readCRLF[data_idx]), 1], headerMatch[data_idx]); - valueMask[data_idx] <== MultiAND(3)([data[data_idx], isHeaderNameMatch[data_idx], State[data_idx].parsing_field_value]); - - // Debugging - log("State[", data_idx, "].parsing_start ", "= ", State[data_idx].parsing_start); - log("State[", data_idx, "].parsing_header ", "= ", State[data_idx].parsing_header); - log("State[", data_idx, "].parsing_field_name ", "= ", 
State[data_idx].parsing_field_name); - log("State[", data_idx, "].parsing_field_value", "= ", State[data_idx].parsing_field_value); - log("State[", data_idx, "].parsing_body ", "= ", State[data_idx].parsing_body); - log("State[", data_idx, "].line_status ", "= ", State[data_idx].line_status); - log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); - } - - // Debugging - log("State[", DATA_BYTES, "].parsing_start ", "= ", State[DATA_BYTES-1].next_parsing_start); - log("State[", DATA_BYTES, "].parsing_header ", "= ", State[DATA_BYTES-1].next_parsing_header); - log("State[", DATA_BYTES, "].parsing_field_name ", "= ", State[DATA_BYTES-1].parsing_field_name); - log("State[", DATA_BYTES, "].parsing_field_value", "= ", State[DATA_BYTES-1].parsing_field_value); - log("State[", DATA_BYTES, "].parsing_body ", "= ", State[DATA_BYTES-1].next_parsing_body); - log("State[", DATA_BYTES, "].line_status ", "= ", State[DATA_BYTES-1].next_line_status); - log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); - - signal valueStartingIndex[DATA_BYTES]; - signal isZeroMask[DATA_BYTES]; - signal isPrevStartingIndex[DATA_BYTES]; - valueStartingIndex[0] <== 0; - isZeroMask[0] <== IsZero()(valueMask[0]); - for (var i=1 ; i { - let circuit: WitnessTester<["data"], ["response"]>; - - - function generatePassCase(input: number[], expected: any, desc: string) { - const description = generateDescription(input); - - it(`(valid) witness: ${description} ${desc}`, async () => { - circuit = await circomkit.WitnessTester(`ExtractResponseData`, { - file: "http/extractor", - template: "ExtractResponse", - params: [input.length, expected.length], - }); - console.log("#constraints:", await circuit.getConstraintCount()); - - await circuit.expectPass({ data: input }, { response: expected }); - }); - } - - describe("response", async () => { - - let parsedHttp = readHTTPInputFile("get_response.http"); - - generatePassCase(parsedHttp.input, parsedHttp.bodyBytes, ""); - - let output2 = 
parsedHttp.bodyBytes.slice(0); - output2.push(0, 0, 0, 0); - generatePassCase(parsedHttp.input, output2, "output length more than actual length"); - - let output3 = parsedHttp.bodyBytes.slice(0); - output3.pop(); - output3.pop(); - generatePassCase(parsedHttp.input, output3, "output length less than actual length"); - }); - - describe("request", async () => { - let parsedHttp = readHTTPInputFile("post_request.http"); - - generatePassCase(parsedHttp.input, parsedHttp.bodyBytes, ""); - - let output2 = parsedHttp.bodyBytes.slice(0); - output2.push(0, 0, 0, 0, 0, 0); - generatePassCase(parsedHttp.input, output2, "output length more than actual length"); - - console.log(parsedHttp.bodyBytes.length); - let output3 = parsedHttp.bodyBytes.slice(0); - output3.pop(); - output3.pop(); - generatePassCase(parsedHttp.input, output3, "output length less than actual length"); - }); -}); - -describe("HTTP :: header Extractor", async () => { - let circuit: WitnessTester<["data", "header"], ["value"]>; - - function generatePassCase(input: number[], headerName: number[], headerValue: number[], desc: string) { - const description = generateDescription(input); - - it(`(valid) witness: ${description} ${desc}`, async () => { - circuit = await circomkit.WitnessTester(`ExtractHeaderValue`, { - file: "http/extractor", - template: "ExtractHeaderValue", - params: [input.length, headerName.length, headerValue.length], - }); - console.log("#constraints:", await circuit.getConstraintCount()); - - await circuit.expectPass({ data: input, header: headerName }, { value: headerValue }); - }); - } - - describe("response", async () => { - - let parsedHttp = readHTTPInputFile("get_response.http"); - - generatePassCase(parsedHttp.input, toByte("Content-Length"), toByte(parsedHttp.headers["content-length"]), ""); - }); -}); - diff --git a/src/codegen/http.rs b/src/codegen/http.rs index d536100..af2e36a 100644 --- a/src/codegen/http.rs +++ b/src/codegen/http.rs @@ -343,7 +343,6 @@ fn build_http_circuit( 
signal output body[maxContentLength]; signal bodyMask[DATA_BYTES]; - bodyMask[0] <== 0; "#; } } @@ -363,9 +362,9 @@ fn build_http_circuit( signal targetMask[DATA_BYTES]; signal versionMask[DATA_BYTES]; - var target_start_counter = 1; - var target_end_counter = 1; - var version_end_counter = 1; + var target_start_counter = 0; + var target_end_counter = 0; + var version_end_counter = 0; "#; } HttpData::Response(_) => { @@ -380,12 +379,21 @@ fn build_http_circuit( signal statusMask[DATA_BYTES]; signal messageMask[DATA_BYTES]; - var status_start_counter = 1; - var status_end_counter = 1; - var message_end_counter = 1; + var status_start_counter = 0; + var status_end_counter = 0; + var message_end_counter = 0; "#; } } + + // Create header match signals + { + for (i, _header) in data.headers().iter().enumerate() { + circuit_buffer += + &format!(" signal headerNameValueMatch{}[DATA_BYTES];\n", i + 1); + circuit_buffer += &format!(" var hasMatchedHeaderValue{} = 0;\n\n", i + 1); + } + } } circuit_buffer += r#" @@ -401,12 +409,69 @@ fn build_http_circuit( "#; - // Create header match signals + // If parsing a `Response`, create a mask of the body bytes { - for (i, _header) in data.headers().iter().enumerate() { - circuit_buffer += &format!(" signal headerNameValueMatch{}[DATA_BYTES];\n", i + 1); - circuit_buffer += &format!(" headerNameValueMatch{}[0] <== 0;\n", i + 1); - circuit_buffer += &format!(" var hasMatchedHeaderValue{} = 0;\n\n", i + 1); + if let HttpData::Response(_) = data { + circuit_buffer += r#" + // Mask if parser is in the body of response + bodyMask[0] <== data[0] * State[0].next_parsing_body; +"#; + } + } + + // Start line matches + { + match data { + HttpData::Request(_) => { + circuit_buffer += r#" + // Check remaining method bytes + // if(data_idx < methodLen) { + // methodIsEqual[data_idx] <== IsEqual()([data[data_idx], method[data_idx]]); + // methodIsEqual[data_idx] === 1; + // } + + // Get the target bytes + startLineMask[0] <== 
inStartLine()(State[0].next_parsing_start); + targetMask[0] <== inStartMiddle()(State[0].next_parsing_start); + versionMask[0] <== inStartEnd()(State[0].next_parsing_start); + target_start_counter += startLineMask[0] - targetMask[0] - versionMask[0]; + + // Get the version bytes + target_end_counter += startLineMask[0] - versionMask[0]; + version_end_counter += startLineMask[0]; +"#; + } + HttpData::Response(_) => { + circuit_buffer += r#" + // Check remaining version bytes + // if(data_idx < versionLen) { + // versionIsEqual[data_idx] <== IsEqual()([data[data_idx], version[data_idx]]); + // versionIsEqual[data_idx] === 1; + // } + + // Get the status bytes + startLineMask[0] <== inStartLine()(State[0].next_parsing_start); + statusMask[0] <== inStartMiddle()(State[0].next_parsing_start); + messageMask[0] <== inStartEnd()(State[0].next_parsing_start); + status_start_counter += startLineMask[0] - statusMask[0] - messageMask[0]; + + // Get the message bytes + status_end_counter += startLineMask[0] - messageMask[0]; + message_end_counter += startLineMask[0]; +"#; + } + } + + // Header matches + { + for (i, _header) in data.headers().iter().enumerate() { + circuit_buffer += &format!(" headerNameValueMatch{}[0] <== HeaderFieldNameValueMatch(DATA_BYTES, headerNameLen{}, headerValueLen{})(data, header{}, value{}, 0);\n", i + 1, i + 1, i + 1, i + 1, i + 1); + circuit_buffer += &format!( + " hasMatchedHeaderValue{} += headerNameValueMatch{}[0];\n", + i + 1, + i + 1 + ); + } } } @@ -483,7 +548,7 @@ fn build_http_circuit( // Header matches { for (i, _header) in data.headers().iter().enumerate() { - circuit_buffer += &format!(" headerNameValueMatch{}[data_idx] <== HeaderFieldNameValueMatch(DATA_BYTES, headerNameLen{}, headerValueLen{})(data, header{}, value{}, data_idx);\n", i + 1,i + 1,i + 1,i + 1,i + 1); + circuit_buffer += &format!(" headerNameValueMatch{}[data_idx] <== HeaderFieldNameValueMatch(DATA_BYTES, headerNameLen{}, headerValueLen{})(data, header{}, value{}, 
data_idx);\n", i + 1, i + 1, i + 1, i + 1, i + 1); circuit_buffer += &format!( " hasMatchedHeaderValue{} += headerNameValueMatch{}[data_idx];\n", i + 1, @@ -506,7 +571,7 @@ fn build_http_circuit( "#; } - circuit_buffer += " }"; + circuit_buffer += " }"; // debugging if debug { @@ -527,10 +592,17 @@ fn build_http_circuit( { if let HttpData::Response(_) = data { circuit_buffer += r#" + _ <== State[DATA_BYTES-1].next_line_status; + _ <== State[DATA_BYTES-1].next_parsing_start; + _ <== State[DATA_BYTES-1].next_parsing_header; + _ <== State[DATA_BYTES-1].next_parsing_field_name; + _ <== State[DATA_BYTES-1].next_parsing_field_value; + signal bodyStartingIndex[DATA_BYTES]; signal isZeroMask[DATA_BYTES]; signal isPrevStartingIndex[DATA_BYTES]; bodyStartingIndex[0] <== 0; + isPrevStartingIndex[0] <== 0; isZeroMask[0] <== IsZero()(bodyMask[0]); for (var i=1 ; i < DATA_BYTES; i++) { isZeroMask[i] <== IsZero()(bodyMask[i]); From 004596bd6c7b6b36b695b6b3918b5ebdf74d600e Mon Sep 17 00:00:00 2001 From: lonerapier Date: Thu, 26 Sep 2024 15:45:19 +0530 Subject: [PATCH 07/10] fix: codegen --- src/codegen/http.rs | 65 ++++++++++++++++++++++----------------------- src/codegen/json.rs | 51 +++-------------------------------- 2 files changed, 36 insertions(+), 80 deletions(-) diff --git a/src/codegen/http.rs b/src/codegen/http.rs index af2e36a..c513f27 100644 --- a/src/codegen/http.rs +++ b/src/codegen/http.rs @@ -396,8 +396,7 @@ fn build_http_circuit( } } - circuit_buffer += r#" - component State[DATA_BYTES]; + circuit_buffer += r#" component State[DATA_BYTES]; State[0] = HttpStateUpdate(); State[0].byte <== data[0]; State[0].parsing_start <== 1; @@ -406,15 +405,14 @@ fn build_http_circuit( State[0].parsing_field_value <== 0; State[0].parsing_body <== 0; State[0].line_status <== 0; - "#; // If parsing a `Response`, create a mask of the body bytes { if let HttpData::Response(_) = data { circuit_buffer += r#" - // Mask if parser is in the body of response - bodyMask[0] <== data[0] * 
State[0].next_parsing_body; + // Mask if parser is in the body of response + bodyMask[0] <== data[0] * State[0].next_parsing_body; "#; } } @@ -431,10 +429,10 @@ fn build_http_circuit( // } // Get the target bytes - startLineMask[0] <== inStartLine()(State[0].next_parsing_start); - targetMask[0] <== inStartMiddle()(State[0].next_parsing_start); - versionMask[0] <== inStartEnd()(State[0].next_parsing_start); - target_start_counter += startLineMask[0] - targetMask[0] - versionMask[0]; + startLineMask[0] <== inStartLine()(State[0].next_parsing_start); + targetMask[0] <== inStartMiddle()(State[0].next_parsing_start); + versionMask[0] <== inStartEnd()(State[0].next_parsing_start); + target_start_counter += startLineMask[0] - targetMask[0] - versionMask[0]; // Get the version bytes target_end_counter += startLineMask[0] - versionMask[0]; @@ -450,10 +448,10 @@ fn build_http_circuit( // } // Get the status bytes - startLineMask[0] <== inStartLine()(State[0].next_parsing_start); - statusMask[0] <== inStartMiddle()(State[0].next_parsing_start); - messageMask[0] <== inStartEnd()(State[0].next_parsing_start); - status_start_counter += startLineMask[0] - statusMask[0] - messageMask[0]; + startLineMask[0] <== inStartLine()(State[0].next_parsing_start); + statusMask[0] <== inStartMiddle()(State[0].next_parsing_start); + messageMask[0] <== inStartEnd()(State[0].next_parsing_start); + status_start_counter += startLineMask[0] - statusMask[0] - messageMask[0]; // Get the message bytes status_end_counter += startLineMask[0] - messageMask[0]; @@ -465,9 +463,9 @@ fn build_http_circuit( // Header matches { for (i, _header) in data.headers().iter().enumerate() { - circuit_buffer += &format!(" headerNameValueMatch{}[0] <== HeaderFieldNameValueMatch(DATA_BYTES, headerNameLen{}, headerValueLen{})(data, header{}, value{}, 0);\n", i + 1, i + 1, i + 1, i + 1, i + 1); + circuit_buffer += &format!(" headerNameValueMatch{}[0] <== HeaderFieldNameValueMatch(DATA_BYTES, headerNameLen{}, 
headerValueLen{})(data, header{}, value{}, 0);\n", i + 1, i + 1, i + 1, i + 1, i + 1); circuit_buffer += &format!( - " hasMatchedHeaderValue{} += headerNameValueMatch{}[0];\n", + " hasMatchedHeaderValue{} += headerNameValueMatch{}[0];\n", i + 1, i + 1 ); @@ -513,9 +511,9 @@ fn build_http_circuit( } // Get the target bytes - startLineMask[data_idx] <== inStartLine()(State[data_idx].parsing_start); - targetMask[data_idx] <== inStartMiddle()(State[data_idx].parsing_start); - versionMask[data_idx] <== inStartEnd()(State[data_idx].parsing_start); + startLineMask[data_idx] <== inStartLine()(State[data_idx].next_parsing_start); + targetMask[data_idx] <== inStartMiddle()(State[data_idx].next_parsing_start); + versionMask[data_idx] <== inStartEnd()(State[data_idx].next_parsing_start); target_start_counter += startLineMask[data_idx] - targetMask[data_idx] - versionMask[data_idx]; // Get the version bytes @@ -532,9 +530,9 @@ fn build_http_circuit( } // Get the status bytes - startLineMask[data_idx] <== inStartLine()(State[data_idx].parsing_start); - statusMask[data_idx] <== inStartMiddle()(State[data_idx].parsing_start); - messageMask[data_idx] <== inStartEnd()(State[data_idx].parsing_start); + startLineMask[data_idx] <== inStartLine()(State[data_idx].next_parsing_start); + statusMask[data_idx] <== inStartMiddle()(State[data_idx].next_parsing_start); + messageMask[data_idx] <== inStartEnd()(State[data_idx].next_parsing_start); status_start_counter += startLineMask[data_idx] - statusMask[data_idx] - messageMask[data_idx]; // Get the message bytes @@ -571,7 +569,13 @@ fn build_http_circuit( "#; } - circuit_buffer += " }"; + circuit_buffer += " } + + _ <== State[DATA_BYTES-1].next_line_status; + _ <== State[DATA_BYTES-1].next_parsing_start; + _ <== State[DATA_BYTES-1].next_parsing_header; + _ <== State[DATA_BYTES-1].next_parsing_field_name; + _ <== State[DATA_BYTES-1].next_parsing_field_value;\n"; // debugging if debug { @@ -592,11 +596,6 @@ fn build_http_circuit( { if let 
HttpData::Response(_) = data { circuit_buffer += r#" - _ <== State[DATA_BYTES-1].next_line_status; - _ <== State[DATA_BYTES-1].next_parsing_start; - _ <== State[DATA_BYTES-1].next_parsing_header; - _ <== State[DATA_BYTES-1].next_parsing_field_name; - _ <== State[DATA_BYTES-1].next_parsing_field_value; signal bodyStartingIndex[DATA_BYTES]; signal isZeroMask[DATA_BYTES]; @@ -629,15 +628,15 @@ fn build_http_circuit( HttpData::Request(_) => { circuit_buffer += r#" // Verify method had correct length - methodLen === target_start_counter - 1; + methodLen === target_start_counter; // Check target is correct by substring match and length check - signal targetMatch <== SubstringMatchWithIndex(DATA_BYTES, targetLen)(data, target, target_start_counter); + signal targetMatch <== SubstringMatchWithIndex(DATA_BYTES, targetLen)(data, target, target_start_counter + 1); targetMatch === 1; targetLen === target_end_counter - target_start_counter - 1; // Check version is correct by substring match and length check - signal versionMatch <== SubstringMatchWithIndex(DATA_BYTES, versionLen)(data, version, target_end_counter); + signal versionMatch <== SubstringMatchWithIndex(DATA_BYTES, versionLen)(data, version, target_end_counter + 1); versionMatch === 1; // -2 here for the CRLF versionLen === version_end_counter - target_end_counter - 2; @@ -646,15 +645,15 @@ fn build_http_circuit( HttpData::Response(_) => { circuit_buffer += r#" // Verify version had correct length - versionLen === status_start_counter - 1; + versionLen === status_start_counter; // Check status is correct by substring match and length check - signal statusMatch <== SubstringMatchWithIndex(DATA_BYTES, statusLen)(data, status, status_start_counter); + signal statusMatch <== SubstringMatchWithIndex(DATA_BYTES, statusLen)(data, status, status_start_counter + 1); statusMatch === 1; statusLen === status_end_counter - status_start_counter - 1; // Check message is correct by substring match and length check - signal 
messageMatch <== SubstringMatchWithIndex(DATA_BYTES, messageLen)(data, message, status_end_counter); + signal messageMatch <== SubstringMatchWithIndex(DATA_BYTES, messageLen)(data, message, status_end_counter + 1); messageMatch === 1; // -2 here for the CRLF messageLen === message_end_counter - status_end_counter - 2; diff --git a/src/codegen/json.rs b/src/codegen/json.rs index 19b9412..a90d545 100644 --- a/src/codegen/json.rs +++ b/src/codegen/json.rs @@ -5,7 +5,6 @@ use std::{ collections::HashMap, error::Error, fs::{self, create_dir_all}, - str::FromStr, }; use crate::{circuit_config::CircomkitCircuitConfig, ExtractorArgs}; @@ -326,49 +325,7 @@ fn build_json_circuit( } } } - - /* - component rHasher = PoseidonModular(dataLen + keyLen1 + keyLen3); - for (var i = 0; i < keyLen1; i++) { - rHasher.in[i] <== key1[i]; - } - for (var i = 0; i < keyLen3; i++) { - rHasher.in[keyLen1 + i] <== key3[i]; - } - for (var i = 0; i < dataLen; i++) { - rHasher.in[i + keyLen1 + keyLen3] <== data[i]; - } - signal r <== rHasher.out; - */ - { - circuit_buffer += "\n // r must be secret, so either has to be derived from hash in the circuit or off the circuit\n component rHasher = PoseidonModular(DATA_BYTES + "; - for (i, key) in data.keys.iter().enumerate() { - match key { - Key::String(_) => circuit_buffer += &format!(" keyLen{} +", i + 1), - Key::Num(_) => (), - } - } - circuit_buffer.pop(); - circuit_buffer.pop(); - circuit_buffer += ");\n"; - - let mut key_len_counter_str = String::from_str("i")?; - for (i, key) in data.keys.iter().enumerate() { - match key { - Key::String(_) => { - circuit_buffer += &format!(" for (var i = 0 ; i < keyLen{} ; i++) {{\n rHasher.in[{}] <== key{}[i];\n }}\n", i+1, key_len_counter_str, i+1); - key_len_counter_str += &format!(" + keyLen{}", i + 1); - } - Key::Num(_) => (), - } - } - - circuit_buffer += &format!(" for (var i = 0 ; i < DATA_BYTES ; i++) {{\n rHasher.in[{}] <== data[i];\n }}\n", key_len_counter_str); - } - - circuit_buffer += r#" signal r 
<== rHasher.out; - - // value starting index in `data` + circuit_buffer += r#" // value starting index in `data` signal output value_starting_index[DATA_BYTES]; // flag determining whether this byte is matched value signal is_value_match[DATA_BYTES]; @@ -464,7 +421,7 @@ fn build_json_circuit( match key { Key::String(_) => { num_objects += 1; - circuit_buffer += &format!(" is_key{}_match[0] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen{}, depth{})(data, key{}, r, 0, parsing_key[0], State[0].next_stack);\n", i+1, i+1, i+1, i+1); + circuit_buffer += &format!(" is_key{}_match[0] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen{}, depth{})(data, key{}, 0, parsing_key[0], State[0].next_stack);\n", i+1, i+1, i+1, i+1); circuit_buffer += &format!(" is_next_pair_at_depth{}[0] <== NextKVPairAtDepth(MAX_STACK_HEIGHT, depth{})(State[0].next_stack, data[0]);\n", i+1, i+1); circuit_buffer += &format!(" is_key{}_match_for_value[1] <== Mux1()([is_key{}_match_for_value[0] * (1-is_next_pair_at_depth{}[0]), is_key{}_match[0] * (1-is_next_pair_at_depth{}[0])], is_key{}_match[0]);\n", i+1, i+1, i+1, i+1, i+1, i+1); if debug { @@ -598,7 +555,7 @@ fn build_json_circuit( - key matches at current index and depth of key is as specified - whether next KV pair starts - whether key matched for a value (propogate key match until new KV pair of lower depth starts) - is_key1_match[data_idx-1] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen1, depth1)(data, key1, r, data_idx-1, parsing_key[data_idx-1], State[data_idx].stack); + is_key1_match[data_idx-1] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen1, depth1)(data, key1, data_idx-1, parsing_key[data_idx-1], State[data_idx].stack); is_next_pair_at_depth1[data_idx-1] <== NextKVPairAtDepth(MAX_STACK_HEIGHT, depth1)(State[data_idx].stack, data[data_idx-1]); is_key1_match_for_value[data_idx] <== Mux1()([is_key1_match_for_value[data_idx-1] * (1-is_next_pair_at_depth1[data_idx-1]), is_key1_match[data_idx-1] * 
(1-is_next_pair_at_depth1[data_idx-1])], is_key1_match[data_idx-1]); */ @@ -614,7 +571,7 @@ fn build_json_circuit( match key { Key::String(_) => { num_objects += 1; - circuit_buffer += &format!(" is_key{}_match[data_idx] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen{}, depth{})(data, key{}, r, data_idx, parsing_key[data_idx], State[data_idx].next_stack);\n", i+1, i+1, i+1, i+1); + circuit_buffer += &format!(" is_key{}_match[data_idx] <== KeyMatchAtDepth(DATA_BYTES, MAX_STACK_HEIGHT, keyLen{}, depth{})(data, key{}, data_idx, parsing_key[data_idx], State[data_idx].next_stack);\n", i+1, i+1, i+1, i+1); circuit_buffer += &format!(" is_next_pair_at_depth{}[data_idx] <== NextKVPairAtDepth(MAX_STACK_HEIGHT, depth{})(State[data_idx].next_stack, data[data_idx]);\n", i+1, i+1); circuit_buffer += &format!(" is_key{}_match_for_value[data_idx+1] <== Mux1()([is_key{}_match_for_value[data_idx] * (1-is_next_pair_at_depth{}[data_idx]), is_key{}_match[data_idx] * (1-is_next_pair_at_depth{}[data_idx])], is_key{}_match[data_idx]);\n", i+1, i+1, i+1, i+1, i+1, i+1); if debug { From 856ad18f2eca45b3620a775f03952cf3da37062d Mon Sep 17 00:00:00 2001 From: lonerapier Date: Thu, 26 Sep 2024 15:46:30 +0530 Subject: [PATCH 08/10] feat: remove hashing based substring matching --- circuits/http/locker.circom | 8 ++---- circuits/http/parser/machine.circom | 1 + circuits/json/interpreter.circom | 6 ++--- circuits/test/utils/search.test.ts | 39 +++++++++++++++++++++++++++-- circuits/utils/search.circom | 11 ++++++-- 5 files changed, 51 insertions(+), 14 deletions(-) diff --git a/circuits/http/locker.circom b/circuits/http/locker.circom index a2bde99..cd8c439 100644 --- a/circuits/http/locker.circom +++ b/circuits/http/locker.circom @@ -101,14 +101,12 @@ template LockStartLine(DATA_BYTES, beginningLen, middleLen, finalLen) { beginningLen === middle_start_counter - 1; // Check middle is correct by substring match and length check - // TODO: change r - signal middleMatch <== 
SubstringMatchWithIndex(DATA_BYTES, middleLen)(data, middle, 100, middle_start_counter); + signal middleMatch <== SubstringMatchWithIndex(DATA_BYTES, middleLen)(data, middle, middle_start_counter); middleMatch === 1; middleLen === middle_end_counter - middle_start_counter - 1; // Check final is correct by substring match and length check - // TODO: change r - signal finalMatch <== SubstringMatchWithIndex(DATA_BYTES, finalLen)(data, final, 100, middle_end_counter); + signal finalMatch <== SubstringMatchWithIndex(DATA_BYTES, finalLen)(data, final, middle_end_counter); finalMatch === 1; // -2 here for the CRLF finalLen === final_end_counter - middle_end_counter - 2; @@ -153,12 +151,10 @@ template LockHeader(DATA_BYTES, headerNameLen, headerValueLen) { State[data_idx].parsing_body <== State[data_idx - 1].next_parsing_body; State[data_idx].line_status <== State[data_idx - 1].next_line_status; - // TODO: change r headerFieldNameValueMatch[data_idx] = HeaderFieldNameValueMatch(DATA_BYTES, headerNameLen, headerValueLen); headerFieldNameValueMatch[data_idx].data <== data; headerFieldNameValueMatch[data_idx].headerName <== header; headerFieldNameValueMatch[data_idx].headerValue <== value; - headerFieldNameValueMatch[data_idx].r <== 100; headerFieldNameValueMatch[data_idx].index <== data_idx; isHeaderFieldNameValueMatch[data_idx] <== isHeaderFieldNameValueMatch[data_idx-1] + headerFieldNameValueMatch[data_idx].out; diff --git a/circuits/http/parser/machine.circom b/circuits/http/parser/machine.circom index 7bfd33d..438a178 100644 --- a/circuits/http/parser/machine.circom +++ b/circuits/http/parser/machine.circom @@ -91,6 +91,7 @@ template StateChange() { // enable parsing header on reading CRLF signal enableParsingHeader <== readCRLF * isParsingStart; // check if we are parsing header + // TODO: correct this 3 (it means we can parse max 2^3 headers) signal isParsingHeader <== GreaterEqThan(3)([state[1], 1]); // increment parsing header counter on CRLF and parsing header 
signal incrementParsingHeader <== readCRLF * isParsingHeader; diff --git a/circuits/json/interpreter.circom b/circuits/json/interpreter.circom index a1bfa20..be9358c 100644 --- a/circuits/json/interpreter.circom +++ b/circuits/json/interpreter.circom @@ -245,7 +245,6 @@ template NextKVPairAtDepth(n, depth) { template KeyMatch(dataLen, keyLen) { signal input data[dataLen]; signal input key[keyLen]; - signal input r; signal input index; signal input parsing_key; @@ -256,7 +255,7 @@ template KeyMatch(dataLen, keyLen) { signal start_of_key <== IndexSelector(dataLen)(data, index - 1); signal is_start_of_key_equal_to_quote <== IsEqual()([start_of_key, 34]); - signal substring_match <== SubstringMatchWithIndex(dataLen, keyLen)(data, key, r, index); + signal substring_match <== SubstringMatchWithIndex(dataLen, keyLen)(data, key, index); signal is_key_between_quotes <== is_start_of_key_equal_to_quote * is_end_of_key_equal_to_quote; signal is_parsing_correct_key <== is_key_between_quotes * parsing_key; @@ -285,7 +284,6 @@ template KeyMatch(dataLen, keyLen) { template KeyMatchAtDepth(dataLen, n, keyLen, depth) { signal input data[dataLen]; signal input key[keyLen]; - signal input r; signal input index; signal input parsing_key; signal input stack[n][2]; @@ -306,7 +304,7 @@ template KeyMatchAtDepth(dataLen, n, keyLen, depth) { signal is_start_of_key_equal_to_quote <== IsEqual()([start_of_key, 34]); // key matches - signal substring_match <== SubstringMatchWithIndex(dataLen, keyLen)(data, key, r, index); + signal substring_match <== SubstringMatchWithIndex(dataLen, keyLen)(data, key, index); // key should be a string signal is_key_between_quotes <== is_start_of_key_equal_to_quote * is_end_of_key_equal_to_quote; diff --git a/circuits/test/utils/search.test.ts b/circuits/test/utils/search.test.ts index e6cbbe1..91f2ded 100644 --- a/circuits/test/utils/search.test.ts +++ b/circuits/test/utils/search.test.ts @@ -80,13 +80,13 @@ describe("search", () => { }); }); - 
describe("SubstringMatchWithIndex", () => { + describe("SubstringMatchWithHasher", () => { let circuit: WitnessTester<["data", "key", "r", "start"], ["out"]>; before(async () => { circuit = await circomkit.WitnessTester(`SubstringSearch`, { file: "utils/search", - template: "SubstringMatchWithIndex", + template: "SubstringMatchWithHasher", params: [787, 10], }); console.log("#constraints:", await circuit.getConstraintCount()); @@ -117,6 +117,41 @@ describe("search", () => { }); }); + describe("SubstringMatchWithIndex", () => { + let circuit: WitnessTester<["data", "key", "start"], ["out"]>; + + before(async () => { + circuit = await circomkit.WitnessTester(`SubstringSearch`, { + file: "utils/search", + template: "SubstringMatchWithIndex", + params: [787, 10], + }); + console.log("#constraints:", await circuit.getConstraintCount()); + }); + + it("data = witness.json:data, key = witness.json:key, start = 6", async () => { + await circuit.expectPass( + { + data: witness["data"], + key: witness["key"], + start: 6 + }, + { out: 1 }, + ); + }); + + it("data = witness.json:data, key = witness.json:key, start = 98, output false", async () => { + await circuit.expectPass( + { + data: witness["data"], + key: witness["key"], + start: 98 + }, + { out: 0 } + ); + }); + }); + describe("SubstringMatch", () => { let circuit: WitnessTester<["data", "key"], ["position"]>; diff --git a/circuits/utils/search.circom b/circuits/utils/search.circom index a257814..b839fd8 100644 --- a/circuits/utils/search.circom +++ b/circuits/utils/search.circom @@ -1,5 +1,6 @@ pragma circom 2.1.9; +include "circomlib/circuits/comparators.circom"; include "circomlib/circuits/mux1.circom"; include "./hash.circom"; include "./operators.circom"; @@ -221,8 +222,14 @@ template SubstringMatchWithIndex(dataLen, keyLen) { signal input key[keyLen]; signal input start; - signal subarray[keyLen] <== SelectSubArray(dataLen, keyLen)(data, start, keyLen); - signal output out <== 
IsEqualArray(keyLen)([key, subarray]); + var logDataLen = log2Ceil(dataLen + keyLen + 1); + + signal isStartLessThanMaxLength <== LessThan(logDataLen)([start, dataLen]); + signal index <== start * isStartLessThanMaxLength; + + signal subarray[keyLen] <== SelectSubArray(dataLen, keyLen)(data, index, keyLen); + signal isSubarrayMatch <== IsEqualArray(keyLen)([key, subarray]); + signal output out <== isStartLessThanMaxLength * isSubarrayMatch; } /* From db655bbdd5742465e06212d313ba3a1fbc70e333 Mon Sep 17 00:00:00 2001 From: lonerapier Date: Thu, 26 Sep 2024 15:47:15 +0530 Subject: [PATCH 09/10] fix: spotify test --- circuits/test/http/codegen.test.ts | 47 ++++++++++++ .../test/json/extractor/extractor.test.ts | 32 ++++++++ circuits/test/spotify_top_artists.test.ts | 74 ------------------- 3 files changed, 79 insertions(+), 74 deletions(-) diff --git a/circuits/test/http/codegen.test.ts b/circuits/test/http/codegen.test.ts index 2db0a23..16f750d 100644 --- a/circuits/test/http/codegen.test.ts +++ b/circuits/test/http/codegen.test.ts @@ -213,4 +213,51 @@ describe("HTTP :: Codegen :: Response", async () => { circuitInput.value1 = toByte("/aip"); await circuit.expectFail(circuitInput); }); +}); + +describe("spotify_top_artists_http", async () => { + let http_circuit: WitnessTester<["data", "version", "status", "message", "header1", "value1"], ["body"]>; + + it("POST response body", async () => { + let httpLockfile = "spotify.lock" + let httpInputFile = "spotify_top_artists_response.http"; + let httpCircuitName = "spotify_top_artists"; + + await executeCodegen(`${httpCircuitName}_test`, httpInputFile, `${httpLockfile}.json`); + + const lockData = readLockFile(`${httpLockfile}.json`); + + const http = readHTTPInputFile(`${httpInputFile}`); + const inputHttp = http.input; + + const headers = getHeaders(lockData); + + const params = [inputHttp.length, http.bodyBytes.length, lockData.version.length, lockData.status.length, lockData.message.length]; + headers.forEach(header => 
{ + params.push(header[0].length); + params.push(header[1].length); + }); + + http_circuit = await circomkit.WitnessTester(`Extract`, { + file: `main/http_${httpCircuitName}_test`, + template: "LockHTTPResponse", + params: params, + }); + console.log("#constraints:", await http_circuit.getConstraintCount()); + + // match circuit output to original JSON value + const circuitInput: any = { + data: inputHttp, + version: toByte(lockData.version), + status: toByte(lockData.status), + message: toByte(lockData.message), + }; + + headers.forEach((header, index) => { + circuitInput[`header${index + 1}`] = toByte(header[0]); + circuitInput[`value${index + 1}`] = toByte(header[1]); + }); + + await http_circuit.expectPass(circuitInput, { body: http.bodyBytes }); + }); }); \ No newline at end of file diff --git a/circuits/test/json/extractor/extractor.test.ts b/circuits/test/json/extractor/extractor.test.ts index ec9860c..f5db3b9 100644 --- a/circuits/test/json/extractor/extractor.test.ts +++ b/circuits/test/json/extractor/extractor.test.ts @@ -197,4 +197,36 @@ describe("ExtractValueArrayObject", () => { await circuit.expectPass({ data: input, key1: keyUnicode[0], key3: keyUnicode[2] }, { value: num }); }); +}); + +describe("spotify_top_artists_json", async () => { + let json_circuit: WitnessTester<["data", "key1", "key2", "key4", "key5"], ["value"]>; + + it("response matcher", async () => { + let jsonFilename = "spotify"; + + await executeCodegen(`${jsonFilename}_test`, `${jsonFilename}.json`, `${jsonFilename}.json`); + + let index_0 = 0; + + let [inputJson, key, output] = readJSONInputFile( + `${jsonFilename}.json`, + [ + "data", + "items", + index_0, + "profile", + "name" + ] + ); + + json_circuit = await circomkit.WitnessTester(`Extract`, { + file: `main/json_${jsonFilename}_test`, + template: "ExtractStringValue", + params: [inputJson.length, 5, 4, 0, 5, 1, index_0, 2, 7, 3, 4, 4, 12], + }); + console.log("#constraints:", await json_circuit.getConstraintCount()); + + await 
json_circuit.expectPass({ data: inputJson, key1: key[0], key2: key[1], key4: key[3], key5: key[4] }, { value: output }); + }); }); \ No newline at end of file diff --git a/circuits/test/spotify_top_artists.test.ts b/circuits/test/spotify_top_artists.test.ts index 208cfd4..ef4f491 100644 --- a/circuits/test/spotify_top_artists.test.ts +++ b/circuits/test/spotify_top_artists.test.ts @@ -32,80 +32,6 @@ async function extendedLockfileCodegen(circuitName: string, inputFileName: strin }) } -// describe("spotify top artists separate", async () => { -// let http_circuit: WitnessTester<["data", "version", "status", "message", "header1", "value1"], ["body"]>; -// let json_circuit: WitnessTester<["data", "key1", "key2", "key4", "key5"], ["value"]>; - -// it("POST response body extraction", async () => { -// let httpLockfile = "spotify.lock" -// let httpInputFile = "spotify_top_artists_response.http"; -// let httpCircuitName = "spotify_top_artists"; - -// await httpLockfileCodegen(httpCircuitName, httpInputFile, `${httpLockfile}.json`); - -// let jsonFilename = "spotify"; - -// await jsonLockfileCodegen(`${jsonFilename}_test`, `${jsonFilename}.json`, `${jsonFilename}.json`); - -// const lockData = readLockFile(`${httpLockfile}.json`); - -// const http = readHTTPInputFile(`${httpInputFile}`); -// const inputHttp = http.input; - -// const headers = getHttpHeaders(lockData); - -// const params = [inputHttp.length, http.bodyBytes.length, lockData.version.length, lockData.status.length, lockData.message.length]; -// headers.forEach(header => { -// params.push(header[0].length); -// params.push(header[1].length); -// }); - -// http_circuit = await circomkit.WitnessTester(`Extract`, { -// file: `main/http_${httpCircuitName}`, -// template: "LockHTTPResponse", -// params: params, -// }); -// console.log("#constraints:", await http_circuit.getConstraintCount()); - -// // match circuit output to original JSON value -// const circuitInput: any = { -// data: inputHttp, -// version: 
toByte(lockData.version), -// status: toByte(lockData.status), -// message: toByte(lockData.message), -// }; - -// headers.forEach((header, index) => { -// circuitInput[`header${index + 1}`] = toByte(header[0]); -// circuitInput[`value${index + 1}`] = toByte(header[1]); -// }); - -// await http_circuit.expectPass(circuitInput, { body: http.bodyBytes }); - -// let index_0 = 0; - -// let [inputJson, key, output] = readJSONInputFile( -// `${jsonFilename}.json`, -// [ -// "data", -// "items", -// index_0, -// "profile", -// "name" -// ] -// ); - -// json_circuit = await circomkit.WitnessTester(`Extract`, { -// file: `main/json_${jsonFilename}_test`, -// template: "ExtractStringValue", -// params: [inputJson.length, 5, 4, 0, 5, 1, index_0, 2, 7, 3, 4, 4, 12], -// }); -// console.log("#constraints:", await json_circuit.getConstraintCount()); - -// await json_circuit.expectPass({ data: inputJson, key1: key[0], key2: key[1], key4: key[3], key5: key[4] }, { value: output }); -// }); -// }); - interface JsonLockfile { keys: any[], valueType: string, From f358bb040c42599ae5fd6614999a6c7dbdcb82b8 Mon Sep 17 00:00:00 2001 From: lonerapier Date: Thu, 26 Sep 2024 15:47:38 +0530 Subject: [PATCH 10/10] fix: tests --- circuits/test/http/interpreter.test.ts | 6 ++-- .../test/json/extractor/interpreter.test.ts | 35 +++++++++---------- 2 files changed, 19 insertions(+), 22 deletions(-) diff --git a/circuits/test/http/interpreter.test.ts b/circuits/test/http/interpreter.test.ts index 5986eac..24113c7 100644 --- a/circuits/test/http/interpreter.test.ts +++ b/circuits/test/http/interpreter.test.ts @@ -3,7 +3,7 @@ import { readHTTPInputFile } from "../common/http"; describe("HTTP :: Interpreter", async () => { describe("MethodMatch", async () => { - let circuit: WitnessTester<["data", "method", "r", "index"], []>; + let circuit: WitnessTester<["data", "method", "index"], []>; function generatePassCase(input: number[], method: number[], index: number, desc: string) { const description = 
generateDescription(input); @@ -16,7 +16,7 @@ describe("HTTP :: Interpreter", async () => { }); console.log("#constraints:", await circuit.getConstraintCount()); - await circuit.expectPass({ data: input, method: method, r: 100, index: index }, {}); + await circuit.expectPass({ data: input, method: method, index: index }, {}); }); } @@ -31,7 +31,7 @@ describe("HTTP :: Interpreter", async () => { }); console.log("#constraints:", await circuit.getConstraintCount()); - await circuit.expectFail({ data: input, method: method, r: 100, index: index }); + await circuit.expectFail({ data: input, method: method, index: index }); }); } diff --git a/circuits/test/json/extractor/interpreter.test.ts b/circuits/test/json/extractor/interpreter.test.ts index 43e1762..bf6a220 100644 --- a/circuits/test/json/extractor/interpreter.test.ts +++ b/circuits/test/json/extractor/interpreter.test.ts @@ -269,7 +269,7 @@ describe("Interpreter", async () => { }); describe("KeyMatch", async () => { - let circuit: WitnessTester<["data", "key", "r", "index", "parsing_key"], ["out"]>; + let circuit: WitnessTester<["data", "key", "index", "parsing_key"], ["out"]>; function generatePassCase(input: any, expected: any, desc: string) { const description = generateDescription(input); @@ -287,30 +287,28 @@ describe("Interpreter", async () => { } let input = readJSONInputFile("value_array_object.json", ["a"]); - const concatenatedInput = input[1][0].concat(input[0]); - const hashResult = PoseidonModular(concatenatedInput); let output = { out: 1 }; - let input1 = { data: input[0], key: input[1][0], r: hashResult, index: 2, parsing_key: 1 }; + let input1 = { data: input[0], key: input[1][0], index: 2, parsing_key: 1 }; generatePassCase(input1, output, ""); - let input2 = { data: input[0], key: [99], r: hashResult, index: 20, parsing_key: 1 }; + let input2 = { data: input[0], key: [99], index: 20, parsing_key: 1 }; generatePassCase(input2, output, ""); // fail cases - let input3 = { data: input[0], key: 
input[1][0], r: hashResult, index: 3, parsing_key: 1 }; + let input3 = { data: input[0], key: input[1][0], index: 3, parsing_key: 1 }; generatePassCase(input3, { out: 0 }, "wrong index"); - let input4 = { data: input[0], key: [98], r: hashResult, index: 2, parsing_key: 1 }; + let input4 = { data: input[0], key: [98], index: 2, parsing_key: 1 }; generatePassCase(input4, { out: 0 }, "wrong key"); - let input5 = { data: input[0], key: [97], r: hashResult, index: 2, parsing_key: 0 }; + let input5 = { data: input[0], key: [97], index: 2, parsing_key: 0 }; generatePassCase(input5, { out: 0 }, "not parsing key"); }); describe("KeyMatchAtDepth", async () => { - let circuit: WitnessTester<["data", "key", "r", "index", "parsing_key", "stack"], ["out"]>; + let circuit: WitnessTester<["data", "key", "index", "parsing_key", "stack"], ["out"]>; function generatePassCase(input: any, expected: any, depth: number, desc: string) { const description = generateDescription(input); @@ -328,33 +326,32 @@ describe("Interpreter", async () => { } let input = readJSONInputFile("value_array_object.json", ["a", 0, "b", 0]); - const concatenatedInput = input[1][0].concat(input[0]); - const hashResult = PoseidonModular(concatenatedInput); let output = { out: 1 }; - let input1 = { data: input[0], key: input[1][0], r: hashResult, index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] }; + let input1 = { data: input[0], key: input[1][0], index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] }; generatePassCase(input1, output, 0, ""); - let input2 = { data: input[0], key: input[1][2], r: hashResult, index: 8, parsing_key: 1, stack: [[1, 1], [2, 0], [1, 0], [0, 0]] }; + let input2 = { data: input[0], key: input[1][2], index: 8, parsing_key: 1, stack: [[1, 1], [2, 0], [1, 0], [0, 0]] }; generatePassCase(input2, output, 2, ""); - let input3 = { data: input[0], key: [99], r: hashResult, index: 20, parsing_key: 1, stack: [[1, 1], [2, 1], [1, 1], [0, 0]] }; - 
generatePassCase(input3, { out: 1 }, 2, "wrong stack"); + let input3 = { data: input[0], key: [99], index: 20, parsing_key: 1, stack: [[1, 1], [2, 1], [1, 1], [0, 0]] }; + generatePassCase(input3, output, 2, "wrong stack"); // fail cases - let input4 = { data: input[0], key: input[1][1], r: hashResult, index: 3, parsing_key: 1, stack: [[1, 0], [2, 0], [1, 0], [0, 0]] }; + let input4 = { data: input[0], key: input[1][1], index: 3, parsing_key: 1, stack: [[1, 0], [2, 0], [1, 0], [0, 0]] }; generatePassCase(input4, { out: 0 }, 2, "wrong key"); - let input5 = { data: input[0], key: [97], r: hashResult, index: 12, parsing_key: 0, stack: [[1, 1], [2, 0], [1, 1], [0, 0]] }; + let input5 = { data: input[0], key: [97], index: 12, parsing_key: 0, stack: [[1, 1], [2, 0], [1, 1], [0, 0]] }; generatePassCase(input5, { out: 0 }, 3, "not parsing key"); let input6Data = input[0].slice(0); - let input6 = { data: input6Data.splice(1, 1, 35), key: input[1][0], r: hashResult, index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] }; + input6Data.splice(1, 1, 35); + let input6 = { data: input6Data, key: input[1][0], index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] }; generatePassCase(input6, { out: 0 }, 0, "invalid key (not surrounded by quotes)"); - let input7 = { data: input[0], key: input[1][0], r: hashResult, index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] }; - generatePassCase(input6, { out: 0 }, 1, "wrong depth"); + let input7 = { data: input[0], key: input[1][0], index: 2, parsing_key: 1, stack: [[1, 0], [0, 0], [0, 0], [0, 0]] }; + generatePassCase(input7, { out: 0 }, 1, "wrong depth"); }); }); \ No newline at end of file