Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

tests/refactor: state update and improved JSON parsing #11

Merged
merged 24 commits into from
Aug 15, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
24 changes: 24 additions & 0 deletions circuits.json
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,30 @@
21
]
},
"test_extract_two_key": {
"file": "extract",
"template": "Extract",
"params": [
4,
40
]
},
"test_extract_depth": {
"file": "extract",
"template": "Extract",
"params": [
4,
64
]
},
"test_extract_sambhav": {
"file": "extract",
"template": "Extract",
"params": [
4,
105
]
},
"test_extract_hard": {
"file": "extract",
"template": "Extract",
Expand Down
35 changes: 14 additions & 21 deletions circuits/extract.circom
Original file line number Diff line number Diff line change
Expand Up @@ -24,35 +24,28 @@ template Extract(KEY_BYTES, DATA_BYTES) {
// Initialize the parser
component State[DATA_BYTES];
State[0] = StateUpdate();
State[0].byte <== data[0];
State[0].tree_depth <== 0;
State[0].parsing_to_key <== 1; // Initialize by saying we are parsing to the first key
State[0].inside_key <== 0;
State[0].parsing_to_value <== 0;
State[0].inside_value <== 0;
State[0].escaping <== 0;
State[0].end_of_kv <== 0;
State[0].byte <== data[0];
State[0].tree_depth <== 0;
State[0].parsing_key <== 0;
State[0].inside_key <== 0;
State[0].parsing_value <== 0;
State[0].inside_value <== 0;

for(var data_pointer = 1; data_pointer < DATA_BYTES; data_pointer++) {
State[data_pointer] = StateUpdate();
State[data_pointer].byte <== data[data_pointer];
State[data_pointer].tree_depth <== State[data_pointer - 1].next_tree_depth;
State[data_pointer].parsing_to_key <== State[data_pointer - 1].next_parsing_to_key;
State[data_pointer].inside_key <== State[data_pointer - 1].next_inside_key;
State[data_pointer].parsing_to_value <== State[data_pointer - 1].next_parsing_to_value;
State[data_pointer].inside_value <== State[data_pointer - 1].next_inside_value;
State[data_pointer].end_of_kv <== State[data_pointer - 1].next_end_of_kv;
// TODO: For the next state, we should use `next_`, this is only to make this compile for now.
State[data_pointer].escaping <== State[data_pointer - 1].escaping;

State[data_pointer].byte <== data[data_pointer];
State[data_pointer].tree_depth <== State[data_pointer - 1].next_tree_depth;
State[data_pointer].parsing_key <== State[data_pointer - 1].next_parsing_key;
State[data_pointer].inside_key <== State[data_pointer - 1].next_inside_key;
State[data_pointer].parsing_value <== State[data_pointer - 1].next_parsing_value;
State[data_pointer].inside_value <== State[data_pointer - 1].next_inside_value;

// Debugging
log("State[", data_pointer, "].tree_depth", "= ", State[data_pointer].tree_depth);
log("State[", data_pointer, "].parsing_to_key", "= ", State[data_pointer].parsing_to_key);
log("State[", data_pointer, "].parsing_key", "= ", State[data_pointer].parsing_key);
log("State[", data_pointer, "].inside_key", "= ", State[data_pointer].inside_key);
log("State[", data_pointer, "].parsing_to_value", "= ", State[data_pointer].parsing_to_value);
log("State[", data_pointer, "].parsing_value", "= ", State[data_pointer].parsing_value);
log("State[", data_pointer, "].inside_value", "= ", State[data_pointer].inside_value);
log("State[", data_pointer, "].end_of_kv", "= ", State[data_pointer].end_of_kv);
log("---");
}

Expand Down
20 changes: 20 additions & 0 deletions circuits/operators.circom
Original file line number Diff line number Diff line change
Expand Up @@ -123,3 +123,23 @@ template Contains(n) {
// Apply `not` to this by 1-x
out <== 1 - someEqual.out;
}

// Element-wise addition of two length-`n` signal arrays.
//
// Inputs:
//   lhs[n], rhs[n] -- the two operand arrays.
// Output:
//   out[n]         -- out[k] is constrained to equal lhs[k] + rhs[k] for every k in [0, n).
template ArrayAdd(n) {
    signal input lhs[n];
    signal input rhs[n];
    signal output out[n];

    // Walk the arrays once, emitting one constraint per position.
    var idx = 0;
    while (idx < n) {
        out[idx] <== lhs[idx] + rhs[idx];
        idx += 1;
    }
}

// Element-wise (Hadamard) product of two length-`n` signal arrays.
//
// Inputs:
//   lhs[n], rhs[n] -- the two operand arrays.
// Output:
//   out[n]         -- out[k] is constrained to equal lhs[k] * rhs[k] for every k in [0, n).
template ArrayMul(n) {
    signal input lhs[n];
    signal input rhs[n];
    signal output out[n];

    // Walk the arrays once, emitting one quadratic constraint per position.
    var idx = 0;
    while (idx < n) {
        out[idx] <== lhs[idx] * rhs[idx];
        idx += 1;
    }
}
171 changes: 105 additions & 66 deletions circuits/parser.circom
Original file line number Diff line number Diff line change
Expand Up @@ -22,40 +22,30 @@ State[20]| " | COMPLETE WITH KV PARSING
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
State[20].next_tree_depth == 0 | VALID JSON
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx



Notes:
- If there is no comma after leaving a value, then we should not be parsing to key. If anything breaks here, JSON was bad.
*/

/*
TODO
*/
template StateUpdate() {
signal input byte;

signal input tree_depth; // STATUS_INDICATOR -- how deep in a JSON branch we are, e.g., `user.balance.value` key should be at depth `3`.
// Should always be greater than or equal to `0` (TODO: implement this constraint).
signal input byte;

signal input parsing_to_key; // BIT_FLAG -- whether we are currently parsing bytes until we find the next key (mutally exclusive with `inside_key` and both `*_value flags).
signal input inside_key; // BIT_FLAG -- whether we are currently inside a key (mutually exclusive with `parsing_to_key` and both `*_value` flags).

signal input parsing_to_value; // BIT_FLAG -- whether we are currently parsing bytes until we find the next value (mutually exclusive with `inside_value` and both `*_key` flags).
signal input inside_value; // BIT_FLAG -- whether we are currently inside a value (mutually exclusive with `parsing_to_value` and both `*_key` flags).

signal input escaping; // BIT_FLAG -- whether we have hit an escape ASCII symbol inside of a key or value.
signal input tree_depth; // STATUS_INDICATOR -- how deep in a JSON branch we are, e.g., `user.balance.value` key should be at depth `3`.
// constrained to be greater than or equal to `0`.
signal input parsing_key; // BIT_FLAG -- whether we are currently parsing bytes until we find the next key (mutally exclusive with `inside_key` and both `*_value flags).
signal input inside_key; // BIT_FLAG -- whether we are currently inside a key (mutually exclusive with `parsing_key` and both `*_value` flags).
signal input parsing_value; // BIT_FLAG -- whether we are currently parsing bytes until we find the next value (mutually exclusive with `inside_value` and both `*_key` flags).
signal input inside_value; // BIT_FLAG -- whether we are currently inside a value (mutually exclusive with `parsing_value` and both `*_key` flags).

signal input end_of_kv; // BIT_FLAG -- reached end of key-value sequence, looking for comma delimiter or end of file signified by `tree_depth == 0`.
signal output next_tree_depth; // STATUS_INDICATOR -- next state for `tree_depth`.
signal output next_parsing_key; // BIT_FLAG -- next state for `parsing_key`.
signal output next_inside_key; // BIT_FLAG -- next state for `inside_key`.
signal output next_parsing_value; // BIT_FLAG -- next state for `parsing_value`.
signal output next_inside_value; // BIT_FLAG -- next state for `inside_value`.

signal output next_tree_depth; // BIT_FLAG -- next state for `tree_depth`.
signal output next_parsing_to_key; // BIT_FLAG -- next state for `parsing_to_key`.
signal output next_inside_key; // BIT_FLAG -- next state for `inside_key`.
signal output next_parsing_to_value; // BIT_FLAG -- next state for `parsing_to_value`.
signal output next_inside_value; // BIT_FLAG -- next state for `inside_value`.
signal output next_end_of_kv; // BIT_FLAG -- next state for `end_of_kv`.

// signal output escaping; // TODO: Add this in!
// TODO: Add this in!
// signal input escaping; // BIT_FLAG -- whether we have hit an escape ASCII symbol inside of a key or value.
// signal output escaping;

//--------------------------------------------------------------------------------------------//
//-Delimeters---------------------------------------------------------------------------------//
Expand Down Expand Up @@ -85,51 +75,67 @@ template StateUpdate() {
var escape = 92;
//--------------------------------------------------------------------------------------------//

//--------------------------------------------------------------------------------------------//
//-MACHINE INSTRUCTIONS-----------------------------------------------------------------------//
// TODO: ADD CASE FOR `is_number` for in range 48-57 https://www.ascii-code.com since a value may just be a number
// Output management
component matcher = Switch(8, 3);
var do_nothing[3] = [ 0, 0, 0]; // Command returned by switch if we want to do nothing, e.g. read a whitespace char while looking for a key
var increase_depth[3] = [ 1, 0, 0]; // Command returned by switch if we hit a start brace `{`
var decrease_depth[3] = [-1, 0, 0]; // Command returned by switch if we hit a end brace `}`
var hit_quote[3] = [ 0, 1, 0]; // Command returned by switch if we hit a quote `"`
var hit_colon[3] = [ 0, 0, 1]; // Command returned by switch if we hit a colon `:`

matcher.branches <== [start_brace, end_brace, quote, colon, start_bracket, end_bracket, comma, escape ];
matcher.vals <== [increase_depth, decrease_depth, hit_quote, hit_colon, do_nothing, do_nothing, do_nothing, do_nothing];
matcher.case <== byte;


// TODO: These could likely go into a switch statement with the output of the `Switch` above.
// TODO: Also could probably clean up things with de Morgan's laws or whatever.
// An `IF ELSE` template would also be handy!
next_inside_key <== inside_key + (parsing_to_key - inside_key) * matcher.out[1]; // IF (`parsing_to_key` AND `hit_quote`) THEN `next_inside_key <== 1` ELSEIF (`inside_key` AND `hit_quote`) THEN `next_inside_key <== 0`
// - note: can rewrite as -> `inside_key * (1-matcher.out[1]) + parsing_to_key * matcher.out[1]`, but this will not be quadratic (according to circom)
next_parsing_to_key <== parsing_to_key * (1 - matcher.out[1]); // IF (`parsing_to_key` AND `hit_quote`) THEN `parsing_to_key <== 0`

next_inside_value <== inside_value + (parsing_to_value - inside_value) * matcher.out[1]; // IF (`parsing_to_value` AND `hit_quote`) THEN `next_inside_value <== 1` ELSEIF (`inside_value` AND `hit_quote`) THEN `next_inside_value <==0`
// -note: can rewrite as -> `(1 - inside_value) * matcher_out[1] + parsing_to_value * matcher.out[1]
//--------------------------------------------------------------------------------------------//
//-Instructions for ASCII---------------------------------------------------------------------//
var state[5] = [tree_depth, parsing_key, inside_key, parsing_value, inside_value];
var do_nothing[5] = [ 0, 0, 0, 0, 0 ]; // Command returned by switch if we want to do nothing, e.g. read a whitespace char while looking for a key
var hit_start_brace[5] = [ 1, 1, 0, -1, 0 ]; // Command returned by switch if we hit a start brace `{`
var hit_end_brace[5] = [-1, 0, 0, 0, 0 ]; // Command returned by switch if we hit a end brace `}`
var hit_quote[5] = [ 0, 0, 1, 0, 1 ]; // Command returned by switch if we hit a quote `"`
var hit_colon[5] = [ 0, -1, 0, 1, 0 ]; // Command returned by switch if we hit a colon `:`
var hit_comma[5] = [ 0, 1, 0, -1, 0 ]; // Command returned by switch if we hit a comma `,`
//--------------------------------------------------------------------------------------------//

signal NOT_PARSING_TO_KEY_AND_NOT_INSIDE_KEY <== (1 - parsing_to_key) * (1 - inside_key); // (NOT `parsing_to_key`) AND (NOT `inside_key`)
signal PARSING_TO_VALUE_AND_NOT_HIT_QUOTE <== parsing_to_value * (1 - matcher.out[1]); // `parsing_to_value` AND (NOT `hit_quote`)
next_parsing_to_value <== PARSING_TO_VALUE_AND_NOT_HIT_QUOTE + NOT_PARSING_TO_KEY_AND_NOT_INSIDE_KEY * matcher.out[2]; // IF (`parsing_to_value` AND (NOT `hit_quote`)) THEN `next_parsing_to_value <== 1 ELSEIF ((NOT `parsing_to_value` AND (NOT `inside_value)) AND `hit_colon`) THEN `next_parsing_to_value <== 1`

signal NOT_PARSING_TO_VALUE_AND_NOT_INSIDE_VALUE <== (1 - parsing_to_value) * (1 - inside_value); // (NOT `parsing_to_value`) AND (NOT `inside_value`)
next_end_of_kv <== NOT_PARSING_TO_KEY_AND_NOT_INSIDE_KEY * NOT_PARSING_TO_VALUE_AND_NOT_INSIDE_VALUE; // IF ((NOT `parsing_to_key`) AND (NOT `inside_key`)) AND (NOT(`parsing_to_value`) AND NOT( `inside_value)) THEN `next_end_of_kv <== 1`


// TODO: Assert this never goes below zero (mod p)
next_tree_depth <== tree_depth + (parsing_to_key + next_end_of_kv) * matcher.out[0]; // IF ((`parsing_to_key` OR `next_end_of_kv`) AND `read_brace` THEN `increase/decrease_depth`
//--------------------------------------------------------------------------------------------//
//-State machine updating---------------------------------------------------------------------//
// * yield instruction based on what byte we read *
component matcher = Switch(5, 5);
Autoparallel marked this conversation as resolved.
Show resolved Hide resolved
matcher.branches <== [start_brace, end_brace, quote, colon, comma ];
matcher.vals <== [hit_start_brace, hit_end_brace, hit_quote, hit_colon, hit_comma];
matcher.case <== byte;
// * get the instruction mask based on current state *
component mask = StateToMask();
mask.state <== state;
// * multiply the mask array elementwise with the instruction array *
component mulMaskAndOut = ArrayMul(5);
mulMaskAndOut.lhs <== mask.mask;
mulMaskAndOut.rhs <== matcher.out;
// * add the masked instruction to the state to get new state *
component addToState = ArrayAdd(5);
addToState.lhs <== state;
addToState.rhs <== mulMaskAndOut.out;
// * set the new state *
next_tree_depth <== addToState.out[0];
next_parsing_key <== addToState.out[1];
next_inside_key <== addToState.out[2];
next_parsing_value <== addToState.out[3];
next_inside_value <== addToState.out[4];
//--------------------------------------------------------------------------------------------//

// Constrain bit flags
next_parsing_to_key * (1 - next_parsing_to_key) === 0; // - constrain that `next_parsing_to_key` remain a bit flag
next_inside_key * (1 - next_inside_key) === 0; // - constrain that `next_inside_key` remain a bit flag
next_parsing_to_value * (1 - next_parsing_to_value) === 0; // - constrain that `next_parsing_to_value` remain a bit flag
next_inside_value * (1 - next_inside_value) === 0; // - constrain that `next_inside_value` remain a bit flag
next_end_of_kv * (1 - next_end_of_kv) === 0; // - constrain that `next_end_of_kv` remain a bit flag
//--------------------------------------------------------------------------------------------//
// // DEBUGGING: internal state
// for(var i = 0; i<5; i++) {
// log("-----------------------");
// log("mask[",i,"]: ", mask.mask[i]);
// log("mulMaskAndOut[",i,"]:", mulMaskAndOut.out[i]);
// log("state[",i,"]: ", state[i]);
// log("next_state[",i,"]: ", addToState.out[i]);
// }
//--------------------------------------------------------------------------------------------//

// TODO: Can hit comma and then be sent to next KV, so comma will engage `parsing_to_key`
//--------------------------------------------------------------------------------------------//
//-Constraints--------------------------------------------------------------------------------//
// * constrain bit flags *
next_parsing_key * (1 - next_parsing_key) === 0; // - constrain that `next_parsing_key` remain a bit flag
next_inside_key * (1 - next_inside_key) === 0; // - constrain that `next_inside_key` remain a bit flag
next_parsing_value * (1 - next_parsing_value) === 0; // - constrain that `next_parsing_value` remain a bit flag
next_inside_value * (1 - next_inside_value) === 0; // - constrain that `next_inside_value` remain a bit flag
// * constrain `tree_depth` to never hit -1 (TODO: should always move in 1 bit increments?)
component isMinusOne = IsEqual();
isMinusOne.in[0] <== -1;
isMinusOne.in[1] <== next_tree_depth;
Autoparallel marked this conversation as resolved.
Show resolved Hide resolved
isMinusOne.out === 0;
//--------------------------------------------------------------------------------------------//
}

Expand Down Expand Up @@ -177,4 +183,37 @@ template Switch(m, n) {
match <== matchChecker.out;

out <== sum;
}

// TODO: Note at the moment mask 2 and 4 are the same, so this can be removed if it maintains.
// Derives a 5-element mask from a 5-element state vector.
// `state` is read as [tree_depth, parsing_key, inside_key, parsing_value, inside_value]
// (see the unpacking below) and `mask[i]` is computed from those values as
// described next to each assignment.
template StateToMask() {
signal input state[5];
signal output mask[5];

// Unpack the state vector into named values; the index order is fixed by these assignments.
var tree_depth = state[0];
var parsing_key = state[1];
var inside_key = state[2];
var parsing_value = state[3];
var inside_value = state[4];

// Products of negated flags; each equals 1 only when both factors' flags are 0 (for bit-valued flags).
signal NOT_INSIDE_KEY_AND_NOT_INSIDE_VALUE <== (1 - inside_key) * (1 - inside_value);
signal NOT_PARSING_VALUE_NOT_INSIDE_VALUE <== (1 - parsing_value) * (1 - inside_value);

// NOTE(review): `IsZero` is defined outside this view; presumably `init_tree.out` is 1 exactly when `tree_depth == 0` -- confirm.
component init_tree = IsZero();
init_tree.in <== tree_depth;

// `tree_depth` mask: the sum `init_tree.out + parsing_key + parsing_value`.
// NOTE(review): this was previously described as `IF (parsing_key XOR parsing_value XOR end_of_kv)`,
// but no `end_of_kv` value is read in this template; the sum exceeds 1 if more than one term is set.
mask[0] <== init_tree.out + parsing_key + parsing_value; // TODO: Make sure these are never both 1!
Autoparallel marked this conversation as resolved.
Show resolved Hide resolved

// `parsing_key` mask: `(1 - inside_key) * (1 - inside_value)`, i.e. 1 when neither `inside_key` nor `inside_value` is set.
// NOTE(review): this was previously described as also requiring `(NOT parsing_value)`, but `parsing_value` is not part of this expression.
Autoparallel marked this conversation as resolved.
Show resolved Hide resolved
mask[1] <== NOT_INSIDE_KEY_AND_NOT_INSIDE_VALUE;

// `inside_key` mask: `IF ((NOT parsing_value) AND (NOT inside_value) AND inside_key) THEN mask <== -1 ELSEIF (NOT parsing_value) AND (NOT inside_value) THEN mask <== 1`
// NOTE(review): evaluates to -2 when `inside_key` is 1 and the product term is 0 -- confirm that state cannot occur.
mask[2] <== NOT_PARSING_VALUE_NOT_INSIDE_VALUE - 2 * inside_key;

// `parsing_value` mask: `IF ((NOT inside_key) AND (NOT inside_value) AND (tree_depth != 0))` (the last factor is `1 - init_tree.out`)
mask[3] <== NOT_INSIDE_KEY_AND_NOT_INSIDE_VALUE * (1 - init_tree.out);

// `inside_value` mask: `parsing_value - 2 * inside_value`.
// Yields 1 for (parsing_value = 1, inside_value = 0) and -1 for (parsing_value = 1, inside_value = 1).
// NOTE(review): yields -2 for (parsing_value = 0, inside_value = 1), so the -1 case relies on `parsing_value` still being 1 while inside a value -- confirm.
mask[4] <== parsing_value - 2 * inside_value;
}
Loading
Loading