Skip to content

Commit

Permalink
refactor: language.circom
Browse files Browse the repository at this point in the history
  • Loading branch information
Autoparallel committed Aug 13, 2024
1 parent 38683a3 commit 049d45b
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 138 deletions.
42 changes: 42 additions & 0 deletions circuits/language.circom
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
pragma circom 2.1.9;

// Syntax: exposes the ASCII byte values of the JSON syntax characters as named output
// signals, so that an instantiating template can read them as `Syntax.<NAME>`.
// Takes no inputs; every output is constrained to a fixed constant.
template Syntax() {
//-Delimiters---------------------------------------------------------------------------------//
// - ASCII char: `{` (opens an object)
signal output START_BRACE <== 123;
// - ASCII char: `}` (closes an object)
signal output END_BRACE <== 125;
// - ASCII char: `[` (opens an array)
signal output START_BRACKET <== 91;
// - ASCII char: `]` (closes an array)
signal output END_BRACKET <== 93;
// - ASCII char: `"` (string delimiter)
signal output QUOTE <== 34;
// - ASCII char: `:` (key/value separator)
signal output COLON <== 58;
// - ASCII char: `,` (element separator)
signal output COMMA <== 44;
//-White_space--------------------------------------------------------------------------------//
// - ASCII char: `\n`
signal output NEWLINE <== 10;
// - ASCII char: ` `
signal output SPACE <== 32;
//-Escape-------------------------------------------------------------------------------------//
// - ASCII char: `\`
signal output ESCAPE <== 92;
//-Number_Remapping---------------------------------------------------------------------------//
// Not an ASCII code: 256 lies past the u8 range (0 - 255) and is reserved to stand for
// ANY ASCII numeral (48 - 57), so a single value can represent "some digit was read".
signal output NUMBER <== 256; // past a u8 -- reserved for ANY numerical ASCII (48 - 57)
}

// Command: exposes, as named output signals, the 4-element command arrays associated with
// each kind of character read. Takes no inputs; every output is constrained to constants.
// Each array is laid out in the order given by the STATE comment below.
template Command() {
// STATE = [pushpop, stack_val, parsing_string, parsing_number]
signal output NOTHING[4] <== [0, 0, 0, 0 ]; // Command returned by switch if we want to do nothing, e.g. read a whitespace char while looking for a key
signal output START_BRACE[4] <== [1, 1, 0, 0 ]; // Command returned by switch if we hit a start brace `{`
signal output END_BRACE[4] <== [-1, 1, 0, 0 ]; // Command returned by switch if we hit an end brace `}`
signal output START_BRACKET[4] <== [1, 2, 0, 0 ]; // TODO: Might want `in_value` to toggle. Command returned by switch if we hit a start bracket `[` (TODO: could likely be combined with end bracket)
signal output END_BRACKET[4] <== [-1, 2, 0, 0 ]; // Command returned by switch if we hit an end bracket `]`
signal output QUOTE[4] <== [0, 0, 1, 0 ]; // TODO: Might not want this to toggle `parsing_array`. Command returned by switch if we hit a quote `"`
signal output COLON[4] <== [1, 3, 0, 0 ]; // Command returned by switch if we hit a colon `:`
signal output COMMA[4] <== [-1, 4, 0, -1 ]; // Command returned by switch if we hit a comma `,`
signal output NUMBER[4] <== [0, 0, 0, 1 ]; // Command returned by switch if we hit some decimal number (e.g., ASCII 48-57)
}
97 changes: 13 additions & 84 deletions circuits/parser.circom
Original file line number Diff line number Diff line change
@@ -1,110 +1,41 @@
pragma circom 2.1.9;

include "utils.circom";
/*
Notes: for `test.json`
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
POINTER | Read In: | STATE
-------------------------------------------------
State[1] | { | PARSING TO KEY
-------------------------------------------------
State[7] | " | INSIDE KEY
-------------------------------------------------
State[12]| " | NOT INSIDE KEY
-------------------------------------------------
State[13]| : | PARSING TO VALUE
-------------------------------------------------
State[15]| " | INSIDE VALUE
-------------------------------------------------
State[19]| " | NOT INSIDE VALUE
-------------------------------------------------
State[20]| " | COMPLETE WITH KV PARSING
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
State[20].next_tree_depth == 0 | VALID JSON
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
*/
include "language.circom";

/*
JSON TYPES:
Number.
String.
Boolean.
Array.
Object.
Whitespace.
Null.
TODO: Might not need the "parsing object" and "parsing array" as these are kinda captured by the stack?
*/
// Exposes the ASCII code of the start brace `{` (123) as an output signal.
template Delimeters() {
signal output START_BRACE <== 123;
}

template StateUpdate(MAX_STACK_HEIGHT) {
//--------------------------------------------------------------------------------------------//
//-Delimeters---------------------------------------------------------------------------------//
// - ASCII char: `{`
var start_brace = 123;
// - ASCII char: `}`
var end_brace = 125;
// - ASCII char `[`
var start_bracket = 91;
// - ASCII char `]`
var end_bracket = 93;
// - ASCII char `"`
var quote = 34;
// - ASCII char `:`
var colon = 58;
// - ASCII char `,`
var comma = 44;
//--------------------------------------------------------------------------------------------//
// White space
// - ASCII char: `\n`
var newline = 10;
// - ASCII char: ` `
var space = 32;
//--------------------------------------------------------------------------------------------//
// Escape
// - ASCII char: `\`
var escape = 92;
//--------------------------------------------------------------------------------------------//

signal input byte;

signal input pointer; // POINTER -- points to the stack to mark where we currently are inside the JSON.
signal input stack[MAX_STACK_HEIGHT]; // STACK -- how deep in a JSON nest we are and what type we are currently inside (e.g., `1` for object, `-1` for array).
signal input parsing_string;
signal input parsing_number;
// TODO
// signal parsing_boolean;
// signal parsing_null; // TODO
// signal parsing_null;

signal output next_pointer;
signal output next_stack[MAX_STACK_HEIGHT];
signal output next_parsing_string;
signal output next_parsing_number;
//--------------------------------------------------------------------------------------------//
//-Instructions for ASCII---------------------------------------------------------------------//

component Syntax = Syntax();
component Command = Command();

var pushpop = 0;
var stack_val = 0;
var parsing_state[4] = [pushpop, stack_val, parsing_string, parsing_number];
var do_nothing[4] = [0, 0, 0, 0 ]; // Command returned by switch if we want to do nothing, e.g. read a whitespace char while looking for a key
var hit_start_brace[4] = [1, 1, 0, 0 ]; // Command returned by switch if we hit a start brace `{`
var hit_end_brace[4] = [-1, 1, 0, 0 ]; // Command returned by switch if we hit a end brace `}`
var hit_start_bracket[4] = [1, 2, 0, 0 ]; // TODO: Might want `in_value` to toggle. Command returned by switch if we hit a start bracket `[` (TODO: could likely be combined with end bracket)
var hit_end_bracket[4] = [-1, 2, 0, 0 ]; // Command returned by switch if we hit a start bracket `]`
var hit_quote[4] = [0, 0, 1, 0 ]; // TODO: Mightn ot want this to toglle `parsing_array`. Command returned by switch if we hit a quote `"`
var hit_colon[4] = [1, 3, 0, 0 ]; // Command returned by switch if we hit a colon `:`
var hit_comma[4] = [-1, 4, 0, -1 ]; // Command returned by switch if we hit a comma `,`
var hit_number[4] = [0, 0, 0, 1 ]; // Command returned by switch if we hit some decimal number (e.g., ASCII 48-57)
//--------------------------------------------------------------------------------------------//

//--------------------------------------------------------------------------------------------//
//-State machine updating---------------------------------------------------------------------//
// * yield instruction based on what byte we read *
component matcher = SwitchArray(8, 4);
var number = 256; // Number beyond a byte to represent an ASCII numeral
matcher.branches <== [start_brace, end_brace, quote, colon, comma, start_bracket, end_bracket, number ];
matcher.vals <== [hit_start_brace, hit_end_brace, hit_quote, hit_colon, hit_comma, hit_start_bracket, hit_end_bracket, hit_number];
matcher.branches <== [Syntax.START_BRACE, Syntax.END_BRACE, Syntax.QUOTE, Syntax.COLON, Syntax.COMMA, Syntax.START_BRACKET, Syntax.END_BRACKET, Syntax.NUMBER ];
matcher.vals <== [Command.START_BRACE, Command.END_BRACE, Command.QUOTE, Command.COLON, Command.COMMA, Command.START_BRACKET, Command.END_BRACKET, Command.NUMBER];
component numeral_range_check = InRange(8);
numeral_range_check.in <== byte;
numeral_range_check.range <== [48, 57]; // ASCII NUMERALS
Expand All @@ -125,7 +56,7 @@ template StateUpdate(MAX_STACK_HEIGHT) {
newStack.pointer <== pointer;
newStack.stack <== stack;
newStack.pushpop <== addToState.out[0];
newStack.stack_val <== addToState.out[1];
newStack.stack_val <== addToState.out[1];
next_pointer <== newStack.next_pointer;
next_stack <== newStack.next_stack;
next_parsing_string <== addToState.out[2];
Expand Down Expand Up @@ -216,8 +147,6 @@ template RewriteStack(n) {
signal output next_pointer;
signal output next_stack[n];

component Delimeters = Delimeters();
var brace = Delimeters.START_BRACE;
/*
IDEA:
Expand Down Expand Up @@ -309,9 +238,9 @@ template RewriteStack(n) {
isPushAt[i] <== indicator[i].out * isPush.out;

// Leave the stack alone except for where we indicate change
second_pop_val[i] <== isPopAtPrev[i] * corrected_stack_val;
temp_val[i] <== corrected_stack_val + (1 + corrected_stack_val) * isDoublePop;
first_pop_val[i] <== isPopAt[i] * temp_val[i]; // = isPopAt[i] * (corrected_stack_val * (1 - isDoublePop) - 3 * isDoublePop)
second_pop_val[i] <== isPopAtPrev[i] * corrected_stack_val;
temp_val[i] <== corrected_stack_val + (1 + corrected_stack_val) * isDoublePop;
first_pop_val[i] <== isPopAt[i] * temp_val[i]; // = isPopAt[i] * (corrected_stack_val * (1 - isDoublePop) - 3 * isDoublePop)

next_stack[i] <== stack[i] + isPushAt[i] * corrected_stack_val - first_pop_val[i] - second_pop_val[i];

Expand Down
55 changes: 1 addition & 54 deletions circuits/utils.circom
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@ include "circomlib/circuits/bitify.circom";
include "circomlib/circuits/comparators.circom";

/*
All tests for this file are located in: `./test/operators.test.ts`
All tests for this file are located in: `./test/utils/utils.test.ts`
*/

template ASCII(n) {
Expand Down Expand Up @@ -109,59 +109,6 @@ template ArrayMul(n) {
}
}

// template LessThan(n) {
// assert(n <= 252);
// signal input in[2];
// signal output out;

// component n2b = Num2Bits(n+1);

// n2b.in <== in[0]+ (1<<n) - in[1];

// out <== 1-n2b.out[n];
// }



// // N is the number of bits the input have.
// // The MSF is the sign bit.
// template LessEqThan(n) {
// signal input in[2];
// signal output out;

// component lt = LessThan(n);

// lt.in[0] <== in[0];
// lt.in[1] <== in[1]+1;
// lt.out ==> out;
// }

// // N is the number of bits the input have.
// // The MSF is the sign bit.
// template GreaterThan(n) {
// signal input in[2];
// signal output out;

// component lt = LessThan(n);

// lt.in[0] <== in[1];
// lt.in[1] <== in[0];
// lt.out ==> out;
// }

// // N is the number of bits the input have.
// // The MSF is the sign bit.
// template GreaterEqThan(n) {
// signal input in[2];
// signal output out;

// component lt = LessThan(n);

// lt.in[0] <== in[1];
// lt.in[1] <== in[0]+1;
// lt.out ==> out;
// }

template InRange(n) {
signal input in;
signal input range[2];
Expand Down
34 changes: 34 additions & 0 deletions notes.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
# Notes

## JSON Types
- [x] Object
- [x] String
- [ ] Array (PARTIALLY COMPLETED, TODO: Need to parse internally)
- [ ] Number
- [ ] Boolean
- [ ] Null

## Expected Output
> This table is outdated at this point; we should update it.
```
Notes: for `test.json`
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
POINTER | Read In: | STATE
-------------------------------------------------
State[1] | { | PARSING TO KEY
-------------------------------------------------
State[7] | " | INSIDE KEY
-------------------------------------------------
State[12]| " | NOT INSIDE KEY
-------------------------------------------------
State[13]| : | PARSING TO VALUE
-------------------------------------------------
State[15]| " | INSIDE VALUE
-------------------------------------------------
State[19]| " | NOT INSIDE VALUE
-------------------------------------------------
State[20]| " | COMPLETE WITH KV PARSING
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
State[20].next_tree_depth == 0 | VALID JSON
xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
```

0 comments on commit 049d45b

Please sign in to comment.