Skip to content

Commit

Permalink
feat: stack machine parsing (#18)
Browse files Browse the repository at this point in the history
  • Loading branch information
Autoparallel authored Aug 22, 2024
1 parent 9b6f694 commit 9ffb44f
Show file tree
Hide file tree
Showing 37 changed files with 1,741 additions and 1,055 deletions.
3 changes: 3 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,11 @@

node_modules/*

# Circomkit generated
build/*
ptau/*
circuits/test/*.circom
circuits/main/*

# Rust generated
inputs/**/*.json
52 changes: 34 additions & 18 deletions circuits.json
Original file line number Diff line number Diff line change
@@ -1,50 +1,66 @@
{
"test_extract": {
"extract": {
"file": "extract",
"template": "Extract",
"params": [
4,
21
157,
13
]
},
"test_extract_two_key": {
"value_string": {
"file": "extract",
"template": "Extract",
"params": [
4,
40
12,
1
]
},
"test_extract_depth": {
"value_number": {
"file": "extract",
"template": "Extract",
"params": [
4,
64
12,
2
]
},
"test_extract_sambhav": {
"value_array": {
"file": "extract",
"template": "Extract",
"params": [
4,
105
18,
2
]
},
"test_extract_hard": {
"value_array_nested": {
"file": "extract",
"template": "Extract",
"params": [
4,
48
24,
4
]
},
"extract": {
"value_array_object": {
"file": "extract",
"template": "Extract",
"params": [
25,
4
]
},
"value_array_object_array": {
"file": "extract",
"template": "Extract",
"params": [
31,
5
]
},
"value_object": {
"file": "extract",
"template": "Extract",
"params": [
10,
787
21,
3
]
}
}
58 changes: 0 additions & 58 deletions circuits/bytes.circom

This file was deleted.

65 changes: 32 additions & 33 deletions circuits/extract.circom
Original file line number Diff line number Diff line change
@@ -1,54 +1,53 @@
pragma circom 2.1.9;

include "bytes.circom";
include "operators.circom";
include "utils.circom";
include "parser.circom";

template Extract(KEY_BYTES, DATA_BYTES) {
signal input key[KEY_BYTES];
template Extract(DATA_BYTES, MAX_STACK_HEIGHT) {
signal input data[DATA_BYTES];
signal output KeyMatches[DATA_BYTES - KEY_BYTES];

// TODO: Add assertions on the inputs here!

//--------------------------------------------------------------------------------------------//
//-CONSTRAINTS--------------------------------------------------------------------------------//
//--------------------------------------------------------------------------------------------//
// Working with a single key for now to do substring matching
component keyASCII = ASCII(KEY_BYTES);
keyASCII.in <== key;

//--------------------------------------------------------------------------------------------//
component dataASCII = ASCII(DATA_BYTES);
dataASCII.in <== data;
//--------------------------------------------------------------------------------------------//
// Initialize the parser
component State[DATA_BYTES];
State[0] = StateUpdate();
State[0].byte <== data[0];
State[0].tree_depth <== 0;
State[0].parsing_key <== 0;
State[0].inside_key <== 0;
State[0].parsing_value <== 0;
State[0].inside_value <== 0;

for(var data_pointer = 1; data_pointer < DATA_BYTES; data_pointer++) {
State[data_pointer] = StateUpdate();
State[data_pointer].byte <== data[data_pointer];
State[data_pointer].tree_depth <== State[data_pointer - 1].next_tree_depth;
State[data_pointer].parsing_key <== State[data_pointer - 1].next_parsing_key;
State[data_pointer].inside_key <== State[data_pointer - 1].next_inside_key;
State[data_pointer].parsing_value <== State[data_pointer - 1].next_parsing_value;
State[data_pointer].inside_value <== State[data_pointer - 1].next_inside_value;
State[0] = StateUpdate(MAX_STACK_HEIGHT);
State[0].byte <== data[0];
for(var i = 0; i < MAX_STACK_HEIGHT; i++) {
State[0].stack[i] <== [0,0];
}
State[0].parsing_string <== 0;
State[0].parsing_number <== 0;

for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) {
State[data_idx] = StateUpdate(MAX_STACK_HEIGHT);
State[data_idx].byte <== data[data_idx];
State[data_idx].stack <== State[data_idx - 1].next_stack;
State[data_idx].parsing_string <== State[data_idx - 1].next_parsing_string;
State[data_idx].parsing_number <== State[data_idx - 1].next_parsing_number;

// Debugging
log("State[", data_pointer, "].tree_depth", "= ", State[data_pointer].tree_depth);
log("State[", data_pointer, "].parsing_key", "= ", State[data_pointer].parsing_key);
log("State[", data_pointer, "].inside_key", "= ", State[data_pointer].inside_key);
log("State[", data_pointer, "].parsing_value", "= ", State[data_pointer].parsing_value);
log("State[", data_pointer, "].inside_value", "= ", State[data_pointer].inside_value);
log("---");
for(var i = 0; i<MAX_STACK_HEIGHT; i++) {
log("State[", data_idx, "].stack[", i,"] ", "= [",State[data_idx].stack[i][0], "][", State[data_idx].stack[i][1],"]" );
}
log("State[", data_idx, "].parsing_string", "= ", State[data_idx].parsing_string);
log("State[", data_idx, "].parsing_number", "= ", State[data_idx].parsing_number);
log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
}

// Constrain to have valid JSON (TODO: more is needed)
State[DATA_BYTES - 1].next_tree_depth === 0;
// State[DATA_BYTES - 1].next_tree_depth === 0;

// Debugging
for(var i = 0; i < MAX_STACK_HEIGHT; i++) {
log("State[", DATA_BYTES, "].stack[", i,"] ", "= [",State[DATA_BYTES -1].next_stack[i][0], "][", State[DATA_BYTES - 1].next_stack[i][1],"]" );
}
log("State[", DATA_BYTES, "].parsing_string", "= ", State[DATA_BYTES-1].next_parsing_string);
log("State[", DATA_BYTES, "].parsing_number", "= ", State[DATA_BYTES-1].next_parsing_number);
log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");
}
42 changes: 42 additions & 0 deletions circuits/language.circom
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
pragma circom 2.1.9;

// Syntax: exposes the byte values of the JSON syntax characters as constant
// output signals, so other templates can refer to them by name instead of by
// magic number. Takes no inputs; every output is constrained to a fixed value.
template Syntax() {
//-Delimiters---------------------------------------------------------------------------------//
// - ASCII char: `{` (123)
signal output START_BRACE <== 123;
// - ASCII char: `}` (125)
signal output END_BRACE <== 125;
// - ASCII char: `[` (91)
signal output START_BRACKET <== 91;
// - ASCII char: `]` (93)
signal output END_BRACKET <== 93;
// - ASCII char: `"` (34)
signal output QUOTE <== 34;
// - ASCII char: `:` (58)
signal output COLON <== 58;
// - ASCII char: `,` (44)
signal output COMMA <== 44;
//-White_space--------------------------------------------------------------------------------//
// - ASCII char: `\n` (line feed, 10)
signal output NEWLINE <== 10;
// - ASCII char: ` ` (space, 32)
signal output SPACE <== 32;
//-Escape-------------------------------------------------------------------------------------//
// - ASCII char: `\` (backslash, 92)
signal output ESCAPE <== 92;
//-Number_Remapping---------------------------------------------------------------------------//
// Not an ASCII code: 256 is deliberately outside the range of a single byte (0-255) so it
// cannot collide with any real input byte.
signal output NUMBER <== 256; // past a u8 -- reserved for ANY numerical ASCII (48 - 57)
}

// Command: exposes one constant 3-element output signal array per kind of
// character the parser distinguishes. Takes no inputs.
//
// NOTE(review): signals are field elements, so the negative entries below
// (-1, -2) are stored as their representatives modulo the circuit prime;
// confirm the consumer compares/adds them with that in mind.
template Command() {
// Layout of every triple below:
// STATE = [read_write_value, parsing_string, parsing_number]
signal output NOTHING[3] <== [0, 0, -1 ]; // Command returned by switch if we want to do nothing, e.g. read a whitespace char while looking for a key
signal output START_BRACE[3] <== [1, 0, 0 ]; // Command returned by switch if we hit a start brace `{`
signal output END_BRACE[3] <== [-1, 0, -1 ]; // Command returned by switch if we hit an end brace `}`
signal output START_BRACKET[3] <== [2, 0, 0 ]; // Command returned by switch if we hit a start bracket `[`
signal output END_BRACKET[3] <== [-2, 0, -1 ]; // Command returned by switch if we hit an end bracket `]`
signal output QUOTE[3] <== [0, 1, 0 ]; // Command returned by switch if we hit a quote `"`
signal output COLON[3] <== [3, 0, 0 ]; // Command returned by switch if we hit a colon `:`
signal output COMMA[3] <== [4, 0, -1 ]; // Command returned by switch if we hit a comma `,`
// First entry 256 is the same out-of-byte-range value that Syntax() exposes as NUMBER.
signal output NUMBER[3] <== [256, 0, 1 ]; // Command returned by switch if we hit some decimal number (e.g., ASCII 48-57)
}
Loading

0 comments on commit 9ffb44f

Please sign in to comment.