Skip to content

Commit

Permalink
feat: parser state machine (#4)
Browse files Browse the repository at this point in the history
  • Loading branch information
Autoparallel authored Aug 7, 2024
1 parent 8e0318d commit 274d15a
Show file tree
Hide file tree
Showing 18 changed files with 710 additions and 41 deletions.
35 changes: 35 additions & 0 deletions .github/workflows/test.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
# CI workflow: installs the Node.js and Circom toolchains, then runs the mocha test suite.
name: circom

# Run on pushes to `main` and on pull requests that target `main`.
on:
  push:
    branches: [ main ]
  pull_request:
    branches: [ main ]

jobs:
  test:
    runs-on: ubuntu-latest

    steps:
      # v4 of the official actions runs on a supported Node runtime
      # (the v3 releases rely on the deprecated Node 16 action runtime).
      - uses: actions/checkout@v4

      - name: Use Node.js
        uses: actions/setup-node@v4
        with:
          node-version: '16'

      - name: Install dependencies
        run: |
          npm install
          npm install -g snarkjs
      - name: Download and install Circom
        run: |
          CIRCOM_VERSION=2.1.9
          # --fail makes curl exit non-zero on an HTTP error instead of saving the error page as the binary.
          curl --fail -L https://github.com/iden3/circom/releases/download/v$CIRCOM_VERSION/circom-linux-amd64 -o circom
          chmod +x circom
          sudo mv circom /usr/local/bin/
          # Sanity check that the installed binary actually runs.
          circom --version
      - name: Run tests
        run: npx mocha
5 changes: 5 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,11 @@ npx mocha
```
from the repository root.

To run specific tests, use the `-g` flag for `mocha`, e.g., to run any proof described with "State" we can pass:
```
npx mocha -g State
```


## (MOSTLY DEPRECATED DUE TO CIRCOMKIT) Running an example
```
Expand Down
16 changes: 16 additions & 0 deletions circuits.json
Original file line number Diff line number Diff line change
@@ -1,4 +1,20 @@
{
"test_extract": {
"file": "extract",
"template": "Extract",
"params": [
4,
21
]
},
"test_extract_hard": {
"file": "extract",
"template": "Extract",
"params": [
4,
48
]
},
"extract": {
"file": "extract",
"template": "Extract",
Expand Down
35 changes: 30 additions & 5 deletions circuits/bytes.circom
Original file line number Diff line number Diff line change
@@ -1,8 +1,25 @@
pragma circom 2.1.9;

// Converts a u8 number into a byte,
// verifying that this number does indeed fit into u8 (i.e., will fail if a value greater than 255 is input)
// See: https://github.com/iden3/circomlib/blob/cff5ab6288b55ef23602221694a6a38a0239dcc0/circuits/bitify.circom
/*
All tests for this file are located in: `./test/bytes.test.ts`
Some of the functions here are based on circomlib:
https://github.com/iden3/circomlib/blob/cff5ab6288b55ef23602221694a6a38a0239dcc0/circuits/bitify.circom
*/

/*
This function reads in an unsigned 8-bit integer and converts it to an array of bits.
# Inputs:
- `in`: a number
# Intermediate signals:
- `byte[8]`: the array of bits that `in` is converted to
# Constraints:
- `in`: must be between `0` and `2**8 - 1`
*/
template U8ToBits() {
signal input in;
signal byte[8];
Expand All @@ -20,8 +37,16 @@ template U8ToBits() {
lc1 === in;
}

// If above passes, output can be constrained to input since they're
// valid bytes.
/*
This function reads in an array of unsigned numbers that will be constrained to be valid unsigned 8-bit integers.
# Inputs:
- `n`: the length of the ASCII string (as integers) to verify
- `in[n]`: a list of numbers
# Constraints:
- `in[n]`: each element of this array must be between `0` and `2**8-1`
*/
template ASCII(n) {
signal input in[n];

Expand Down
48 changes: 38 additions & 10 deletions circuits/extract.circom
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@ pragma circom 2.1.9;

include "bytes.circom";
include "operators.circom";
include "parser.circom";

template Extract(KEY_BYTES, DATA_BYTES) {
signal input key[KEY_BYTES];
Expand All @@ -20,14 +21,41 @@ template Extract(KEY_BYTES, DATA_BYTES) {
component dataASCII = ASCII(DATA_BYTES);
dataASCII.in <== data;
//--------------------------------------------------------------------------------------------//
component Matches[DATA_BYTES];
for(var data_pointer = 0; data_pointer < DATA_BYTES - KEY_BYTES; data_pointer++) {
Matches[data_pointer] = IsEqualArray(KEY_BYTES);
for(var key_pointer_offset = 0; key_pointer_offset < KEY_BYTES; key_pointer_offset++) {
Matches[data_pointer].in[0][key_pointer_offset] <== key[key_pointer_offset];
Matches[data_pointer].in[1][key_pointer_offset] <== data[data_pointer + key_pointer_offset];
}
log("Matches[", data_pointer, "] = ", Matches[data_pointer].out);
KeyMatches[data_pointer] <== Matches[data_pointer].out;
// Initialize the parser
component State[DATA_BYTES];
State[0] = StateUpdate();
State[0].byte <== data[0];
State[0].tree_depth <== 0;
State[0].parsing_to_key <== 1; // Initialize by saying we are parsing to the first key
State[0].inside_key <== 0;
State[0].parsing_to_value <== 0;
State[0].inside_value <== 0;
State[0].escaping <== 0;
State[0].end_of_kv <== 0;

for(var data_pointer = 1; data_pointer < DATA_BYTES; data_pointer++) {
State[data_pointer] = StateUpdate();
State[data_pointer].byte <== data[data_pointer];
State[data_pointer].tree_depth <== State[data_pointer - 1].next_tree_depth;
State[data_pointer].parsing_to_key <== State[data_pointer - 1].next_parsing_to_key;
State[data_pointer].inside_key <== State[data_pointer - 1].next_inside_key;
State[data_pointer].parsing_to_value <== State[data_pointer - 1].next_parsing_to_value;
State[data_pointer].inside_value <== State[data_pointer - 1].next_inside_value;
State[data_pointer].end_of_kv <== State[data_pointer - 1].next_end_of_kv;
// TODO: For the next state, we should use `next_`, this is only to make this compile for now.
State[data_pointer].escaping <== State[data_pointer - 1].escaping;


// Debugging
log("State[", data_pointer, "].tree_depth", "= ", State[data_pointer].tree_depth);
log("State[", data_pointer, "].parsing_to_key", "= ", State[data_pointer].parsing_to_key);
log("State[", data_pointer, "].inside_key", "= ", State[data_pointer].inside_key);
log("State[", data_pointer, "].parsing_to_value", "= ", State[data_pointer].parsing_to_value);
log("State[", data_pointer, "].inside_value", "= ", State[data_pointer].inside_value);
log("State[", data_pointer, "].end_of_kv", "= ", State[data_pointer].end_of_kv);
log("---");
}
}

// Constrain to have valid JSON (TODO: more is needed)
State[DATA_BYTES - 1].next_tree_depth === 0;
}
82 changes: 80 additions & 2 deletions circuits/operators.circom
Original file line number Diff line number Diff line change
@@ -1,7 +1,25 @@
pragma circom 2.1.9;

// For both see: https://github.com/iden3/circomlib/blob/cff5ab6288b55ef23602221694a6a38a0239dcc0/circuits/comparators.circom
/*
All tests for this file are located in: `./test/operators.test.ts`
Some of the functions here are based on circomlib:
https://github.com/iden3/circomlib/blob/cff5ab6288b55ef23602221694a6a38a0239dcc0/circuits/comparators.circom
*/


/*
This function is an indicator for zero.
# Inputs:
- `in`: some number
- `out`: either `0` or `1`
- `1` if `in` is equal to `0`
- `0` otherwise
# Constraints
- `in * out === 0`: `in` and `out` cannot both be nonzero (`in` itself may be any number).
*/
template IsZero() {
signal input in;
signal output out;
Expand All @@ -14,7 +32,15 @@ template IsZero() {
in * out === 0;
}

/*
This function is an indicator for two equal inputs.
# Inputs:
- `in[2]`: two numbers
- `out`: either `0` or `1`
- `1` if `in[0]` is equal to `in[1]`
- `0` otherwise
*/
template IsEqual() {
signal input in[2];
signal output out;
Expand All @@ -26,6 +52,16 @@ template IsEqual() {
isz.out ==> out;
}

/*
This function is an indicator for two equal array inputs.
# Inputs:
- `n`: the length of arrays to compare
- `in[2][n]`: two arrays of `n` numbers
- `out`: either `0` or `1`
- `1` if `in[0]` is equal to `in[1]` as arrays (i.e., component by component)
- `0` otherwise
*/
template IsEqualArray(n) {
signal input in[2][n];
signal output out;
Expand All @@ -44,4 +80,46 @@ template IsEqualArray(n) {
totalEqual.in[0] <== n;
totalEqual.in[1] <== accum;
out <== totalEqual.out;
}
}


// TODO: There should be a way to have the below assertion come from the field itself.
/*
Indicator template: reports whether a value occurs anywhere in an array.
# Parameters:
- `n`: the number of elements in `array` (asserted to satisfy `n > 0` and `n < 2**254`)
# Inputs:
- `in`: the value to look for
- `array[n]`: the values to search through
# Outputs:
- `out`: either `0` or `1`
    - `1` if at least one `array[i]` is equal to `in`
    - `0` otherwise
*/
template Contains(n) {
    assert(n > 0);
    /*
    The number of matches is accumulated as a field element. If `n` were as
    large as the field prime `p` and every entry of `array` matched `in`, the
    count would wrap around to zero and the template would report "not found".
    Arrays of that size are not realistic, but `n` is bounded as a precaution.
    */
    assert(n < 2**254);
    signal input in;
    signal input array[n];
    signal output out;

    // One equality indicator per array slot; their sum counts the matches.
    component matchAt[n];
    var matchCount = 0;
    for(var idx = 0; idx < n; idx++) {
        matchAt[idx] = IsEqual();
        matchAt[idx].in[0] <== in;
        matchAt[idx].in[1] <== array[idx];
        matchCount += matchAt[idx].out;
    }

    // `noMatch.out` indicates that the match count is zero.
    component noMatch = IsZero();
    noMatch.in <== matchCount;

    // Negate the indicator (1 - x) so that `out` signals "found".
    out <== 1 - noMatch.out;
}
Loading

0 comments on commit 274d15a

Please sign in to comment.