Skip to content

Commit

Permalink
refactor tests
Browse files Browse the repository at this point in the history
  • Loading branch information
lonerapier committed Aug 12, 2024
1 parent fbf32c6 commit 577152f
Show file tree
Hide file tree
Showing 6 changed files with 155 additions and 5,399 deletions.
8 changes: 5 additions & 3 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
# SPARK
> Succinct Parser Attestation for Reconciliation of Knowledge
> Succinct Parser Attestation for Reconciliation of Knowledge
## Repo Structure
The repository is currently new and being organized as follows:
Expand All @@ -26,7 +26,7 @@ npm install -g snarkjs
```

### Circomkit
You will need `yarn` on your system (brew, or apt-get or something).
You will need `yarn` on your system (brew, or apt-get or something).
Then run: `npm install` to get everything else.

#### Commands
Expand All @@ -41,7 +41,7 @@ For example, to compile the extractor, you can:
```
npx circomkit compile extract
```
Then you can do
Then you can do
```
npx circomkit witness extract witness
```
Expand Down Expand Up @@ -69,6 +69,8 @@ To run specific tests, use the `-g` flag for `mocha`, e.g., to run any proof des
npx mocha -g State
```

> [!NOTE]
> Currently, the [search](./circuits/search.circom) circuit doesn't work with circomkit, so you may have to compile it directly with circom: `circom circuits/main/search.circom --r1cs --wasm -l node_modules/ -o build/search/`
## (MOSTLY DEPRECATED DUE TO CIRCOMKIT) Running an example
```
Expand Down
130 changes: 83 additions & 47 deletions circuits/search.circom
Original file line number Diff line number Diff line change
Expand Up @@ -4,45 +4,67 @@ include "circomlib/circuits/mux1.circom";
include "./utils/hash.circom";
include "./utils/operators.circom";

/// @title SubstringSearch
/// @notice Calculates the index of a substring within a larger string. Uses a probabilistic algorithm to
/// find a substring that is equal to random linear combination of difference between each element of `data` and `key`.
/// `position` returned as output can be a false positive.
/// @dev Is underconstrained and not suitable for standalone usage, i.e. `position` returned can be spoofed by an adversary.
/// Must be verified with a similar template like `SubstringMatch`
/// @param dataLen The maximum length of the input string
/// @param keyLen The maximum length of the substring to be matched
/// @input data Array of ASCII characters as input string
/// @input key Array of ASCII characters as substring to be searched in `data`
/// @output position Index of `key` in `data`
/// @profile 6 * `dataLen` constraints
template SubstringSearch(dataLen, keyLen, random_num) {
/*
SubstringSearch
Calculates the index of a substring within a larger string. Uses a probabilistic algorithm to find a substring that is equal to random linear combination of difference between each element of `data` and `key`.
# NOTE
- Is underconstrained and not suitable for standalone usage, i.e. `position` returned can be spoofed by an adversary. Must be verified with a similar template like `SubstringMatch`
- `random_num` should be equal to Hash(key + data); otherwise this algorithm can yield false positives
# Parameters
- `dataLen`: The maximum length of the input string
- `keyLen`: The maximum length of the substring to be matched
# Inputs
- `data` Array of ASCII characters as input string
- `key` Array of ASCII characters as substring to be searched in `data`
- `random_num`: randomiser used to perform random linear summation for string comparison
# Output
- `position`: index of matched `key` in `data`
*/
template SubstringSearch(dataLen, keyLen) {
signal input data[dataLen];
signal input key[keyLen];
signal input random_num;
signal output position;

assert(dataLen > 0);
assert(keyLen > 0);
assert(dataLen >= keyLen);

// position accumulator
signal pos[dataLen-keyLen+2];
pos[0] <== 0;

// total matches found so far
signal num_matches[dataLen-keyLen+2];
num_matches[0] <== 0;

// iterate through each substring of length `keyLen` in `data` and find substring that matches.
// calculate powers of r
signal r_powers[dataLen];
r_powers[0] <== random_num;
for (var i=1 ; i<dataLen ; i++) {
r_powers[i] <== r_powers[i-1] * random_num;
}

signal is_match_found[dataLen-keyLen+1];
signal is_first_match[dataLen-keyLen+1];
signal index_at_first_match_and_found[dataLen-keyLen+1];
signal found[dataLen-keyLen+1][keyLen];

// iterate through each substring of length `keyLen` in `data` and find substring that matches.
for (var i = 0; i < dataLen - keyLen + 1; i++) {
// this is the underconstrained part, any malicious prover can set found to `0` manually
var found;
for (var j=0 ; j < keyLen ; j++) {
found += (random_num**j) * (data[i+j] - key[j]);
// underconstrained part, any malicious prover can set found to `0` manually
found[i][0] <-- r_powers[0] * (data[i] - key[0]);
for (var j=1 ; j < keyLen ; j++) {
found[i][j] <-- found[i][j-1] + r_powers[j] * (data[i+j]-key[j]);
}

// is substring a match?
is_match_found[i] <== IsZero()(found);
is_match_found[i] <== IsZero()(found[i][keyLen-1]);

// update total number of matches found
num_matches[i+1] <== num_matches[i] + is_match_found[i];
Expand All @@ -60,20 +82,30 @@ template SubstringSearch(dataLen, keyLen, random_num) {
position <== pos[dataLen-keyLen+1];
}

/// @title SubstringMatchWithIndex
/// @notice RLC algorithm for matching substring at index.
/// - Creates a mask for `data` at `[start, start + keyLen]`
/// - apply mask to data
/// - multiply data with powers of `r` to create random linear combination
/// - multiply key with powers of `r`
/// - sum of both arrays should be equal
/// @notice Modified from https://github.com/zkemail/zk-email-verify/tree/main/packages/circuits
/// @param dataLen The maximum length of the input string
/// @param keyLen The maximum length of the substring to be matched
/// @input data Array of ASCII characters as input string
/// @input key Array of ASCII characters as substring to be searched in `data`
/// @input position Index of `key` in `data`
/// @profile 9 * `dataLen` constraints
/*
SubstringMatchWithIndex
RLC algorithm for matching substring at index.
- Creates a mask for `data` at `[start, start + keyLen]`
- apply mask to data
- multiply data with powers of `r` to create random linear combination
- multiply key with powers of `r`
- sum of both arrays should be equal
# Parameters
- `dataLen`: The maximum length of the input string
- `keyLen`: The maximum length of the substring to be matched
# Inputs
- `data`: Array of ASCII characters as input string
- `key`: Array of ASCII characters as substring to be searched in `data`
- `r`: Random number whose powers are used to build the random linear combination
- `start`: Index of `key` in `data`
# Profile
9 * `dataLen` constraints
NOTE: Modified from https://github.com/zkemail/zk-email-verify/tree/main/packages/circuits
*/
template SubstringMatchWithIndex(dataLen, keyLen) {
signal input data[dataLen];
signal input key[keyLen];
Expand Down Expand Up @@ -168,16 +200,21 @@ template SubstringMatchWithIndex(dataLen, keyLen) {
hashMaskedData[dataLen - 1] === hashMaskedKey[keyLen - 1];
}

/// @title SubstringMatch
/// @notice Matches a substring with an input string and returns the position
/// @param dataLen The maximum length of the input string
/// @param keyLen The maximum length of the substring to be matched
/// @param r Random number initialised as poseidon hash of concatenation of key and data
/// @input data Array of ASCII characters as input string
/// @input key Array of ASCII characters as substring to be searched in `data`
/// @input position Index of `key` in `data`
/// @profile 9 * `dataLen` constraints
template SubstringMatch(dataLen, keyLen, r) {
/*
SubstringMatch: Matches a substring with an input string and returns the position
# Parameters
- `dataLen`: maximum length of the input string
- `keyLen`: maximum length of the substring to be matched
# Inputs
- `data`: Array of ASCII characters as input string
- `key`: Array of ASCII characters as substring to be searched in `data`
# Outputs
- `position`: Index of `key` in `data`
*/
template SubstringMatch(dataLen, keyLen) {
signal input data[dataLen];
signal input key[keyLen];
signal output position;
Expand All @@ -190,16 +227,15 @@ template SubstringMatch(dataLen, keyLen, r) {
for (var i = 0; i < dataLen; i++) {
rHasher.in[i + keyLen] <== data[i];
}
r === rHasher.out;
signal r <== rHasher.out;

// find the start position of `key` first match in `data`
// NOTE: underconstrained (should be paired with SubstringMatchWithIndex)
signal start <== SubstringSearch(dataLen, keyLen, r)(data, key);
log(start);
signal start <== SubstringSearch(dataLen, keyLen)(data, key, r);

// matches a `key` in `data` at `pos`
// NOTE: constrained verification assures correctness
SubstringMatchWithIndex(dataLen, keyLen)(data, key, rHasher.out, start);
SubstringMatchWithIndex(dataLen, keyLen)(data, key, r, start);

position <== start;
}
79 changes: 49 additions & 30 deletions circuits/test/search.test.ts
Original file line number Diff line number Diff line change
@@ -1,61 +1,77 @@
import { circomkit, WitnessTester } from "./common";

import witness from "../../inputs/search/witness2.json";
import witness from "../../inputs/search/witness.json";
import { PoseidonModular } from "./common/poseidon";

describe("search", () => {
describe("SubstringSearch", () => {
let circuit: WitnessTester<["data", "key"], ["position"]>;
let circuit: WitnessTester<["data", "key", "random_num"], ["position"]>;

it("witness: key at first position", async () => {
const key = [10, 8, 9, 4];
it("key at first position", async () => {
const data = [10, 8, 9, 4, 11, 9, 1, 2];
const key = [10, 8, 9, 4];
const concatenatedInput = key.concat(data);
const hashResult = PoseidonModular(concatenatedInput);

circuit = await circomkit.WitnessTester(`SubstringSearch`, {
file: "circuits/search",
template: "SubstringSearch",
params: [data.length, key.length, hashResult],
params: [data.length, key.length],
});

await circuit.expectPass(
{ data: data, key: key },
{ data: data, key: key, random_num: hashResult },
{ position: 0 },
);
});

it("witness: key at last position", async () => {
const key = [10, 8, 9, 4];
it("key at last position", async () => {
const data = [11, 9, 1, 2, 10, 8, 9, 4];
const key = [10, 8, 9, 4];
const concatenatedInput = key.concat(data);
const hashResult = PoseidonModular(concatenatedInput);

circuit = await circomkit.WitnessTester(`SubstringSearch`, {
file: "circuits/search",
template: "SubstringSearch",
params: [data.length, key.length, hashResult],
params: [data.length, key.length],
});

await circuit.expectPass(
{ data: data, key: key },
{ data: data, key: key, random_num: hashResult },
{ position: 4 },
);
});

it("witness: data = inputs/witness2.json:data, key = inputs2/witness.json:key, r = hash(data+key)", async () => {
it("wrong random_num input, correct key position: 2", async () => {
const data = [0, 0, 1, 0, 0];
const key = [1, 0];

circuit = await circomkit.WitnessTester(`SubstringSearch`, {
file: "circuits/search",
template: "SubstringSearch",
params: [data.length, key.length],
});

await circuit.expectPass(
{ data: data, key: key, random_num: 1 },
{ position: 1 },
);
});

it("data = inputs.json:data, key = inputs.json:key, r = hash(data+key)", async () => {
const concatenatedInput = witness["key"].concat(witness["data"]);
const hashResult = PoseidonModular(concatenatedInput);

circuit = await circomkit.WitnessTester(`SubstringSearch`, {
file: "circuits/search",
template: "SubstringSearch",
params: [787, 10, hashResult],
params: [witness["data"].length, witness["key"].length],
});
console.log("#constraints:", await circuit.getConstraintCount());

await circuit.expectPass(
{ data: witness["data"], key: witness["key"] },
{ data: witness["data"], key: witness["key"], random_num: hashResult },
{ position: 6 }
);
});
Expand All @@ -73,48 +89,51 @@ describe("search", () => {
console.log("#constraints:", await circuit.getConstraintCount());
});

it("witness: data = inputs/witness2.json:data, key = inputs2/witness.json:key, r = hash(data+key)", async () => {
it("data = inputs.json:data, key = inputs.json:key, r = hash(data+key)", async () => {
await circuit.expectPass(
{ data: witness["data"], key: witness["key"], r: PoseidonModular(witness["key"].concat(witness["data"])), start: 6 },
{
data: witness["data"],
key: witness["key"],
r: PoseidonModular(witness["key"].concat(witness["data"])),
start: 6
},
);
});

it("witness: data = inputs/witness2.json:data, key = inputs2/witness.json:key, r = hash(data+key), incorrect position", async () => {
it("data = inputs.json:data, key = inputs.json:key, r = hash(data+key), incorrect position", async () => {
await circuit.expectFail(
{ data: witness["data"], key: witness["key"], r: PoseidonModular(witness["key"].concat(witness["data"])), start: 98 },
{
data: witness["data"],
key: witness["key"],
r: PoseidonModular(witness["key"].concat(witness["data"])),
start: 98
},
);
});
});

describe("SubstringMatch", () => {
let circuit: WitnessTester<["data", "key"], ["position"]>;


it("witness: data = inputs/witness2.json:data, key = inputs2/witness.json:key, r = hash(data+key)", async () => {
const hashResult = PoseidonModular(witness["key"].concat(witness["data"]));

before(async () => {
circuit = await circomkit.WitnessTester(`SubstringSearch`, {
file: "circuits/search",
template: "SubstringMatch",
params: [787, 10, hashResult],
params: [787, 10],
});
console.log("#constraints:", await circuit.getConstraintCount());
});

it("data = inputs.json:data, key = inputs.json:key", async () => {
await circuit.expectPass(
{ data: witness["data"], key: witness["key"] },
{ position: 6 },
);
});

it("witness: data = inputs/witness2.json:data, key = inputs2/witness.json:key, r = hash(data+key), wrong hash", async () => {
circuit = await circomkit.WitnessTester(`SubstringSearch`, {
file: "circuits/search",
template: "SubstringMatch",
params: [787, 10, 10],
});

it("data = inputs.json:data, key = wrong key", async () => {
await circuit.expectFail(
{ data: witness["data"], key: witness["key"] },
{ data: witness["data"], key: witness["key"].concat(257) },
);
});
});
Expand Down
Loading

0 comments on commit 577152f

Please sign in to comment.