From 726b6f9c2cd85d8516ed62d0fa76b1626641d742 Mon Sep 17 00:00:00 2001 From: Sambhav Dusad Date: Wed, 25 Sep 2024 01:13:29 +0530 Subject: [PATCH] feat: http + json extended example (#91) * add input * fix array index codegen bug * refactor: tests utils reorganise * add spotify test * save work till now, start on integrated codegen * save work * fix: tests * refactor: codegen * add integrated codegen * test: add separate test for correctness * refactor: change `Syntax` and `StateUpdate` to prevent duplicate callable symbols in JSON parser * feat: accept inputs in bytes for `readJSONInputFile` * add extended witness gen in cli * feat: extended test works!!! * refactor: json cleanup * add tests * add test and lint CI * add http and extended codegen docs * add docs * rm old README info * Update http.rs (#92) --------- Co-authored-by: Colin Roberts --- .github/workflows/lint.yml | 40 ++ .github/workflows/test.yml | 16 +- README.md | 24 - circuits/http/extractor.circom | 8 +- circuits/http/interpreter.circom | 6 +- circuits/http/locker.circom | 20 +- circuits/http/parser/language.circom | 6 +- circuits/http/parser/machine.circom | 14 +- circuits/http/parser/parser.circom | 4 +- circuits/json/interpreter.circom | 4 +- circuits/test/common/http.ts | 88 ++++ circuits/test/common/index.ts | 55 +-- circuits/test/http/codegen.test.ts | 47 +- circuits/test/http/extractor.test.ts | 5 +- circuits/test/http/interpreter.test.ts | 3 +- circuits/test/http/locker.test.ts | 3 +- .../test/json/extractor/extractor.test.ts | 2 +- .../test/json/extractor/interpreter.test.ts | 10 +- circuits/test/spotify_top_artists.test.ts | 195 +++++++++ docs/http.md | 56 +++ docs/pabuild.md | 23 +- examples/http/lockfile/spotify.lock.json | 7 + .../http/lockfile/spotify_extended.lock.json | 19 + .../http/spotify_top_artists_request.http | 3 + .../http/spotify_top_artists_response.http | 8 + examples/json/lockfile/spotify.json | 10 + examples/json/test/spotify.json | 3 + src/circuit_config.rs | 54 ++- 
src/codegen/http.rs | 369 ++++++++++------ src/codegen/integrated.rs | 219 ++++++++++ src/codegen/json.rs | 413 +++++++++++------- src/codegen/mod.rs | 55 ++- src/main.rs | 40 +- src/witness.rs | 161 ++++--- 34 files changed, 1431 insertions(+), 559 deletions(-) create mode 100644 .github/workflows/lint.yml create mode 100644 circuits/test/common/http.ts create mode 100644 circuits/test/spotify_top_artists.test.ts create mode 100644 examples/http/lockfile/spotify.lock.json create mode 100644 examples/http/lockfile/spotify_extended.lock.json create mode 100644 examples/http/spotify_top_artists_request.http create mode 100644 examples/http/spotify_top_artists_response.http create mode 100644 examples/json/lockfile/spotify.json create mode 100644 examples/json/test/spotify.json create mode 100644 src/codegen/integrated.rs diff --git a/.github/workflows/lint.yml b/.github/workflows/lint.yml new file mode 100644 index 0000000..c910c29 --- /dev/null +++ b/.github/workflows/lint.yml @@ -0,0 +1,40 @@ +name: lint + +on: + pull_request: + types: [opened, synchronize, reopened] + push: + branches: [main] + +jobs: + fmt: + name: fmt + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly-2024-06-10 + components: rustfmt + + - name: cargo fmt + run: cargo fmt --all -- --check + + clippy: + name: clippy + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly-2024-06-10 + components: clippy + + - name: cargo clippy + run: cargo clippy --all \ No newline at end of file diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 41e2171..15497f9 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -7,7 +7,7 @@ on: branches: [ main ] jobs: - test: + circom: runs-on: ubuntu-latest steps: @@ -33,3 +33,17 @@ jobs: - name: Run tests run: npm run 
test + rust: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v4 + + - name: Install Rust + uses: dtolnay/rust-toolchain@master + with: + toolchain: nightly-2024-06-10 + + - name: Run tests + run: cargo test --all diff --git a/README.md b/README.md index 3314937..a872eb4 100644 --- a/README.md +++ b/README.md @@ -129,30 +129,6 @@ This is our local Rust command line application. Please see the [documentation](docs/pabuild.md) for how to use this alongside the other tools. -### Rust Example Witness JSON Creation -To generate example input JSON files for the Circom circuits, run: - -```bash -cargo install --path . -``` - -to install the `witness` binary. - -To get the basic idea, run `witness --help`. It can process and generate JSON files to be used for the circuits. -For example, if we have a given JSON file we want to parse such as `examples/json/test/example.json` for the `extract` circuit (see `circuits.json`), then we can: - -```bash -witness json --input-file examples/json/test/example.json --output-dir inputs/extract --output-filename input.json -``` - -For an HTTP request/response, you can generate a JSON input via: -```bash -witness http --input-file examples/http/get_request.http --output-dir inputs/get_request --output-filename input.json -``` - -Afterwards, you can run `circomkit compile get_request` then `circomkit witness get_request input`. 
- - ## License Licensed under the Apache License, Version 2.0 ([LICENSE-APACHE](LICENSE-APACHE) or http://www.apache.org/licenses/LICENSE-2.0) diff --git a/circuits/http/extractor.circom b/circuits/http/extractor.circom index c2b0925..160e8ee 100644 --- a/circuits/http/extractor.circom +++ b/circuits/http/extractor.circom @@ -23,7 +23,7 @@ template ExtractResponse(DATA_BYTES, maxContentLength) { // Initialze the parser component State[DATA_BYTES]; - State[0] = StateUpdate(); + State[0] = HttpStateUpdate(); State[0].byte <== data[0]; State[0].parsing_start <== 1; State[0].parsing_header <== 0; @@ -36,7 +36,7 @@ template ExtractResponse(DATA_BYTES, maxContentLength) { dataMask[0] <== 0; for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) { - State[data_idx] = StateUpdate(); + State[data_idx] = HttpStateUpdate(); State[data_idx].byte <== data[data_idx]; State[data_idx].parsing_start <== State[data_idx - 1].next_parsing_start; State[data_idx].parsing_header <== State[data_idx - 1].next_parsing_header; @@ -96,7 +96,7 @@ template ExtractHeaderValue(DATA_BYTES, headerNameLength, maxValueLength) { // Initialze the parser component State[DATA_BYTES]; - State[0] = StateUpdate(); + State[0] = HttpStateUpdate(); State[0].byte <== data[0]; State[0].parsing_start <== 1; State[0].parsing_header <== 0; @@ -115,7 +115,7 @@ template ExtractHeaderValue(DATA_BYTES, headerNameLength, maxValueLength) { valueMask[0] <== 0; for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) { - State[data_idx] = StateUpdate(); + State[data_idx] = HttpStateUpdate(); State[data_idx].byte <== data[data_idx]; State[data_idx].parsing_start <== State[data_idx - 1].next_parsing_start; State[data_idx].parsing_header <== State[data_idx - 1].next_parsing_header; diff --git a/circuits/http/interpreter.circom b/circuits/http/interpreter.circom index 1fce278..0cd6a2e 100644 --- a/circuits/http/interpreter.circom +++ b/circuits/http/interpreter.circom @@ -18,7 +18,7 @@ template inStartLine() { template 
inStartMiddle() { signal input parsing_start; signal output out; - + out <== IsEqual()([parsing_start, 2]); } @@ -49,7 +49,7 @@ template HeaderFieldNameValueMatch(dataLen, nameLen, valueLen) { signal input r; signal input index; - component syntax = Syntax(); + component syntax = HttpSyntax(); // signal output value[valueLen]; @@ -76,7 +76,7 @@ template HeaderFieldNameMatch(dataLen, nameLen) { signal input r; signal input index; - component syntax = Syntax(); + component syntax = HttpSyntax(); // is name matches signal headerNameMatch <== SubstringMatchWithIndex(dataLen, nameLen)(data, headerName, r, index); diff --git a/circuits/http/locker.circom b/circuits/http/locker.circom index 8680759..a2bde99 100644 --- a/circuits/http/locker.circom +++ b/circuits/http/locker.circom @@ -22,7 +22,7 @@ template LockStartLine(DATA_BYTES, beginningLen, middleLen, finalLen) { // Initialze the parser component State[DATA_BYTES]; - State[0] = StateUpdate(); + State[0] = HttpStateUpdate(); State[0].byte <== data[0]; State[0].parsing_start <== 1; State[0].parsing_header <== 0; @@ -31,7 +31,7 @@ template LockStartLine(DATA_BYTES, beginningLen, middleLen, finalLen) { State[0].parsing_body <== 0; State[0].line_status <== 0; - /* + /* Note, because we know a beginning is the very first thing in a request we can make this more efficient by just comparing the first `beginningLen` bytes of the data ASCII against the beginning ASCII itself. 
@@ -50,7 +50,7 @@ template LockStartLine(DATA_BYTES, beginningLen, middleLen, finalLen) { var middle_end_counter = 1; var final_end_counter = 1; for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) { - State[data_idx] = StateUpdate(); + State[data_idx] = HttpStateUpdate(); State[data_idx].byte <== data[data_idx]; State[data_idx].parsing_start <== State[data_idx - 1].next_parsing_start; State[data_idx].parsing_header <== State[data_idx - 1].next_parsing_header; @@ -58,7 +58,7 @@ template LockStartLine(DATA_BYTES, beginningLen, middleLen, finalLen) { State[data_idx].parsing_field_value <== State[data_idx-1].next_parsing_field_value; State[data_idx].parsing_body <== State[data_idx - 1].next_parsing_body; State[data_idx].line_status <== State[data_idx - 1].next_line_status; - + // Check remaining beginning bytes if(data_idx < beginningLen) { beginningIsEqual[data_idx] <== IsEqual()([data[data_idx], beginning[data_idx]]); @@ -70,7 +70,7 @@ template LockStartLine(DATA_BYTES, beginningLen, middleLen, finalLen) { middleMask[data_idx] <== inStartMiddle()(State[data_idx].parsing_start); finalMask[data_idx] <== inStartEnd()(State[data_idx].parsing_start); middle_start_counter += startLineMask[data_idx] - middleMask[data_idx] - finalMask[data_idx]; - // The end of middle is the start of the final + // The end of middle is the start of the final middle_end_counter += startLineMask[data_idx] - finalMask[data_idx]; final_end_counter += startLineMask[data_idx]; @@ -86,7 +86,7 @@ template LockStartLine(DATA_BYTES, beginningLen, middleLen, finalLen) { log("middle_end_counter = ", middle_end_counter); log("final_end_counter = ", final_end_counter); log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx"); - } + } // Debugging log("State[", DATA_BYTES, "].parsing_start ", "= ", State[DATA_BYTES-1].next_parsing_start); @@ -105,7 +105,7 @@ template LockStartLine(DATA_BYTES, beginningLen, middleLen, finalLen) { signal middleMatch <== SubstringMatchWithIndex(DATA_BYTES, 
middleLen)(data, middle, 100, middle_start_counter); middleMatch === 1; middleLen === middle_end_counter - middle_start_counter - 1; - + // Check final is correct by substring match and length check // TODO: change r signal finalMatch <== SubstringMatchWithIndex(DATA_BYTES, finalLen)(data, final, 100, middle_end_counter); @@ -128,7 +128,7 @@ template LockHeader(DATA_BYTES, headerNameLen, headerValueLen) { // Initialze the parser component State[DATA_BYTES]; - State[0] = StateUpdate(); + State[0] = HttpStateUpdate(); State[0].byte <== data[0]; State[0].parsing_start <== 1; State[0].parsing_header <== 0; @@ -144,7 +144,7 @@ template LockHeader(DATA_BYTES, headerNameLen, headerValueLen) { var hasMatched = 0; for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) { - State[data_idx] = StateUpdate(); + State[data_idx] = HttpStateUpdate(); State[data_idx].byte <== data[data_idx]; State[data_idx].parsing_start <== State[data_idx - 1].next_parsing_start; State[data_idx].parsing_header <== State[data_idx - 1].next_parsing_header; @@ -158,7 +158,7 @@ template LockHeader(DATA_BYTES, headerNameLen, headerValueLen) { headerFieldNameValueMatch[data_idx].data <== data; headerFieldNameValueMatch[data_idx].headerName <== header; headerFieldNameValueMatch[data_idx].headerValue <== value; - headerFieldNameValueMatch[data_idx].r <== 100; + headerFieldNameValueMatch[data_idx].r <== 100; headerFieldNameValueMatch[data_idx].index <== data_idx; isHeaderFieldNameValueMatch[data_idx] <== isHeaderFieldNameValueMatch[data_idx-1] + headerFieldNameValueMatch[data_idx].out; diff --git a/circuits/http/parser/language.circom b/circuits/http/parser/language.circom index ffb48bb..742cf90 100644 --- a/circuits/http/parser/language.circom +++ b/circuits/http/parser/language.circom @@ -2,7 +2,7 @@ pragma circom 2.1.9; // All the possible request methods: https://developer.mozilla.org/en-US/docs/Web/HTTP/Methods -template Syntax() { +template HttpSyntax() { 
//-Delimeters---------------------------------------------------------------------------------// // - ASCII char `:` signal output COLON <== 58; @@ -16,9 +16,9 @@ template Syntax() { // https://www.rfc-editor.org/rfc/rfc2616#section-2.2 // https://www.rfc-editor.org/rfc/rfc7230#section-3.5 // - ASCII char `\r` (carriage return) - signal output CR <== 13; + signal output CR <== 13; // - ASCII char `\n` (line feed) - signal output LF <== 10; + signal output LF <== 10; // - ASCII char: ` ` signal output SPACE <== 32; //-Escape-------------------------------------------------------------------------------------// diff --git a/circuits/http/parser/machine.circom b/circuits/http/parser/machine.circom index a7a5deb..04a4bee 100644 --- a/circuits/http/parser/machine.circom +++ b/circuits/http/parser/machine.circom @@ -3,13 +3,13 @@ pragma circom 2.1.9; include "language.circom"; include "../../utils/array.circom"; -template StateUpdate() { +template HttpStateUpdate() { signal input parsing_start; // flag that counts up to 3 for each value in the start line signal input parsing_header; // Flag + Counter for what header line we are in signal input parsing_field_name; // flag that tells if parsing header field name signal input parsing_field_value; // flag that tells if parsing header field value signal input parsing_body; // Flag when we are inside body - signal input line_status; // Flag that counts up to 4 to read a double CLRF + signal input line_status; // Flag that counts up to 4 to read a double CRLF signal input byte; signal output next_parsing_start; @@ -19,20 +19,20 @@ template StateUpdate() { signal output next_parsing_body; signal output next_line_status; - component Syntax = Syntax(); + component HttpSyntax = HttpSyntax(); //---------------------------------------------------------------------------------// // check if we read space or colon component readSP = IsEqual(); - readSP.in <== [byte, Syntax.SPACE]; + readSP.in <== [byte, HttpSyntax.SPACE]; component 
readColon = IsEqual(); - readColon.in <== [byte, Syntax.COLON]; + readColon.in <== [byte, HttpSyntax.COLON]; // Check if what we just read is a CR / LF component readCR = IsEqual(); - readCR.in <== [byte, Syntax.CR]; + readCR.in <== [byte, HttpSyntax.CR]; component readLF = IsEqual(); - readLF.in <== [byte, Syntax.LF]; + readLF.in <== [byte, HttpSyntax.LF]; signal notCRAndLF <== (1 - readCR.out) * (1 - readLF.out); //---------------------------------------------------------------------------------// diff --git a/circuits/http/parser/parser.circom b/circuits/http/parser/parser.circom index 487fb63..dc00454 100644 --- a/circuits/http/parser/parser.circom +++ b/circuits/http/parser/parser.circom @@ -18,7 +18,7 @@ template Parser(DATA_BYTES) { // Initialze the parser component State[DATA_BYTES]; - State[0] = StateUpdate(); + State[0] = HttpStateUpdate(); State[0].byte <== data[0]; State[0].parsing_start <== 1; State[0].parsing_header <== 0; @@ -26,7 +26,7 @@ template Parser(DATA_BYTES) { State[0].line_status <== 0; for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) { - State[data_idx] = StateUpdate(); + State[data_idx] = HttpStateUpdate(); State[data_idx].byte <== data[data_idx]; State[data_idx].parsing_start <== State[data_idx - 1].next_parsing_start; State[data_idx].parsing_header <== State[data_idx - 1].next_parsing_header; diff --git a/circuits/json/interpreter.circom b/circuits/json/interpreter.circom index 501877a..762a062 100644 --- a/circuits/json/interpreter.circom +++ b/circuits/json/interpreter.circom @@ -206,7 +206,7 @@ template NextKVPairAtDepth(n, depth) { signal input currByte; signal output out; - var logMaxDepth = log2Ceil(n); + var logMaxDepth = log2Ceil(n+1); component topOfStack = GetTopOfStack(n); topOfStack.stack <== stack; @@ -219,7 +219,7 @@ template NextKVPairAtDepth(n, depth) { component syntax = Syntax(); signal isComma <== IsEqual()([currByte, syntax.COMMA]); // pointer <= depth - signal atLessDepth <== 
LessEqThan(logMaxDepth)([pointer, depth]); + signal atLessDepth <== LessEqThan(logMaxDepth)([pointer-1, depth]); // current depth is less than key depth signal isCommaAtDepthLessThanCurrent <== isComma * atLessDepth; diff --git a/circuits/test/common/http.ts b/circuits/test/common/http.ts new file mode 100644 index 0000000..bf5adb7 --- /dev/null +++ b/circuits/test/common/http.ts @@ -0,0 +1,88 @@ +import { toByte } from "."; +import { join } from "path"; +import { readFileSync } from "fs"; + +export function readLockFile(filename: string): T { + const filePath = join(__dirname, "..", "..", "..", "examples", "http", "lockfile", filename); + const jsonString = readFileSync(filePath, 'utf-8'); + const jsonData = JSON.parse(jsonString); + return jsonData; +} + +export function getHeaders(data: Request | Response): [string, string][] { + const headers: [string, string][] = []; + let i = 1; + while (true) { + const nameKey = `headerName${i}`; + const valueKey = `headerValue${i}`; + if (nameKey in data && valueKey in data) { + headers.push([data[nameKey], data[valueKey]]); + i++; + } else { + break; + } + } + return headers; +} + +export interface Request { + method: string, + target: string, + version: string, + [key: string]: string, +} + +export interface Response { + version: string, + status: string, + message: string, + [key: string]: string, +} + +export function readHTTPInputFile(filename: string) { + const filePath = join(__dirname, "..", "..", "..", "examples", "http", filename); + let data = readFileSync(filePath, 'utf-8'); + + let input = toByte(data); + + // Split headers and body, accounting for possible lack of body + const parts = data.split('\r\n\r\n'); + const headerSection = parts[0]; + const bodySection = parts.length > 1 ? 
parts[1] : ''; + + // Function to parse headers into a dictionary + function parseHeaders(headerLines: string[]) { + const headers: { [id: string]: string } = {}; + + headerLines.forEach(line => { + const [key, value] = line.split(/:\s(.+)/); + if (key) headers[key.toLowerCase()] = value ? value : ''; + }); + + return headers; + } + + // Parse the headers + const headerLines = headerSection.split('\r\n'); + const initialLine = headerLines[0].split(' '); + const headers = parseHeaders(headerLines.slice(1)); + + // Parse the body, if JSON response + let responseBody = {}; + if (headers["content-type"] && headers["content-type"].startsWith("application/json") && bodySection) { + try { + responseBody = JSON.parse(bodySection); + } catch (e) { + console.error("Failed to parse JSON body:", e); + } + } + + // Combine headers and body into an object + return { + input: input, + initialLine: initialLine, + headers: headers, + body: responseBody, + bodyBytes: toByte(bodySection || ''), + }; +} \ No newline at end of file diff --git a/circuits/test/common/index.ts b/circuits/test/common/index.ts index 55aa960..eda2717 100644 --- a/circuits/test/common/index.ts +++ b/circuits/test/common/index.ts @@ -1,5 +1,5 @@ import 'mocha'; -import { readFileSync } from "fs"; +import { readFileSync, existsSync } from "fs"; import { join } from "path"; import { Circomkit, WitnessTester } from "circomkit"; @@ -31,7 +31,10 @@ export function readJSONInputFile(filename: string, key: any[]): [number[], numb let input: number[] = []; let output: number[] = []; - let data = readFileSync(valueStringPath, 'utf-8'); + let data = filename; + if (existsSync(valueStringPath)) { + data = readFileSync(valueStringPath, 'utf-8'); + } let keyUnicode: number[][] = []; for (let i = 0; i < key.length; i++) { @@ -63,52 +66,4 @@ export function toByte(data: string): number[] { byteArray.push(data.charCodeAt(i)); } return byteArray -} - -export function readHTTPInputFile(filename: string) { - const filePath = 
join(__dirname, "..", "..", "..", "examples", "http", filename); - let data = readFileSync(filePath, 'utf-8'); - - let input = toByte(data); - - // Split headers and body, accounting for possible lack of body - const parts = data.split('\r\n\r\n'); - const headerSection = parts[0]; - const bodySection = parts.length > 1 ? parts[1] : ''; - - // Function to parse headers into a dictionary - function parseHeaders(headerLines: string[]) { - const headers: { [id: string]: string } = {}; - - headerLines.forEach(line => { - const [key, value] = line.split(/:\s(.+)/); - if (key) headers[key] = value ? value : ''; - }); - - return headers; - } - - // Parse the headers - const headerLines = headerSection.split('\r\n'); - const initialLine = headerLines[0].split(' '); - const headers = parseHeaders(headerLines.slice(1)); - - // Parse the body, if JSON response - let responseBody = {}; - if (headers["Content-Type"] == "application/json" && bodySection) { - try { - responseBody = JSON.parse(bodySection); - } catch (e) { - console.error("Failed to parse JSON body:", e); - } - } - - // Combine headers and body into an object - return { - input: input, - initialLine: initialLine, - headers: headers, - body: responseBody, - bodyBytes: toByte(bodySection || ''), - }; } \ No newline at end of file diff --git a/circuits/test/http/codegen.test.ts b/circuits/test/http/codegen.test.ts index aa209c6..2db0a23 100644 --- a/circuits/test/http/codegen.test.ts +++ b/circuits/test/http/codegen.test.ts @@ -1,47 +1,10 @@ -import { circomkit, WitnessTester, readHTTPInputFile, toByte } from "../common"; +import { circomkit, WitnessTester, toByte } from "../common"; +import { readHTTPInputFile, readLockFile, getHeaders, Request, Response } from "../common/http"; import { join } from "path"; import { spawn } from "child_process"; -import { readFileSync } from "fs"; - -function readLockFile(filename: string): T { - const filePath = join(__dirname, "..", "..", "..", "examples", "http", "lockfile", 
filename); - const jsonString = readFileSync(filePath, 'utf-8'); - const jsonData = JSON.parse(jsonString); - return jsonData; -} - -function getHeaders(data: Request | Response): [string, string][] { - const headers: [string, string][] = []; - let i = 1; - while (true) { - const nameKey = `headerName${i}`; - const valueKey = `headerValue${i}`; - if (nameKey in data && valueKey in data) { - headers.push([data[nameKey], data[valueKey]]); - i++; - } else { - break; - } - } - return headers; -} - -interface Request { - method: string, - target: string, - version: string, - [key: string]: string, -} - -interface Response { - version: string, - status: string, - message: string, - [key: string]: string, -} -function executeCodegen(circuitName: string, inputFileName: string, lockfileName: string) { +export function executeCodegen(circuitName: string, inputFileName: string, lockfileName: string) { return new Promise((resolve, reject) => { const inputFilePath = join(__dirname, "..", "..", "..", "examples", "http", inputFileName); const lockfilePath = join(__dirname, "..", "..", "..", "examples", "http", "lockfile", lockfileName); @@ -175,7 +138,7 @@ describe("HTTP :: Codegen :: Response", async () => { const headers = getHeaders(lockData); - const params = [input.length, parseInt(http.headers["Content-Length"]), lockData.version.length, lockData.status.length, lockData.message.length]; + const params = [input.length, parseInt(http.headers["Content-Length".toLowerCase()]), lockData.version.length, lockData.status.length, lockData.message.length]; headers.forEach(header => { params.push(header[0].length); params.push(header[1].length); @@ -221,7 +184,7 @@ describe("HTTP :: Codegen :: Response", async () => { const headers = getHeaders(lockData); - const params = [input.length, parseInt(http.headers["Content-Length"]), lockData.version.length, lockData.status.length, lockData.message.length]; + const params = [input.length, 
parseInt(http.headers["Content-Length".toLowerCase()]), lockData.version.length, lockData.status.length, lockData.message.length]; headers.forEach(header => { params.push(header[0].length); params.push(header[1].length); diff --git a/circuits/test/http/extractor.test.ts b/circuits/test/http/extractor.test.ts index 6dbc0a4..20750f1 100644 --- a/circuits/test/http/extractor.test.ts +++ b/circuits/test/http/extractor.test.ts @@ -1,4 +1,5 @@ -import { circomkit, WitnessTester, generateDescription, readHTTPInputFile, toByte } from "../common"; +import { circomkit, WitnessTester, generateDescription, toByte } from "../common"; +import { readHTTPInputFile } from "../common/http"; describe("HTTP :: body Extractor", async () => { let circuit: WitnessTester<["data"], ["response"]>; @@ -74,7 +75,7 @@ describe("HTTP :: header Extractor", async () => { let parsedHttp = readHTTPInputFile("get_response.http"); - generatePassCase(parsedHttp.input, toByte("Content-Length"), toByte(parsedHttp.headers["Content-Length"]), ""); + generatePassCase(parsedHttp.input, toByte("Content-Length"), toByte(parsedHttp.headers["content-length"]), ""); }); }); diff --git a/circuits/test/http/interpreter.test.ts b/circuits/test/http/interpreter.test.ts index ac1a330..5986eac 100644 --- a/circuits/test/http/interpreter.test.ts +++ b/circuits/test/http/interpreter.test.ts @@ -1,4 +1,5 @@ -import { circomkit, WitnessTester, generateDescription, toByte, readHTTPInputFile } from "../common"; +import { circomkit, WitnessTester, generateDescription, toByte } from "../common"; +import { readHTTPInputFile } from "../common/http"; describe("HTTP :: Interpreter", async () => { describe("MethodMatch", async () => { diff --git a/circuits/test/http/locker.test.ts b/circuits/test/http/locker.test.ts index 4969b39..34f32be 100644 --- a/circuits/test/http/locker.test.ts +++ b/circuits/test/http/locker.test.ts @@ -1,4 +1,5 @@ -import { circomkit, WitnessTester, generateDescription, toByte, readHTTPInputFile } from 
"../common"; +import { circomkit, WitnessTester, generateDescription, toByte } from "../common"; +import { readHTTPInputFile } from "../common/http"; describe("HTTP :: Locker :: Request Line", async () => { let circuit: WitnessTester<["data", "beginning", "middle", "final"], []>; diff --git a/circuits/test/json/extractor/extractor.test.ts b/circuits/test/json/extractor/extractor.test.ts index c67a70b..ec9860c 100644 --- a/circuits/test/json/extractor/extractor.test.ts +++ b/circuits/test/json/extractor/extractor.test.ts @@ -3,7 +3,7 @@ import { join } from "path"; import { spawn } from "child_process"; -function executeCodegen(circuitName: string, inputFileName: string, lockfileName: string) { +export function executeCodegen(circuitName: string, inputFileName: string, lockfileName: string) { return new Promise((resolve, reject) => { const inputFilePath = join(__dirname, "..", "..", "..", "..", "examples", "json", "test", inputFileName); const lockfilePath = join(__dirname, "..", "..", "..", "..", "examples", "json", "lockfile", lockfileName); diff --git a/circuits/test/json/extractor/interpreter.test.ts b/circuits/test/json/extractor/interpreter.test.ts index 26643b1..a4e1b1f 100644 --- a/circuits/test/json/extractor/interpreter.test.ts +++ b/circuits/test/json/extractor/interpreter.test.ts @@ -250,19 +250,19 @@ describe("Interpreter", async () => { } let input1 = { stack: [[1, 0], [2, 0], [3, 1], [1, 0]], currByte: 44 }; - // output = 0 represents correct execution - let output = { out: 0 }; + // output = 1 represents correct execution + let output = { out: 1 }; generatePassCase(input1, output, 3, ""); // key depth is 2, and even if new-kv pair starts at depth greater than 2, it returns 0. 
let input2 = { stack: [[1, 0], [2, 0], [1, 1], [1, 0]], currByte: 44 }; - generatePassCase(input2, output, 2, ""); + generatePassCase(input2, { out: 0 }, 2, ""); let input3 = { stack: [[1, 0], [1, 0], [0, 0], [0, 0]], currByte: 44 }; - generatePassCase(input3, { out: 1 }, 3, "stack height less than specified"); + generatePassCase(input3, output, 3, "stack height less than specified"); let input4 = { stack: [[1, 0], [2, 0], [1, 0], [0, 0]], currByte: 34 }; - generatePassCase(input4, output, 2, "incorrect currByte"); + generatePassCase(input4, { out: 0 }, 2, "incorrect currByte"); }); describe("KeyMatch", async () => { diff --git a/circuits/test/spotify_top_artists.test.ts b/circuits/test/spotify_top_artists.test.ts new file mode 100644 index 0000000..208cfd4 --- /dev/null +++ b/circuits/test/spotify_top_artists.test.ts @@ -0,0 +1,195 @@ +import { circomkit, WitnessTester, toByte, readJSONInputFile } from "./common"; +import { readLockFile, readHTTPInputFile, getHeaders as getHttpHeaders, Response, Request } from "./common/http"; +import { executeCodegen as httpLockfileCodegen } from "./http/codegen.test"; +import { executeCodegen as jsonLockfileCodegen } from "./json/extractor/extractor.test"; +import { join } from "path"; +import { spawn } from "child_process"; +import { readFileSync } from "fs"; +import { version } from "os"; + +async function extendedLockfileCodegen(circuitName: string, inputFileName: string, lockfileName: string) { + return new Promise((resolve, reject) => { + const inputFilePath = join(__dirname, "..", "..", "examples", "http", inputFileName); + const lockfilePath = join(__dirname, "..", "..", "examples", "http", "lockfile", lockfileName); + + const codegen = spawn("cargo", ["run", "codegen", "extended", "--circuit-name", circuitName, "--input-file", inputFilePath, "--lockfile", lockfilePath]); + + codegen.stdout.on('data', (data) => { + console.log(`stdout: ${data}`); + }); + + codegen.stderr.on('data', (data) => { + console.error(`stderr: 
${data}`); + }); + + codegen.on('close', (code) => { + if (code === 0) { + resolve(`child process exited with code ${code}`); // Resolve the promise if the process exits successfully + } else { + reject(new Error(`Process exited with code ${code}`)); // Reject if there's an error + } + }); + }) +} + +// describe("spotify top artists separate", async () => { +// let http_circuit: WitnessTester<["data", "version", "status", "message", "header1", "value1"], ["body"]>; +// let json_circuit: WitnessTester<["data", "key1", "key2", "key4", "key5"], ["value"]>; + +// it("POST response body extraction", async () => { +// let httpLockfile = "spotify.lock" +// let httpInputFile = "spotify_top_artists_response.http"; +// let httpCircuitName = "spotify_top_artists"; + +// await httpLockfileCodegen(httpCircuitName, httpInputFile, `${httpLockfile}.json`); + +// let jsonFilename = "spotify"; + +// await jsonLockfileCodegen(`${jsonFilename}_test`, `${jsonFilename}.json`, `${jsonFilename}.json`); + +// const lockData = readLockFile(`${httpLockfile}.json`); + +// const http = readHTTPInputFile(`${httpInputFile}`); +// const inputHttp = http.input; + +// const headers = getHttpHeaders(lockData); + +// const params = [inputHttp.length, http.bodyBytes.length, lockData.version.length, lockData.status.length, lockData.message.length]; +// headers.forEach(header => { +// params.push(header[0].length); +// params.push(header[1].length); +// }); + +// http_circuit = await circomkit.WitnessTester(`Extract`, { +// file: `main/http_${httpCircuitName}`, +// template: "LockHTTPResponse", +// params: params, +// }); +// console.log("#constraints:", await http_circuit.getConstraintCount()); + +// // match circuit output to original JSON value +// const circuitInput: any = { +// data: inputHttp, +// version: toByte(lockData.version), +// status: toByte(lockData.status), +// message: toByte(lockData.message), +// }; + +// headers.forEach((header, index) => { +// circuitInput[`header${index + 1}`] = 
toByte(header[0]); +// circuitInput[`value${index + 1}`] = toByte(header[1]); +// }); + +// await http_circuit.expectPass(circuitInput, { body: http.bodyBytes }); + +// let index_0 = 0; + +// let [inputJson, key, output] = readJSONInputFile( +// `${jsonFilename}.json`, +// [ +// "data", +// "items", +// index_0, +// "profile", +// "name" +// ] +// ); + +// json_circuit = await circomkit.WitnessTester(`Extract`, { +// file: `main/json_${jsonFilename}_test`, +// template: "ExtractStringValue", +// params: [inputJson.length, 5, 4, 0, 5, 1, index_0, 2, 7, 3, 4, 4, 12], +// }); +// console.log("#constraints:", await json_circuit.getConstraintCount()); + +// await json_circuit.expectPass({ data: inputJson, key1: key[0], key2: key[1], key4: key[3], key5: key[4] }, { value: output }); +// }); +// }); + +interface JsonLockfile { + keys: any[], + valueType: string, +} + +interface HttpJsonLockdata { + http: Response, + json: JsonLockfile, +} + +describe("spotify top artists", async () => { + let circuit: WitnessTester<["data", "version", "status", "message", "header1", "value1", "key1", "key2", "key4", "key5"], ["value"]>; + + it("extraction", async () => { + let lockfile = "spotify_extended.lock.json" + let inputFile = "spotify_top_artists_response.http"; + let circuitName = "spotify_top_artists"; + + await extendedLockfileCodegen(circuitName, inputFile, lockfile); + + const lockFilePath = join(__dirname, "..", "..", "examples", "http", "lockfile", lockfile); + const fileString = readFileSync(lockFilePath, 'utf-8'); + const lockData: HttpJsonLockdata = JSON.parse(fileString); + + const http = readHTTPInputFile(`${inputFile}`); + const inputHttp = http.input; + let [inputJson, key, finalOutput] = readJSONInputFile(JSON.stringify(http.body), lockData.json.keys); + + const headers = getHttpHeaders(lockData.http); + + const params = [inputHttp.length, http.bodyBytes.length, lockData.http.version.length, lockData.http.status.length, lockData.http.message.length]; + 
headers.forEach(header => { + params.push(header[0].length); + params.push(header[1].length); + }); + + // JSON extractor params + + // MAX_STACK_HEIGHT + params.push(5); + + // keys + for (var i = 0; i < lockData.json.keys.length; i++) { + let key = lockData.json.keys[i]; + if (typeof (key) == "string") { + params.push(String(key).length); + } else if (typeof (key) == "number") { + params.push(key); + } + params.push(i); + } + + // maxValueLen + params.push(finalOutput.length); + + circuit = await circomkit.WitnessTester(`spotify_top_artists_test`, { + file: `main/extended_${circuitName}`, + template: "HttpJson", + params: params, + }); + console.log("#constraints:", await circuit.getConstraintCount()); + + // circuit input for http + json + + // add http start line + headers + const circuitInput: any = { + data: inputHttp, + version: toByte(lockData.http.version), + status: toByte(lockData.http.status), + message: toByte(lockData.http.message), + }; + headers.forEach((header, index) => { + circuitInput[`header${index + 1}`] = toByte(header[0]); + circuitInput[`value${index + 1}`] = toByte(header[1]); + }); + + // add json key inputs + circuitInput["key1"] = key[0]; + circuitInput["key2"] = key[1]; + circuitInput["key4"] = key[3]; + circuitInput["key5"] = key[4]; + + await circuit.expectPass(circuitInput); + // TODO: currently this fails due to sym file being too large + // await circuit.expectPass(circuitInput, { value: finalOutput }); + }); +}); \ No newline at end of file diff --git a/docs/http.md b/docs/http.md index e69de29..c0a6e14 100644 --- a/docs/http.md +++ b/docs/http.md @@ -0,0 +1,56 @@ +# HTTP Extractor + +HTTP is a more strict and well-defined specification than JSON, and thus its parser is a lot simpler than the JSON parser. 
+ +Proof generation for the HTTP extractor is broken into: +- [Parser](../circuits/http/parser/machine.circom): state parser based on a stack machine +- [Interpreter](../circuits/http/interpreter.circom): interpretation of the stack machine to represent different HTTP states. +- [Locker](../circuits/http/locker.circom): locks the start line and headers in an HTTP file +- [codegen](../src/codegen/http.rs): generates a locker circuit that locks the start line and headers, and extracts the response body + +## Parser + +We follow [RFC 9112](https://httpwg.org/specs/rfc9112.html) to represent and understand HTTP state in the parser. + +The parser is divided into two files: +- [Language](../circuits/http/parser/language.circom): HTTP language syntax +- [Machine](../circuits/http/parser/machine.circom): stack machine responsible for updating state + +HTTP parser state consists of: +- `parsing_start`: flag that counts up to 3 for each value in the start line. Request has `[method, target, version]` and Response has `[version, status, message]`. +- `parsing_header`: flag + counter for each new header +- `parsing_field_name`: flag tracking whether inside a field name +- `parsing_field_value`: flag tracking whether inside a field value +- `parsing_body`: flag tracking whether inside the body +- `line_status`: flag counting double CRLF + +We advise going through the detailed [tests](../circuits/test/http/locker.test.ts) to understand HTTP state parsing. 
+ +## Interpreter +The interpreter builds the following high-level circuits to understand parser state: +- `inStartLine`: whether parser is inside start line +- `inStartMiddle`: whether parser is inside second value of start line +- `inStartEnd`: whether parser is inside last value of start line +- `MethodMatch`: matches a method at specified index +- `HeaderFieldNameValueMatch`: matches a header field name and value +- `HeaderFieldNameMatch`: matches a header field name + +## Codegen +A [lockfile](../examples/http/lockfile/) needs to be supplied while generating the code through the `pabuild` CLI and should follow certain rules. + +```json +{ + "version": "HTTP/1.1", + "status": "200", + "message": "OK", + "headerName1": "Content-Type", + "headerValue1": "application/json" +} +``` + +It should specify the start line values, depending on whether the file is a Request or a Response, and the header field names and values to be matched. + +Codegen generates a circom template that matches the lockfile values and, if the lockfile is for response data, extracts the response body. + +## Extractor +Extracting the response body is done by checking whether the parser state is inside the body and creating a mask to determine the starting byte. Shifting the data by the starting byte index gives the response body. \ No newline at end of file diff --git a/docs/pabuild.md b/docs/pabuild.md index 6fbe722..0782a68 100644 --- a/docs/pabuild.md +++ b/docs/pabuild.md @@ -67,7 +67,7 @@ to get options: Usage: pabuild codegen [OPTIONS] --circuit-name --input-file --lockfile Arguments: - [possible values: json, http] + [possible values: json, http, extended] Options: --circuit-name Name of the circuit (to be used in circomkit config) @@ -179,3 +179,24 @@ To test an end-to-end HTTP response extraction proof: # OR snarkjs groth16 verify build/get-response/groth16_vkey.json inputs/get-response/inputs.json build/get-response/groth16_proof.json ``` + +### Extended HTTP + JSON extraction + +`pabuild` allows creating a proof of an arbitrary HTTP response. 
+- Locks the start line and headers for HTTP as specified in the [lockfile](../examples/http/lockfile/spotify_extended.lock.json). + - **NOTE**: `Accept-Encoding: identity` header is mandatory as pabuild doesn't support `gzip` encoding. +- Extracts the response body +- Creates a JSON value extractor circuit based on the keys in the [lockfile](../examples/http/lockfile/spotify_extended.lock.json) +- Extracts the value and creates a proof + +Steps to run an end-to-end proof are similar to the HTTP/JSON extractors: +- Run codegen to generate circuits. Replace `spotify_top_artists` with your circuit name. + ```sh + pabuild codegen extended --circuit-name spotify_top_artists --input-file examples/http/spotify_top_artists_response.http --lockfile examples/http/lockfile/spotify_extended.lock.json -d + ``` + +- Refer to [HTTP extractor](#http-locking-and-extraction) for the following steps: + - generate witness + - create trusted setup + - create proof + - verify proof \ No newline at end of file diff --git a/examples/http/lockfile/spotify.lock.json b/examples/http/lockfile/spotify.lock.json new file mode 100644 index 0000000..e3ca2da --- /dev/null +++ b/examples/http/lockfile/spotify.lock.json @@ -0,0 +1,7 @@ +{ + "version": "HTTP/1.1", + "status": "200", + "message": "OK", + "headerName1": "content-type", + "headerValue1": "application/json; charset=utf-8" +} \ No newline at end of file diff --git a/examples/http/lockfile/spotify_extended.lock.json b/examples/http/lockfile/spotify_extended.lock.json new file mode 100644 index 0000000..8940987 --- /dev/null +++ b/examples/http/lockfile/spotify_extended.lock.json @@ -0,0 +1,19 @@ +{ + "http": { + "version": "HTTP/1.1", + "status": "200", + "message": "OK", + "headerName1": "content-type", + "headerValue1": "application/json; charset=utf-8" + }, + "json": { + "keys": [ + "data", + "items", + 0, + "profile", + "name" + ], + "value_type": "string" + } +} \ No newline at end of file diff --git a/examples/http/spotify_top_artists_request.http 
b/examples/http/spotify_top_artists_request.http new file mode 100644 index 0000000..d0d382c --- /dev/null +++ b/examples/http/spotify_top_artists_request.http @@ -0,0 +1,3 @@ +GET /v1/me/top/artists?time_range=medium_term&limit=1 HTTP/1.1 +Host: api.spotify.com +Authorization: Bearer BQBXRpIm2NL08akEiaxB5l42eiq6Zd9Q0S2-0Q4k0CMoa5u8o_ah_Ddjxt6Mv3226AEDyKYcFPpgw_6Asg-Y2hJpcuMya8wzqyqgiV-KH0vcEq7EFzODXoaBxsB0wryVCWDF6p5dqcpIHOz4QJqQa9mUA6sFzYNyECglT-BGcRe_N9f_3aqYTGQ-kkE-devPkPkEfDcbziT6mOzJfGRzLw \ No newline at end of file diff --git a/examples/http/spotify_top_artists_response.http b/examples/http/spotify_top_artists_response.http new file mode 100644 index 0000000..37bd5d4 --- /dev/null +++ b/examples/http/spotify_top_artists_response.http @@ -0,0 +1,8 @@ +HTTP/1.1 200 OK +content-type: application/json; charset=utf-8 +content-encoding: gzip +Transfer-Encoding: chunked + +{ + "data": {"items": [{"data": "Artist","profile": {"name": "Taylor Swift"}}]} +} \ No newline at end of file diff --git a/examples/json/lockfile/spotify.json b/examples/json/lockfile/spotify.json new file mode 100644 index 0000000..0669a7b --- /dev/null +++ b/examples/json/lockfile/spotify.json @@ -0,0 +1,10 @@ +{ + "keys": [ + "data", + "items", + 0, + "profile", + "name" + ], + "value_type": "string" +} \ No newline at end of file diff --git a/examples/json/test/spotify.json b/examples/json/test/spotify.json new file mode 100644 index 0000000..dcacc81 --- /dev/null +++ b/examples/json/test/spotify.json @@ -0,0 +1,3 @@ +{ + "data": {"items": [{"data": "Artist","profile": {"name": "Taylor Swift"}}]} +} \ No newline at end of file diff --git a/src/circuit_config.rs b/src/circuit_config.rs index 7d43497..7f7da87 100644 --- a/src/circuit_config.rs +++ b/src/circuit_config.rs @@ -15,36 +15,32 @@ pub struct CircomkitCircuitConfig { #[derive(Debug, Clone, Serialize, Deserialize)] pub struct CircomkitConfig(HashMap); -/// Writes config to `circuits.json` for circomkit support -/// # Inputs -/// - 
`name`: circuit name -/// - `circuit_config`: [`CircomkitCircuitConfig`] -pub fn write_config( - name: String, - circuit_config: &CircomkitCircuitConfig, -) -> Result<(), Box> { - let mut circomkit_config = env::current_dir()?; - circomkit_config.push("circuits.json"); - - let _ = std::fs::File::create_new(&circomkit_config); - - let mut circomkit_circuits: CircomkitConfig = - serde_json::from_slice(&std::fs::read(&circomkit_config)?)?; - - if let Some(circuits_inputs) = circomkit_circuits.0.get_mut(&name) { - *circuits_inputs = circuit_config.clone(); - } else { - let _ = circomkit_circuits - .0 - .insert(name.clone(), circuit_config.clone()); - } +impl CircomkitCircuitConfig { + /// Writes [`CircomkitCircuitConfig`] to `circuits.json` for circomkit support + /// # Inputs + /// - `name`: circuit name + pub fn write(&self, name: &str) -> Result<(), Box> { + let mut circomkit_config = env::current_dir()?; + circomkit_config.push("circuits.json"); + + let _ = std::fs::File::create_new(&circomkit_config); + + let mut circomkit_circuits: CircomkitConfig = + serde_json::from_slice(&std::fs::read(&circomkit_config)?)?; - std::fs::write( - circomkit_config.clone(), - serde_json::to_string_pretty(&circomkit_circuits)?, - )?; + if let Some(circuits_inputs) = circomkit_circuits.0.get_mut(name) { + *circuits_inputs = self.clone(); + } else { + let _ = circomkit_circuits.0.insert(name.to_string(), self.clone()); + } - println!("Config updated: {}", circomkit_config.display()); + std::fs::write( + circomkit_config.clone(), + serde_json::to_string_pretty(&circomkit_circuits)?, + )?; - Ok(()) + println!("Config updated: {}", circomkit_config.display()); + + Ok(()) + } } diff --git a/src/codegen/http.rs b/src/codegen/http.rs index 217d023..53e9535 100644 --- a/src/codegen/http.rs +++ b/src/codegen/http.rs @@ -1,15 +1,12 @@ -use crate::{ - circuit_config::{write_config, CircomkitCircuitConfig}, - witness::read_input_file_as_bytes, - ExtractorArgs, FileType, -}; +use 
crate::{circuit_config::CircomkitCircuitConfig, ExtractorArgs, FileType}; use regex::Regex; use serde::{Deserialize, Serialize}; use std::{ - collections::HashMap, + collections::{BTreeMap, HashMap}, error::Error, fs::{self, create_dir_all}, + path::Path, }; #[derive(Serialize, Deserialize)] @@ -21,31 +18,183 @@ pub enum HttpData { #[derive(Debug, Deserialize)] pub struct Request { - method: String, - target: String, - version: String, + pub method: String, + pub target: String, + pub version: String, #[serde(flatten)] #[serde(deserialize_with = "deserialize_headers")] - headers: HashMap, + pub headers: BTreeMap, } #[derive(Debug, Deserialize)] pub struct Response { - version: String, - status: String, - message: String, + pub version: String, + pub status: String, + pub message: String, #[serde(flatten)] #[serde(deserialize_with = "deserialize_headers")] - headers: HashMap, + pub headers: BTreeMap, } impl HttpData { - fn headers(&self) -> HashMap { + pub fn headers(&self) -> BTreeMap { match self { HttpData::Request(request) => request.headers.clone(), HttpData::Response(response) => response.headers.clone(), } } + + pub fn params(&self) -> Vec { + let mut params = vec!["DATA_BYTES".to_string()]; + match self { + HttpData::Request(_) => { + params.append(&mut vec![ + "methodLen".to_string(), + "targetLen".to_string(), + "versionLen".to_string(), + ]); + } + HttpData::Response(_) => { + params.append(&mut vec![ + "maxContentLength".to_string(), + "versionLen".to_string(), + "statusLen".to_string(), + "messageLen".to_string(), + ]); + } + }; + + for i in 0..self.headers().len() { + params.push(format!("headerNameLen{}", i + 1)); + params.push(format!("headerValueLen{}", i + 1)); + } + + params + } + + pub fn inputs(&self) -> Vec { + let mut inputs = vec!["data".to_string()]; + + match self { + HttpData::Request(_) => inputs.append(&mut vec![ + String::from("method"), + String::from("target"), + String::from("version"), + ]), + HttpData::Response(_) => 
inputs.append(&mut vec![ + String::from("version"), + String::from("status"), + String::from("message"), + ]), + }; + + for (i, _header) in self.headers().iter().enumerate() { + inputs.push(format!("header{}", i + 1)); + inputs.push(format!("value{}", i + 1)); + } + + inputs + } + + pub fn parse_input( + &self, + input: Vec, + ) -> Result<(HttpData, Vec), Box> { + let input_string = String::from_utf8(input)?; + + let parts: Vec<&str> = input_string.split("\r\n\r\n").collect(); + assert!(parts.len() <= 2); + + let mut body = vec![]; + if parts.len() == 2 { + body = parts[1].as_bytes().to_vec(); + } + + let headers: Vec<&str> = parts[0].split("\r\n").collect(); + let start_line: Vec<&str> = headers[0].split(" ").collect(); + assert_eq!(start_line.len(), 3); + + let (_, headers) = headers.split_at(1); + let mut headers_map = BTreeMap::::new(); + let re = Regex::new(r":\s+").unwrap(); + for &header in headers { + println!("header: {:?}", header); + let key_value: Vec<&str> = re.split(header).collect(); + assert_eq!(key_value.len(), 2); + println!("key: {:?}", key_value); + headers_map.insert(key_value[0].to_string(), key_value[1].to_string()); + } + + let http_data = match self { + HttpData::Request(_) => HttpData::Request(Request { + method: start_line[0].to_string(), + target: start_line[1].to_string(), + version: start_line[2].to_string(), + headers: headers_map, + }), + HttpData::Response(_) => HttpData::Response(Response { + version: start_line[0].to_string(), + status: start_line[1].to_string(), + message: start_line[2].to_string(), + headers: headers_map, + }), + }; + + Ok((http_data, body)) + } + + pub fn populate_params( + &self, + input: Vec, + ) -> Result, Box> { + let (_, http_body) = self.parse_input(input.clone())?; + + let mut params = vec![input.len()]; + + match self { + HttpData::Request(request) => { + params.push(request.method.len()); + params.push(request.target.len()); + params.push(request.version.len()); + for (key, value) in 
request.headers.iter() { + params.push(key.len()); + params.push(value.len()); + } + } + HttpData::Response(response) => { + params.push(http_body.len()); + params.push(response.version.len()); + params.push(response.status.len()); + params.push(response.message.len()); + for (key, value) in response.headers.iter() { + params.push(key.len()); + params.push(value.len()); + } + } + } + + Ok(params) + } + + fn build_circuit_config( + &self, + input_file: &Path, + codegen_filename: &str, + ) -> Result> { + println!("input_ifle: {:?}", input_file); + let input = FileType::Http.read_input(input_file)?; + + let circuit_template_name = match self { + HttpData::Request(_) => String::from("LockHTTPRequest"), + HttpData::Response(_) => String::from("LockHTTPResponse"), + }; + + Ok(CircomkitCircuitConfig { + file: format!("main/{}", codegen_filename), + template: circuit_template_name, + params: self.populate_params(input)?, + }) + } } impl std::fmt::Debug for HttpData { @@ -97,12 +246,12 @@ impl Serialize for Response { } } -fn deserialize_headers<'de, D>(deserializer: D) -> Result, D::Error> +fn deserialize_headers<'de, D>(deserializer: D) -> Result, D::Error> where D: serde::Deserializer<'de>, { - let mut map = HashMap::new(); - let mut temp_map: HashMap = HashMap::deserialize(deserializer)?; + let mut map = BTreeMap::new(); + let mut temp_map: BTreeMap = BTreeMap::deserialize(deserializer)?; let mut i = 1; while let (Some(name), Some(value)) = ( @@ -116,11 +265,10 @@ where Ok(map) } -const PRAGMA: &str = "pragma circom 2.1.9;\n\n"; - fn build_http_circuit( + config: &CircomkitCircuitConfig, data: &HttpData, - output_filename: &String, + output_filename: &str, debug: bool, ) -> Result<(), Box> { let mut circuit_buffer = String::new(); @@ -131,7 +279,7 @@ fn build_http_circuit( circuit_buffer += "\n*/\n"; // Version and includes - circuit_buffer += PRAGMA; + circuit_buffer += "pragma circom 2.1.9;\n\n"; circuit_buffer += "include \"../http/interpreter.circom\";\n"; 
circuit_buffer += "include \"../http/parser/machine.circom\";\n"; circuit_buffer += "include \"../utils/bytes.circom\";\n"; @@ -140,21 +288,8 @@ fn build_http_circuit( circuit_buffer += "include \"@zk-email/circuits/utils/array.circom\";\n\n"; { - match data { - HttpData::Request(_) => { - circuit_buffer += - "template LockHTTPRequest(DATA_BYTES, methodLen, targetLen, versionLen"; - } - HttpData::Response(_) => { - circuit_buffer += - "template LockHTTPResponse(DATA_BYTES, maxContentLength, versionLen, statusLen, messageLen"; - } - } - - for (i, _header) in data.headers().iter().enumerate() { - circuit_buffer += &format!(", headerNameLen{}, headerValueLen{}", i + 1, i + 1); - } - circuit_buffer += ") {"; + let params = data.params(); + circuit_buffer += &format!("template {}({}) {{", config.template, params.join(", ")); } { @@ -258,7 +393,7 @@ fn build_http_circuit( circuit_buffer += r#" component State[DATA_BYTES]; - State[0] = StateUpdate(); + State[0] = HttpStateUpdate(); State[0].byte <== data[0]; State[0].parsing_start <== 1; State[0].parsing_header <== 0; @@ -282,7 +417,7 @@ fn build_http_circuit( { circuit_buffer += r#" for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) { - State[data_idx] = StateUpdate(); + State[data_idx] = HttpStateUpdate(); State[data_idx].byte <== data[data_idx]; State[data_idx].parsing_start <== State[data_idx - 1].next_parsing_start; State[data_idx].parsing_header <== State[data_idx - 1].next_parsing_header; @@ -489,108 +624,96 @@ fn build_http_circuit( Ok(()) } -fn parse_http_file( - locfile: &HttpData, - input: Vec, -) -> Result<(HttpData, Vec), Box> { - let input_string = String::from_utf8(input)?; +pub fn http_circuit_from_args( + args: &ExtractorArgs, +) -> Result> { + let data = std::fs::read(&args.lockfile)?; - let parts: Vec<&str> = input_string.split("\r\n\r\n").collect(); - assert!(parts.len() <= 2); + let http_data: HttpData = serde_json::from_slice(&data)?; - let mut body = vec![]; - if parts.len() == 2 { - body = 
parts[1].as_bytes().to_vec(); - } + let codegen_filename = format!("http_{}", args.circuit_name); - let headers: Vec<&str> = parts[0].split("\r\n").collect(); - let start_line: Vec<&str> = headers[0].split(" ").collect(); - assert_eq!(start_line.len(), 3); - - let (_, headers) = headers.split_at(1); - let mut headers_map = HashMap::::new(); - let re = Regex::new(r":\s(.+)").unwrap(); - for &header in headers { - let key_value: Vec<&str> = re.split(header).collect(); - assert_eq!(key_value.len(), 2); - headers_map.insert(key_value[0].to_string(), key_value[1].to_string()); - } + let config = + http_circuit_from_lockfile(&args.input_file, &http_data, &codegen_filename, args.debug)?; + + config.write(&args.circuit_name)?; - let http_data = match locfile { - HttpData::Request(_) => HttpData::Request(Request { - method: start_line[0].to_string(), - target: start_line[1].to_string(), - version: start_line[2].to_string(), - headers: headers_map, - }), - HttpData::Response(_) => HttpData::Response(Response { - version: start_line[0].to_string(), - status: start_line[1].to_string(), - message: start_line[2].to_string(), - headers: headers_map, - }), - }; - - Ok((http_data, body)) + Ok(config) } -fn build_circuit_config( - args: &ExtractorArgs, - lockfile: &HttpData, - codegen_filename: String, +pub fn http_circuit_from_lockfile( + input_file: &Path, + http_data: &HttpData, + codegen_filename: &str, + debug: bool, ) -> Result> { - let input = read_input_file_as_bytes(&FileType::Http, args.input_file.clone())?; + let config = http_data.build_circuit_config(input_file, codegen_filename)?; - let (_, http_body) = parse_http_file(lockfile, input.clone())?; + build_http_circuit(&config, http_data, codegen_filename, debug)?; - let circuit_template_name = match lockfile { - HttpData::Request(_) => String::from("LockHTTPRequest"), - HttpData::Response(_) => String::from("LockHTTPResponse"), - }; + Ok(config) +} - let mut params = vec![input.len()]; +#[cfg(test)] +mod test { + use 
super::*; - match lockfile { - HttpData::Request(request) => { - params.push(request.method.len()); - params.push(request.target.len()); - params.push(request.version.len()); - for (key, value) in request.headers.iter() { - params.push(key.len()); - params.push(value.len()); - } - } - HttpData::Response(response) => { - params.push(http_body.len()); - params.push(response.version.len()); - params.push(response.status.len()); - params.push(response.message.len()); - for (key, value) in response.headers.iter() { - params.push(key.len()); - params.push(value.len()); - } - } + #[test] + fn params() { + let lockfile: HttpData = serde_json::from_slice(include_bytes!( + "../../examples/http/lockfile/spotify.lock.json" + )) + .unwrap(); + + let params = lockfile.params(); + + assert_eq!(params.len(), 7); + assert_eq!(params[0], "DATA_BYTES"); + assert_eq!(params[1], "maxContentLength"); } - Ok(CircomkitCircuitConfig { - file: format!("main/{}", codegen_filename), - template: circuit_template_name, - params, - }) -} + #[test] + fn inputs() { + let lockfile: HttpData = serde_json::from_slice(include_bytes!( + "../../examples/http/lockfile/spotify.lock.json" + )) + .unwrap(); -pub fn http_circuit(args: ExtractorArgs) -> Result<(), Box> { - let data = std::fs::read(&args.lockfile)?; + let inputs = lockfile.inputs(); - let http_data: HttpData = serde_json::from_slice(&data)?; + assert_eq!(inputs.len(), 6); + assert_eq!(inputs[1], "version"); + assert_eq!(inputs[2], "status"); + assert_eq!(inputs[3], "message"); + } - let codegen_filename = format!("http_{}", args.circuit_name); + #[test] + fn populate_params() { + let lockfile: HttpData = serde_json::from_slice(include_bytes!( + "../../examples/http/lockfile/request.lock.json" + )) + .unwrap(); - build_http_circuit(&http_data, &codegen_filename, args.debug)?; + let input = include_bytes!("../../examples/http/get_request.http"); - let circomkit_circuit_input = build_circuit_config(&args, &http_data, codegen_filename)?; + let 
params = lockfile.populate_params(input.to_vec()).unwrap(); - write_config(args.circuit_name, &circomkit_circuit_input)?; + assert_eq!(params.len(), 8); + assert_eq!(params, [input.len(), 3, 4, 8, 6, 16, 4, 9]); + } - Ok(()) + #[test] + fn parse_input() { + let lockfile: HttpData = serde_json::from_slice(include_bytes!( + "../../examples/http/lockfile/request.lock.json" + )) + .unwrap(); + + let input = include_bytes!("../../examples/http/get_request.http"); + + let (http, body) = lockfile.parse_input(input.to_vec()).unwrap(); + + assert_eq!(body.len(), 0); + assert_eq!(http.headers()["Accept"], "application/json"); + } } diff --git a/src/codegen/integrated.rs b/src/codegen/integrated.rs new file mode 100644 index 0000000..0d2c354 --- /dev/null +++ b/src/codegen/integrated.rs @@ -0,0 +1,219 @@ +use crate::{ + circuit_config::CircomkitCircuitConfig, + codegen::{ + http::HttpData, + json::{Key, Lockfile as JsonLockfile}, + }, + ExtractorArgs, FileType, +}; +use serde::{Deserialize, Serialize}; +use std::path::Path; + +use super::{http::http_circuit_from_lockfile, json::json_circuit_from_lockfile}; + +#[derive(Debug, Serialize, Deserialize)] +pub struct ExtendedLockfile { + pub http: HttpData, + pub json: JsonLockfile, +} + +fn build_integrated_circuit( + http_data: &HttpData, + http_circuit_config: &CircomkitCircuitConfig, + json_lockfile: &JsonLockfile, + json_circuit_config: &CircomkitCircuitConfig, + integrated_circuit_config: &CircomkitCircuitConfig, + output_filename: &str, +) -> Result<(), Box> { + let mut circuit_buffer = String::new(); + + circuit_buffer += "pragma circom 2.1.9;\n\n"; + + let http_circuit_filename = Path::new(&http_circuit_config.file) + .file_name() + .expect("incorrect filepath in circuit config") + .to_str() + .expect("improper circuit filename"); + + let json_circuit_filename = Path::new(&json_circuit_config.file) + .file_name() + .expect("incorrect filepath in circuit config") + .to_str() + .expect("improper circuit filename"); + + 
circuit_buffer += &format!("include \"./{}.circom\";\n", http_circuit_filename); + circuit_buffer += &format!("include \"./{}.circom\";\n\n", json_circuit_filename); + + let http_params = http_data.params(); + + let mut json_params = json_lockfile.params(); + // remove `DATA_BYTES` from json params + json_params.remove(0); + + circuit_buffer += &format!( + "template {}({}, {}) {{\n", + integrated_circuit_config.template, + http_params.join(", "), + json_params.join(", ") + ); + + { + circuit_buffer += r#" + // Raw HTTP bytestream + signal input data[DATA_BYTES]; +"#; + + // Start line signals + { + match http_data { + HttpData::Request(_) => { + circuit_buffer += r#" + // Request line attributes + signal input method[methodLen]; + signal input target[targetLen]; + signal input version[versionLen]; + +"#; + } + HttpData::Response(_) => { + circuit_buffer += r#" + // Status line attributes + signal input version[versionLen]; + signal input status[statusLen]; + signal input message[messageLen]; + +"#; + } + } + } + + // Header signals + circuit_buffer += " // Header names and values to lock\n"; + for (i, _header) in http_data.headers().iter().enumerate() { + circuit_buffer += &format!( + " signal input header{}[headerNameLen{}];\n", + i + 1, + i + 1 + ); + circuit_buffer += &format!( + " signal input value{}[headerValueLen{}];\n", + i + 1, + i + 1 + ); + } + } + + circuit_buffer += "\n signal httpBody[maxContentLength];\n\n"; + let http_inputs = http_data.inputs(); + circuit_buffer += &format!( + " httpBody <== {}({})({});\n\n", + http_circuit_config.template, + http_params.join(", "), + http_inputs.join(", "), + ); + + for (i, key) in json_lockfile.keys.iter().enumerate() { + match key { + Key::String(_) => { + circuit_buffer += &format!(" signal input key{}[keyLen{}];\n", i + 1, i + 1) + } + Key::Num(_) => (), + } + } + + circuit_buffer += "\n signal output value[maxValueLen];\n"; + circuit_buffer += &format!( + " value <== {}(maxContentLength, {}", + 
json_circuit_config.template, + json_params.join(", ") + ); + + let mut json_inputs = json_lockfile.inputs(); + json_inputs.remove(0); + circuit_buffer += &format!(")(httpBody, {});\n", json_inputs.join(", ")); + + circuit_buffer += "}"; + + // write circuits to file + let mut file_path = std::env::current_dir()?; + file_path.push("circuits"); + file_path.push("main"); + + // create dir if doesn't exist + std::fs::create_dir_all(&file_path)?; + + file_path.push(format!("{}.circom", output_filename)); + + std::fs::write(&file_path, circuit_buffer)?; + + println!("Code generated at: {}", file_path.display()); + + Ok(()) +} + +fn build_circuit_config( + args: &ExtractorArgs, + http_data: &HttpData, + json_lockfile: &JsonLockfile, + output_filename: &str, +) -> Result> { + let input = FileType::Http.read_input(&args.input_file)?; + + let (_, http_body) = http_data.parse_input(input.clone())?; + + // populate http params + let mut params = http_data.populate_params(input)?; + + // add json params and remove first param: `DATA_BYTES` + let mut json_params = json_lockfile.populate_params(&http_body)?; + json_params.remove(0); + params.append(&mut json_params); + + Ok(CircomkitCircuitConfig { + file: format!("main/{}", output_filename), + template: String::from("HttpJson"), + params, + }) +} + +/// Builds a HTTP + JSON combined circuit extracting body response from HTTP response and +/// extracting value of keys from JSON. 
+pub fn integrated_circuit(args: &ExtractorArgs) -> Result<(), Box> { + let extended_lockfile: ExtendedLockfile = + serde_json::from_slice(&std::fs::read(&args.lockfile)?)?; + + let http_data: HttpData = extended_lockfile.http; + let lockfile: JsonLockfile = extended_lockfile.json; + + let http_circuit_filename = format!("{}_http", args.circuit_name); + let http_circuit_config = http_circuit_from_lockfile( + &args.input_file, + &http_data, + &http_circuit_filename, + args.debug, + )?; + + // read http response body as json input + let json_circuit_filename = format!("{}_json", args.circuit_name); + let input = FileType::Http.read_input(&args.input_file)?; + let (_, http_body) = http_data.parse_input(input.clone())?; + + let json_circuit_config = + json_circuit_from_lockfile(&http_body, &lockfile, &json_circuit_filename, args.debug)?; + + let output_filename = format!("extended_{}", args.circuit_name); + let config = build_circuit_config(args, &http_data, &lockfile, &output_filename)?; + + build_integrated_circuit( + &http_data, + &http_circuit_config, + &lockfile, + &json_circuit_config, + &config, + &output_filename, + )?; + + config.write(&args.circuit_name)?; + + Ok(()) +} diff --git a/src/codegen/json.rs b/src/codegen/json.rs index d1e6b78..5f99cad 100644 --- a/src/codegen/json.rs +++ b/src/codegen/json.rs @@ -3,16 +3,14 @@ use serde_json::Value; use std::{ cmp::max_by, collections::HashMap, + error::Error, fs::{self, create_dir_all}, str::FromStr, }; -use crate::{ - circuit_config::{write_config, CircomkitCircuitConfig}, - ExtractorArgs, -}; +use crate::{circuit_config::CircomkitCircuitConfig, ExtractorArgs}; -#[derive(Debug, Deserialize)] +#[derive(Debug, Serialize, Deserialize)] pub enum ValueType { #[serde(rename = "string")] String, @@ -27,14 +25,14 @@ pub enum Key { Num(usize), } -#[derive(Debug, Deserialize)] +#[derive(Debug, Serialize, Deserialize)] pub struct Lockfile { - keys: Vec, - value_type: ValueType, + pub keys: Vec, + pub value_type: ValueType, 
} impl Lockfile { - pub fn as_bytes(&self) -> HashMap> { + pub fn keys_as_bytes(&self) -> HashMap> { let mut keys = HashMap::>::new(); for (i, key) in self.keys.iter().enumerate() { if let Key::String(key) = key { @@ -44,6 +42,132 @@ impl Lockfile { } keys } + pub fn params(&self) -> Vec { + let mut params = vec!["DATA_BYTES".to_string(), "MAX_STACK_HEIGHT".to_string()]; + + for (i, key) in self.keys.iter().enumerate() { + match key { + Key::String(_) => { + params.push(format!("keyLen{}", i + 1)); + params.push(format!("depth{}", i + 1)); + } + Key::Num(_) => { + params.push(format!("index{}", i + 1)); + params.push(format!("depth{}", i + 1)); + } + } + } + + params.push("maxValueLen".to_string()); + + params + } + + pub fn inputs(&self) -> Vec { + let mut inputs = vec![String::from("data")]; + + for (i, key) in self.keys.iter().enumerate() { + match key { + Key::String(_) => inputs.push(format!("key{}", i + 1)), + Key::Num(_) => (), + } + } + + inputs + } + + /// Builds circuit config for circomkit support. 
+ pub fn build_circuit_config( + &self, + input: &[u8], + output_filename: &str, + ) -> Result> { + let circuit_template_name = match self.value_type { + ValueType::String => String::from("ExtractStringValue"), + ValueType::Number => String::from("ExtractNumValue"), + }; + + Ok(CircomkitCircuitConfig { + file: format!("main/{}", output_filename), + template: circuit_template_name, + params: self.populate_params(input)?, + }) + } + + /// Builds circuit arguments + /// `[DATA_BYTES, MAX_STACK_HEIGHT, keyLen1, depth1, ..., maxValueLen]` + pub fn populate_params(&self, input: &[u8]) -> Result, Box> { + let mut params = vec![input.len(), json_max_stack_height(input)]; + + for (i, key) in self.keys.iter().enumerate() { + match key { + Key::String(key) => params.push(key.len()), + Key::Num(index) => params.push(*index), + } + params.push(i); + } + + let current_value = self.get_value(input)?; + params.push(current_value.as_bytes().len()); + + Ok(params) + } + + pub fn get_value(&self, input: &[u8]) -> Result> { + let mut current_value: Value = serde_json::from_slice(input)?; + for key in self.keys.iter() { + match key { + Key::String(key) => { + if let Some(value) = current_value.get_mut(key) { + // update current object value inside key + current_value = value.to_owned(); + } else { + return Err(String::from("provided key not present in input JSON").into()); + } + } + Key::Num(index) => { + if let Some(value) = current_value.get_mut(index) { + current_value = value.to_owned(); + } else { + return Err(String::from("provided index not present in input JSON").into()); + } + } + } + } + + match current_value { + Value::Number(num) => Ok(num.to_string()), + Value::String(val) => Ok(val), + _ => unimplemented!(), + } + } +} + +/// Returns maximum stack height for JSON parser circuit. Tracks maximum open braces and square +/// brackets at any position. 
+/// +/// # Input +/// - `input`: input json bytes +/// # Output +/// - `max_stack_height`: maximum stack height needed for JSON parser circuit +pub fn json_max_stack_height(input: &[u8]) -> usize { + let mut max_stack_height = 1; + let mut curr_stack_height = 1; + let mut inside_string: bool = false; + + for (i, char) in input.iter().skip(1).enumerate() { + match char { + b'"' if input[i] != b'\\' => inside_string = !inside_string, + b'{' | b'[' if !inside_string => { + curr_stack_height += 1; + max_stack_height = max_by(max_stack_height, curr_stack_height, |x, y| x.cmp(y)); + } + b'}' | b']' if !inside_string => curr_stack_height -= 1, + _ => {} + } + } + + max_stack_height } fn extract_string( @@ -52,17 +176,10 @@ fn extract_string( circuit_buffer: &mut String, debug: bool, ) { - *circuit_buffer += &format!( - "template {}(DATA_BYTES, MAX_STACK_HEIGHT, ", - config.template - ); - for (i, key) in data.keys.iter().enumerate() { - match key { - Key::String(_) => *circuit_buffer += &format!("keyLen{}, depth{}, ", i + 1, i + 1), - Key::Num(_) => *circuit_buffer += &format!("index{}, depth{}, ", i + 1, i + 1), - } - } - *circuit_buffer += "maxValueLen) {\n"; + let params = data.params(); + let inputs = data.inputs(); + + *circuit_buffer += &format!("template {}({}) {{\n", config.template, params.join(", "),); *circuit_buffer += " signal input data[DATA_BYTES];\n\n"; @@ -83,24 +200,11 @@ fn extract_string( // value_starting_index <== ExtractValue(DATA_BYTES, MAX_STACK_HEIGHT, keyLen1, depth1, keyLen2, depth2, index3, depth3, index4, depth4, maxValueLen)(data, key1, key2); { - *circuit_buffer += - " value_starting_index <== ExtractValue(DATA_BYTES, MAX_STACK_HEIGHT, "; - for (i, key) in data.keys.iter().enumerate() { - match key { - Key::String(_) => *circuit_buffer += &format!("keyLen{}, depth{}, ", i + 1, i + 1), - Key::Num(_) => *circuit_buffer += &format!("index{}, depth{}, ", i + 1, i + 1), - } - } - *circuit_buffer += "maxValueLen)(data, "; - for (i, key) in 
data.keys.iter().enumerate() { - match key { - Key::String(_) => *circuit_buffer += &format!("key{}, ", i + 1), - Key::Num(_) => (), - } - } - circuit_buffer.pop(); - circuit_buffer.pop(); - *circuit_buffer += ");\n"; + *circuit_buffer += &format!( + " value_starting_index <== ExtractValue({})({});\n", + params.join(", "), + inputs.join(", "), + ); } *circuit_buffer += r#" @@ -125,17 +229,10 @@ fn extract_number( circuit_buffer: &mut String, debug: bool, ) { - *circuit_buffer += &format!( - "template {}(DATA_BYTES, MAX_STACK_HEIGHT, ", - config.template - ); - for (i, key) in data.keys.iter().enumerate() { - match key { - Key::String(_) => *circuit_buffer += &format!("keyLen{}, depth{}, ", i + 1, i + 1), - Key::Num(_) => *circuit_buffer += &format!("index{}, depth{}, ", i + 1, i + 1), - } - } - *circuit_buffer += "maxValueLen) {\n"; + let params = data.params(); + let inputs = data.inputs(); + + *circuit_buffer += &format!("template {}({}) {{\n", config.template, params.join(", "),); *circuit_buffer += " signal input data[DATA_BYTES];\n\n"; @@ -157,24 +254,11 @@ fn extract_number( // value_starting_index <== ExtractValue(DATA_BYTES, MAX_STACK_HEIGHT, keyLen1, depth1, keyLen2, depth2, index3, depth3, index4, depth4, maxValueLen)(data, key1, key2); { - *circuit_buffer += - " value_starting_index <== ExtractValue(DATA_BYTES, MAX_STACK_HEIGHT, "; - for (i, key) in data.keys.iter().enumerate() { - match key { - Key::String(_) => *circuit_buffer += &format!("keyLen{}, depth{}, ", i + 1, i + 1), - Key::Num(_) => *circuit_buffer += &format!("index{}, depth{}, ", i + 1, i + 1), - } - } - *circuit_buffer += "maxValueLen)(data, "; - for (i, key) in data.keys.iter().enumerate() { - match key { - Key::String(_) => *circuit_buffer += &format!("key{}, ", i + 1), - Key::Num(_) => (), - } - } - circuit_buffer.pop(); - circuit_buffer.pop(); - *circuit_buffer += ");\n"; + *circuit_buffer += &format!( + " value_starting_index <== ExtractValue({})({});\n", + params.join(", "), + 
inputs.join(", "), + ); } *circuit_buffer += r#" @@ -205,9 +289,9 @@ fn extract_number( fn build_json_circuit( config: &CircomkitCircuitConfig, data: &Lockfile, - output_filename: &String, + output_filename: &str, debug: bool, -) -> Result<(), Box> { +) -> Result<(), Box> { let mut circuit_buffer = String::new(); // Dump out the contents of the lockfile used into the circuit @@ -220,14 +304,8 @@ fn build_json_circuit( // template ExtractValue(DATA_BYTES, MAX_STACK_HEIGHT, keyLen1, depth1, index2, depth2, keyLen3, depth3, index4, depth4, maxValueLen) { { - circuit_buffer += "template ExtractValue(DATA_BYTES, MAX_STACK_HEIGHT, "; - for (i, key) in data.keys.iter().enumerate() { - match key { - Key::String(_) => circuit_buffer += &format!("keyLen{}, depth{}, ", i + 1, i + 1), - Key::Num(_) => circuit_buffer += &format!("index{}, depth{}, ", i + 1, i + 1), - } - } - circuit_buffer += "maxValueLen) {\n"; + let params = data.params(); + circuit_buffer += &format!("template ExtractValue({}) {{\n", params.join(", ")); } /* @@ -556,104 +634,117 @@ fn build_json_circuit( Ok(()) } -pub fn json_max_stack_height(input: &[u8]) -> usize { - let mut max_stack_height = 1; - let mut curr_stack_height = 1; - let mut inside_string: bool = false; +/// Builds a JSON extractor circuit from [`ExtractorArgs`] +/// - reads [`Lockfile`] +/// - reads input +/// - create [`CircomkitCircuitConfig`] +/// - builds circuit +/// - writes file +pub fn json_circuit_from_args( + args: &ExtractorArgs, +) -> Result> { + let lockfile: Lockfile = serde_json::from_slice(&fs::read(&args.lockfile)?)?; - for (i, char) in input.iter().skip(1).enumerate() { - match char { - b'"' if input[i] != b'\\' => inside_string = !inside_string, - b'{' | b'[' if !inside_string => { - curr_stack_height += 1; - max_stack_height = max_by(max_stack_height, curr_stack_height, |x, y| x.cmp(y)); - } - b'}' | b']' if !inside_string => curr_stack_height -= 1, - _ => {} - } - } + let circuit_filename = format!("json_{}", 
args.circuit_name); - max_stack_height + let input = fs::read(&args.input_file)?; + + let config = json_circuit_from_lockfile(&input, &lockfile, &circuit_filename, args.debug)?; + config.write(&args.circuit_name)?; + + Ok(config) } -/// Builds circuit config for circomkit support. -pub fn build_circuit_config( - args: &ExtractorArgs, +pub fn json_circuit_from_lockfile( + input: &[u8], lockfile: &Lockfile, - codegen_filename: &str, -) -> Result> { - let input = fs::read(args.input_file.clone())?; + output_filename: &str, + debug: bool, +) -> Result> { + let config = lockfile.build_circuit_config(input, output_filename)?; - let circuit_template_name = match lockfile.value_type { - ValueType::String => String::from("ExtractStringValue"), - ValueType::Number => String::from("ExtractNumValue"), - }; + build_json_circuit(&config, lockfile, output_filename, debug)?; + Ok(config) +} - // build circuit arguments - // [DATA_BYTES, MAX_STACK_HEIGHT, keyLen1, depth1, ..., maxValueLen] - let mut params = vec![input.len(), json_max_stack_height(&input)]; +#[cfg(test)] +mod test { + use super::*; - let mut current_value: Value = serde_json::from_slice(&input)?; - for (i, key) in lockfile.keys.iter().enumerate() { - match key { - Key::String(key) => { - if let Some(value) = current_value.get_mut(key) { - // update circuit params - params.push(key.len()); - - // update current object value inside key - current_value = value.to_owned(); - } else { - return Err(String::from("provided key not present in input JSON").into()); - } - } - Key::Num(index) => { - if let Some(value) = current_value.get_mut(index) { - params.push(index.to_string().as_bytes().len()); - current_value = value.to_owned(); - } else { - return Err(String::from("provided index not present in input JSON").into()); - } - } - } - params.push(i); + #[test] + fn params() { + let lockfile: Lockfile = serde_json::from_slice(include_bytes!( + "../../examples/json/lockfile/value_array_object.json" + )) + .unwrap(); + + let 
params = lockfile.params(); + + assert_eq!(params[0], "DATA_BYTES"); + assert_eq!(params[1], "MAX_STACK_HEIGHT"); + assert_eq!(params.len(), 2 + 2 * lockfile.keys.len() + 1); } - // get value of specified key - // Currently only supports number, string - let value_bytes = match lockfile.value_type { - ValueType::Number => { - if !current_value.is_u64() { - return Err(String::from("value type doesn't match").into()); - } - current_value.as_u64().unwrap().to_string() - } - ValueType::String => { - if !current_value.is_string() { - return Err(String::from("value type doesn't match").into()); - } - current_value.as_str().unwrap().to_string() - } - }; - params.push(value_bytes.as_bytes().len()); - - Ok(CircomkitCircuitConfig { - file: format!("main/{}", codegen_filename), - template: circuit_template_name, - params, - }) -} + #[test] + fn inputs() { + let lockfile: Lockfile = serde_json::from_slice(include_bytes!( + "../../examples/json/lockfile/value_array_number.json" + )) + .unwrap(); -pub fn json_circuit(args: ExtractorArgs) -> Result<(), Box> { - let lockfile: Lockfile = serde_json::from_slice(&std::fs::read(&args.lockfile)?)?; + let inputs = lockfile.inputs(); - let circuit_filename = format!("json_{}", args.circuit_name); + assert_eq!(inputs.len(), 2); + assert_eq!(inputs[0], "data"); + } - let config = build_circuit_config(&args, &lockfile, &circuit_filename)?; + #[test] + fn populate_params() { + let input = include_bytes!("../../examples/json/test/spotify.json"); + let lockfile: Lockfile = + serde_json::from_slice(include_bytes!("../../examples/json/lockfile/spotify.json")) + .unwrap(); - build_json_circuit(&config, &lockfile, &circuit_filename, args.debug)?; + let params = lockfile.populate_params(input).unwrap(); - write_config(args.circuit_name, &config)?; + assert_eq!(params.len(), lockfile.params().len()); + assert_eq!(params[0], input.len()); + } - Ok(()) + #[test] + fn build_circuit_config() { + let input = 
include_bytes!("../../examples/json/test/spotify.json"); + let lockfile: Lockfile = + serde_json::from_slice(include_bytes!("../../examples/json/lockfile/spotify.json")) + .unwrap(); + + let config = lockfile + .build_circuit_config(input, "output_filename") + .unwrap(); + + assert_eq!(config.template, "ExtractStringValue"); + assert_eq!(config.file, "main/output_filename"); + } + + #[test] + fn json_value() { + let input = include_bytes!("../../examples/json/test/spotify.json"); + let lockfile: Lockfile = + serde_json::from_slice(include_bytes!("../../examples/json/lockfile/spotify.json")) + .unwrap(); + + let value = lockfile.get_value(input).unwrap(); + + assert_eq!(value, "Taylor Swift"); + } + + #[test] + fn max_stack_height() { + let input = include_bytes!("../../examples/json/test/two_keys.json"); + + assert_eq!(json_max_stack_height(input), 1); + + let input = include_bytes!("../../examples/json/test/spotify.json"); + assert_eq!(json_max_stack_height(input), 5); + } } diff --git a/src/codegen/mod.rs b/src/codegen/mod.rs index 9550283..7378216 100644 --- a/src/codegen/mod.rs +++ b/src/codegen/mod.rs @@ -1,2 +1,55 @@ pub mod http; -pub mod json; \ No newline at end of file +pub mod integrated; +pub mod json; +use crate::FileType; + +use clap::Parser; +use http::http_circuit_from_args; +use integrated::integrated_circuit; +use json::json_circuit_from_args; +use std::path::PathBuf; + +#[derive(Parser, Debug)] +/// JSON Extractor arguments +pub struct ExtractorArgs { + #[arg(value_enum)] + subcommand: FileType, + + /// Name of the circuit (to be used in circomkit config) + #[arg(long)] + circuit_name: String, + + /// Path to the JSON/HTTP file + #[arg(long)] + input_file: PathBuf, + + /// Path to the lockfile + #[arg(long)] + lockfile: PathBuf, + + /// Optional circuit debug logs + #[arg(long, short, action = clap::ArgAction::SetTrue)] + debug: bool, +} + +impl ExtractorArgs { + pub fn subcommand(&self) -> FileType { + self.subcommand.clone() + } + + pub fn 
build_circuit(&self) -> Result<(), Box> { + match self.subcommand { + FileType::Http => { + http_circuit_from_args(self)?; + } + FileType::Json => { + json_circuit_from_args(self)?; + } + FileType::Extended => { + integrated_circuit(self)?; + } + } + + Ok(()) + } +} diff --git a/src/main.rs b/src/main.rs index 3678922..be7b41a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,8 @@ pub mod circuit_config; pub mod codegen; pub mod witness; +use crate::codegen::ExtractorArgs; + #[derive(Parser, Debug)] #[command(name = "pabuild")] pub struct Args { @@ -19,10 +21,12 @@ pub enum Command { Codegen(ExtractorArgs), } +/// Lockfile file type #[derive(clap::ValueEnum, Clone, Debug, PartialEq)] pub enum FileType { Json, Http, + Extended, } #[derive(Debug, Parser)] @@ -31,6 +35,7 @@ pub enum WitnessType { Extractor(ExtractorWitnessArgs), } +/// Parser witness arguments #[derive(Parser, Debug)] pub struct ParserWitnessArgs { #[arg(value_enum)] @@ -63,37 +68,14 @@ pub struct ExtractorWitnessArgs { lockfile: PathBuf, } -#[derive(Parser, Debug)] -pub struct ExtractorArgs { - #[arg(value_enum)] - subcommand: FileType, - - /// Name of the circuit (to be used in circomkit config) - #[arg(long)] - circuit_name: String, - - /// Path to the JSON/HTTP file - #[arg(long)] - input_file: PathBuf, - - /// Path to the lockfile - #[arg(long)] - lockfile: PathBuf, - - /// Optional circuit debug logs - #[arg(long, short, action = clap::ArgAction::SetTrue)] - debug: bool, -} - pub fn main() -> Result<(), Box> { match Args::parse().command { Command::Witness(witness_type) => match witness_type { - WitnessType::Parser(args) => witness::parser_witness(args), - WitnessType::Extractor(args) => witness::extractor_witness(args), + WitnessType::Parser(args) => witness::parser_witness(args)?, + WitnessType::Extractor(args) => witness::extractor_witness(args)?, }, - Command::Codegen(args) => match args.subcommand { - FileType::Http => codegen::http::http_circuit(args), - FileType::Json => 
codegen::json::json_circuit(args), - }, - } + Command::Codegen(args) => args.build_circuit()?, + }; + + Ok(()) } diff --git a/src/witness.rs b/src/witness.rs index ec09bd9..9baf88b 100644 --- a/src/witness.rs +++ b/src/witness.rs @@ -1,18 +1,21 @@ +use serde::Serialize; + use crate::{ codegen::{ http::HttpData, + integrated::ExtendedLockfile, json::{json_max_stack_height, Lockfile}, }, ExtractorWitnessArgs, FileType, ParserWitnessArgs, }; -use std::{collections::HashMap, io::Write, path::PathBuf}; +use std::{collections::HashMap, io::Write, path::Path}; -#[derive(serde::Serialize)] +#[derive(Serialize)] pub struct ParserWitness { data: Vec, } -#[derive(serde::Serialize)] +#[derive(Serialize)] pub struct JsonExtractorWitness { data: Vec, @@ -20,7 +23,7 @@ pub struct JsonExtractorWitness { keys: HashMap>, } -#[derive(serde::Serialize)] +#[derive(Serialize)] pub struct HttpExtractorWitness { data: Vec, @@ -28,6 +31,14 @@ pub struct HttpExtractorWitness { http_data: HttpData, } +#[derive(Serialize)] +pub struct ExtendedWitness { + #[serde(flatten)] + http_witness: HttpExtractorWitness, + #[serde(flatten)] + keys: HashMap>, +} + fn print_boxed_output(lines: Vec) { // Determine the maximum length of the lines let max_length = lines.iter().map(|line| line.len()).max().unwrap_or(0); @@ -44,46 +55,55 @@ fn print_boxed_output(lines: Vec) { println!("{}", bottom_border); } -pub fn read_input_file_as_bytes( - file_type: &FileType, - file_path: PathBuf, -) -> Result, Box> { - match file_type { - FileType::Json => Ok(std::fs::read(file_path)?), - FileType::Http => { - let mut data = std::fs::read(file_path)?; - let mut i = 0; - // convert LF to CRLF - while i < data.len() { - if data[i] == 10 && (i == 0 || data[i - 1] != 13) { - data.insert(i, 13); - i += 2; - } else { - i += 1; +impl FileType { + pub fn read_input(&self, input: &Path) -> Result, Box> { + match self { + FileType::Json => Ok(std::fs::read(input)?), + FileType::Http | FileType::Extended => { + let mut data = 
std::fs::read(input)?; + let mut i = 0; + // convert LF to CRLF + while i < data.len() { + if data[i] == 10 && (i == 0 || data[i - 1] != 13) { + data.insert(i, 13); + i += 2; + } else { + i += 1; + } } + Ok(data) } - Ok(data) } } } -pub fn parser_witness(args: ParserWitnessArgs) -> Result<(), Box> { - let data = read_input_file_as_bytes(&args.subcommand, args.input_file)?; - - let witness = ParserWitness { data: data.clone() }; - +fn write_witness(circuit_name: &str, witness: &[u8]) -> Result> { let mut output_dir = std::env::current_dir()?; output_dir.push("inputs"); - output_dir.push(args.circuit_name); + output_dir.push(circuit_name); if !output_dir.exists() { std::fs::create_dir_all(&output_dir)?; } let output_file = output_dir.join("inputs.json"); - let mut file = std::fs::File::create(output_file)?; + let mut file = std::fs::File::create(&output_file)?; + + file.write_all(witness)?; + + let output = format!("Witness file generated: {:?}", output_file.display()); + Ok(output) +} + +pub fn parser_witness(args: ParserWitnessArgs) -> Result<(), Box> { + let data = args.subcommand.read_input(&args.input_file)?; + + let witness = ParserWitness { data: data.clone() }; - file.write_all(serde_json::to_string_pretty(&witness)?.as_bytes())?; + let output = write_witness( + &args.circuit_name, + serde_json::to_string_pretty(&witness)?.as_bytes(), + )?; // Prepare lines to print let mut lines = Vec::new(); @@ -95,6 +115,9 @@ pub fn parser_witness(args: ParserWitnessArgs) -> Result<(), Box Result<(), Box Result<(), Box> { // read input and lockfile - let input_data = read_input_file_as_bytes(&args.subcommand, args.input_file)?; + let input_data = args.subcommand.read_input(&args.input_file)?; let lockfile_data = std::fs::read(&args.lockfile)?; let lockfile: Lockfile = serde_json::from_slice(&lockfile_data)?; @@ -111,21 +134,13 @@ fn json_extractor_witness(args: ExtractorWitnessArgs) -> Result<(), Box Result<(), Box Result<(), Box Result<(), Box> { // read input and 
lockfile - let input_data = read_input_file_as_bytes(&args.subcommand, args.input_file)?; + let data = args.subcommand.read_input(&args.input_file)?; let lockfile_data = std::fs::read(&args.lockfile)?; let http_data: HttpData = serde_json::from_slice(&lockfile_data)?; // create witness data let witness = HttpExtractorWitness { - data: input_data.clone(), + data: data.clone(), http_data, }; - // create witness dir - let mut output_dir = std::env::current_dir()?; - output_dir.push("inputs"); - output_dir.push(&args.circuit_name); - if !output_dir.exists() { - std::fs::create_dir_all(&output_dir)?; - } + let output = write_witness( + &args.circuit_name, + serde_json::to_string_pretty(&witness)?.as_bytes(), + )?; - // write witness to file - let output_file = output_dir.join("inputs.json"); - let mut file = std::fs::File::create(output_file)?; - file.write_all(serde_json::to_string_pretty(&witness)?.as_bytes())?; + // Prepare lines to print + let mut lines = Vec::new(); + lines.push(format!("Data length: {}", data.len())); + + lines.push(output); + + // Print the output inside a nicely formatted box + print_boxed_output(lines); + + Ok(()) +} + +fn extended_extractor_witness( + args: ExtractorWitnessArgs, +) -> Result<(), Box> { + // read input and lockfile + let data = args.subcommand.read_input(&args.input_file)?; + + let lockfile_data = std::fs::read(&args.lockfile)?; + let lockfile: ExtendedLockfile = serde_json::from_slice(&lockfile_data)?; + + // create witness data + let witness = ExtendedWitness { + http_witness: HttpExtractorWitness { + data: data.clone(), + http_data: lockfile.http, + }, + keys: lockfile.json.keys_as_bytes(), + }; + + let output = write_witness( + &args.circuit_name, + serde_json::to_string_pretty(&witness)?.as_bytes(), + )?; // Prepare lines to print let mut lines = Vec::new(); - lines.push(format!("Data length: {}", input_data.len())); + lines.push(format!("Data length: {}", data.len())); + + lines.push(output); // Print the output inside a 
nicely formatted box print_boxed_output(lines); @@ -181,5 +227,6 @@ pub fn extractor_witness(args: ExtractorWitnessArgs) -> Result<(), Box json_extractor_witness(args), FileType::Http => http_extractor_witness(args), + FileType::Extended => extended_extractor_witness(args), } }