feat: NIVC circuits + tests (#96)

* simplify `JsonMaskObjectNIVC` * passing `JsonMaskArrayIndexNIVC` * bug: incorrect mask for "profile" key * bug: not extracting `"profile"` * add extra key index to match end quote * fix: masking tests pass * fix: final extraction * cleanup: unused signals, consistency Found unused signals with the circom-witnesscalc `build-circuit` binary. These are now removed. * feat: passing HTTP NIVC tests * fix: unallocated signals `HTTPParseAndLockStartLine` had unused signals. I set these properly now. * rename SubstringMatchWithIndex * add `HeaderFieldNameValueMatchPadded` * add a todo in json interpreter * change `LockHeader` to handle max key and value length * use circuit.compute to find witness and simplify test --------- Co-authored-by: lonerapier <[email protected]>
pluto · Oct 22, 2024 · 5fd9651 · 5fd9651
1 parent af08a7a
commit 5fd9651
Show file tree

Hide file tree

Showing 16 changed files with 992 additions and 9 deletions.
diff --git a/.gitignore b/.gitignore
@@ -10,4 +10,9 @@ circuits/main/*
 
 # Rust generated
 inputs/**/*.json
-!inputs/search/witness.json
+!inputs/search/witness.json
+
+# Circom-witnesscalc generated
+ir_log/*
+log_input_signals.txt
+*.bin
diff --git a/circuits/http/interpreter.circom b/circuits/http/interpreter.circom
@@ -47,8 +47,6 @@ template HeaderFieldNameValueMatch(dataLen, nameLen, valueLen) {
     signal input headerValue[valueLen];
     signal input index;
 
-    // signal output value[valueLen];
-
     // is name matches
     signal headerNameMatch <== SubstringMatchWithIndex(dataLen, nameLen)(data, headerName, index);
 
@@ -65,6 +63,31 @@ template HeaderFieldNameValueMatch(dataLen, nameLen, valueLen) {
     signal output out <== headerNameMatchAndNextByteColon * headerValueMatch;
 }
 
+// https://www.rfc-editor.org/rfc/rfc9112.html#name-field-syntax
+template HeaderFieldNameValueMatchPadded(dataLen, maxNameLen, maxValueLen) {
+    signal input data[dataLen];
+    signal input headerName[maxNameLen];
+    signal input nameLen;
+    signal input headerValue[maxValueLen];
+    signal input valueLen;
+    signal input index;
+
+    // is name matchesnameLen
+    signal headerNameMatch <== SubstringMatchWithIndexPadded(dataLen, maxNameLen)(data, headerName, nameLen, index);
+
+    // next byte to name should be COLON
+    signal endOfHeaderName <== IndexSelector(dataLen)(data, index + nameLen);
+    signal isNextByteColon <== IsEqual()([endOfHeaderName, 58]);
+
+    signal headerNameMatchAndNextByteColon <== headerNameMatch * isNextByteColon;
+
+    // field-name: SP field-value
+    signal headerValueMatch <== SubstringMatchWithIndexPadded(dataLen, maxValueLen)(data, headerValue, valueLen, index + nameLen + 2);
+
+    // header name matches + header value matches
+    signal output out <== headerNameMatchAndNextByteColon * headerValueMatch;
+}
+
 // https://www.rfc-editor.org/rfc/rfc9112.html#name-field-syntax
 template HeaderFieldNameMatch(dataLen, nameLen) {
     signal input data[dataLen];

diff --git a/circuits/http/nivc/body_mask.circom b/circuits/http/nivc/body_mask.circom
@@ -0,0 +1,36 @@
+pragma circom 2.1.9;
+
+include "../interpreter.circom";
+
+template HTTPMaskBodyNIVC(DATA_BYTES, MAX_STACK_HEIGHT) {
+    // ------------------------------------------------------------------------------------------------------------------ //
+    // ~~ Set sizes at compile time ~~
+    // Total number of variables in the parser for each byte of data
+    var PER_ITERATION_DATA_LENGTH = MAX_STACK_HEIGHT * 2 + 2;
+    var TOTAL_BYTES_ACROSS_NIVC   = DATA_BYTES * (PER_ITERATION_DATA_LENGTH + 1) + 1;
+    // ------------------------------------------------------------------------------------------------------------------ //
+
+    // ------------------------------------------------------------------------------------------------------------------ //
+    // ~ Unravel from previous NIVC step ~
+    // Read in from previous NIVC step (HttpParseAndLockStartLine or HTTPLockHeader)
+    signal input step_in[TOTAL_BYTES_ACROSS_NIVC]; 
+    signal output step_out[TOTAL_BYTES_ACROSS_NIVC];
+
+    signal data[DATA_BYTES];
+    signal parsing_body[DATA_BYTES];
+    for (var i = 0 ; i < DATA_BYTES ; i++) {
+        data[i]         <== step_in[i];
+        parsing_body[i] <== step_in[DATA_BYTES + i * 5 + 4]; // `parsing_body` stored in every 5th slot of step_in/out
+    }
+
+    // ------------------------------------------------------------------------------------------------------------------ //
+    // ~ Write out to next NIVC step
+    for (var i = 0 ; i < DATA_BYTES ; i++) {
+        step_out[i] <== data[i] * parsing_body[i];
+    }
+    // Write out padded with zeros
+    for (var i = DATA_BYTES ; i < TOTAL_BYTES_ACROSS_NIVC ; i++) {
+        step_out[i] <== 0;
+    }
+}
+
diff --git a/circuits/http/nivc/lock_header.circom b/circuits/http/nivc/lock_header.circom
@@ -0,0 +1,113 @@
+pragma circom 2.1.9;
+
+include "../interpreter.circom";
+include "../../utils/array.circom";
+
+// TODO: should use a MAX_HEADER_NAME_LENGTH and a MAX_HEADER_VALUE_LENGTH
+template LockHeader(DATA_BYTES, MAX_STACK_HEIGHT, MAX_HEADER_NAME_LENGTH, MAX_HEADER_VALUE_LENGTH) {
+    // ------------------------------------------------------------------------------------------------------------------ //
+    // ~~ Set sizes at compile time ~~
+    // Total number of variables in the parser for each byte of data
+    /* 5 is for the variables:
+        next_parsing_start
+        next_parsing_header
+        next_parsing_field_name
+        next_parsing_field_value
+        State[i].next_parsing_body
+    */
+    var TOTAL_BYTES_HTTP_STATE    = DATA_BYTES * (5 + 1); // data + parser vars
+    var PER_ITERATION_DATA_LENGTH = MAX_STACK_HEIGHT * 2 + 2;
+    var TOTAL_BYTES_ACROSS_NIVC   = DATA_BYTES * (PER_ITERATION_DATA_LENGTH + 1) + 1;
+    // ------------------------------------------------------------------------------------------------------------------ //
+
+    // ------------------------------------------------------------------------------------------------------------------ //
+    // ~ Unravel from previous NIVC step ~
+    // Read in from previous NIVC step (HttpParseAndLockStartLine or HTTPLockHeader)
+    signal input step_in[TOTAL_BYTES_ACROSS_NIVC];
+    signal output step_out[TOTAL_BYTES_ACROSS_NIVC];
+
+    signal data[DATA_BYTES];
+    for (var i = 0 ; i < DATA_BYTES ; i++) {
+        data[i] <== step_in[i];
+    }
+    signal httpParserState[DATA_BYTES * 5];
+    for (var i = 0 ; i < DATA_BYTES * 5 ; i++) {
+        httpParserState[i] <== step_in[DATA_BYTES + i];
+    }
+
+    // TODO: Better naming for these variables
+    signal input header[MAX_HEADER_NAME_LENGTH];
+    signal input headerNameLength;
+    signal input value[MAX_HEADER_VALUE_LENGTH];
+    signal input headerValueLength;
+
+    // find header location
+    signal headerNameLocation <== FirstStringMatch(DATA_BYTES, MAX_HEADER_NAME_LENGTH)(data, header);
+
+    // This is the assertion that we have locked down the correct header
+    signal headerFieldNameValueMatch <==  HeaderFieldNameValueMatchPadded(DATA_BYTES, MAX_HEADER_NAME_LENGTH, MAX_HEADER_VALUE_LENGTH)(data, header, headerNameLength, value, headerValueLength, headerNameLocation);
+    headerFieldNameValueMatch === 1;
+
+    // parser state should be parsing header
+    signal isParsingHeader <== IndexSelector(DATA_BYTES * 5)(httpParserState, headerNameLocation * 5 + 1);
+    isParsingHeader === 1;
+
+    // ------------------------------------------------------------------------------------------------------------------ //
+    // ~ Write out to next NIVC step
+    for (var i = 0 ; i < DATA_BYTES ; i++) {
+        // add plaintext http input to step_out
+        step_out[i] <== step_in[i];
+
+        // add parser state
+        step_out[DATA_BYTES + i * 5]     <== step_in[DATA_BYTES + i * 5];
+        step_out[DATA_BYTES + i * 5 + 1] <== step_in[DATA_BYTES + i * 5 + 1];
+        step_out[DATA_BYTES + i * 5 + 2] <== step_in[DATA_BYTES + i * 5 + 2];
+        step_out[DATA_BYTES + i * 5 + 3] <== step_in[DATA_BYTES + i * 5 + 3];
+        step_out[DATA_BYTES + i * 5 + 4] <== step_in[DATA_BYTES + i * 5 + 4];
+    }
+    // Pad remaining with zeros
+    for (var i = TOTAL_BYTES_HTTP_STATE ; i < TOTAL_BYTES_ACROSS_NIVC ; i++ ) {
+        step_out[i] <== 0;
+    }
+}
+
+// TODO: Handrolled template that I haven't tested YOLO.
+template FirstStringMatch(dataLen, maxKeyLen) {
+    signal input data[dataLen];
+    signal input key[maxKeyLen];
+    signal output position;
+
+    signal paddedData[dataLen + maxKeyLen];
+    for (var i = 0 ; i < dataLen ; i++) {
+        paddedData[i] <== data[i];
+    }
+    for (var i = 0 ; i < maxKeyLen ; i++) {
+        paddedData[dataLen + i] <== 0;
+    }
+
+    var matched = 0;
+    var counter = 0;
+    component stringMatch[dataLen];
+    component hasMatched[dataLen];
+    signal isKeyOutOfBounds[maxKeyLen];
+    signal isFirstMatchAndInsideBound[dataLen * maxKeyLen];
+    for (var i = 0 ; i < maxKeyLen ; i++) {
+        isKeyOutOfBounds[i] <== IsZero()(key[i]);
+    }
+
+    for (var idx = 0 ; idx < dataLen ; idx++) {
+        stringMatch[idx] = IsEqualArray(maxKeyLen);
+        stringMatch[idx].in[0] <== key;
+        for (var key_idx = 0 ; key_idx < maxKeyLen ; key_idx++) {
+            isFirstMatchAndInsideBound[idx * maxKeyLen + key_idx] <== (1 - matched) * (1 - isKeyOutOfBounds[key_idx]);
+            stringMatch[idx].in[1][key_idx] <== paddedData[idx + key_idx] * isFirstMatchAndInsideBound[idx * maxKeyLen + key_idx];
+        }
+        hasMatched[idx] = IsEqual();
+        hasMatched[idx].in <== [stringMatch[idx].out, 1];
+        matched += hasMatched[idx].out;
+        counter += (1 - matched); // TODO: Off by one? Move before?
+    }
+    position <== counter;
+}
+
+
diff --git a/circuits/http/nivc/parse_and_lock_start_line.circom b/circuits/http/nivc/parse_and_lock_start_line.circom
@@ -0,0 +1,139 @@
+pragma circom 2.1.9;
+
+include "../parser/machine.circom";
+include "../interpreter.circom";
+include "../../utils/bytes.circom";
+
+// TODO: Note that TOTAL_BYTES will match what we have for AESGCMFOLD step_out
+// I have not gone through to double check the sizes of everything yet.
+template ParseAndLockStartLine(DATA_BYTES, MAX_STACK_HEIGHT, BEGINNING_LENGTH, MIDDLE_LENGTH, FINAL_LENGTH) {
+    // ------------------------------------------------------------------------------------------------------------------ //
+    // ~~ Set sizes at compile time ~~
+    // Total number of variables in the parser for each byte of data
+    // var AES_BYTES                 = DATA_BYTES + 50; // TODO: Might be wrong, but good enough for now
+    /* 5 is for the variables:
+        next_parsing_start
+        next_parsing_header
+        next_parsing_field_name
+        next_parsing_field_value
+        State[i].next_parsing_body
+    */
+    var TOTAL_BYTES_HTTP_STATE    = DATA_BYTES * (5 + 1); // data + parser vars
+    var PER_ITERATION_DATA_LENGTH = MAX_STACK_HEIGHT * 2 + 2;
+    var TOTAL_BYTES_ACROSS_NIVC   = DATA_BYTES * (PER_ITERATION_DATA_LENGTH + 1) + 1;
+    // ------------------------------------------------------------------------------------------------------------------ //
+
+    // ------------------------------------------------------------------------------------------------------------------ //
+    // ~ Unravel from previous NIVC step ~
+    // Read in from previous NIVC step (JsonParseNIVC)
+    signal input step_in[TOTAL_BYTES_ACROSS_NIVC]; 
+    signal output step_out[TOTAL_BYTES_ACROSS_NIVC];
+
+    signal data[DATA_BYTES];
+    for (var i = 0 ; i < DATA_BYTES ; i++) {
+        // data[i] <== step_in[50 + i]; // THIS WAS OFFSET FOR AES, WHICH WE NEED TO TAKE INTO ACCOUNT
+        data[i] <== step_in[i];
+    }
+
+    // // TODO: check if these needs to here or not
+    // DON'T THINK WE NEED THIS SINCE AES SHOULD OUTPUT ASCII OR FAIL
+    // component dataASCII = ASCII(DATA_BYTES);
+    // dataASCII.in <== data;
+
+    signal input beginning[BEGINNING_LENGTH];
+    signal input middle[MIDDLE_LENGTH];
+    signal input final[FINAL_LENGTH];
+
+    // Initialze the parser
+    component State[DATA_BYTES];
+    State[0]                     = HttpStateUpdate();
+    State[0].byte                <== data[0];
+    State[0].parsing_start       <== 1;
+    State[0].parsing_header      <== 0;
+    State[0].parsing_field_name  <== 0;
+    State[0].parsing_field_value <== 0;
+    State[0].parsing_body        <== 0;
+    State[0].line_status         <== 0;
+
+    /*
+    Note, because we know a beginning is the very first thing in a request
+    we can make this more efficient by just comparing the first `BEGINNING_LENGTH` bytes
+    of the data ASCII against the beginning ASCII itself.
+    */
+    // Check first beginning byte
+    signal beginningIsEqual[BEGINNING_LENGTH];
+    beginningIsEqual[0] <== IsEqual()([data[0],beginning[0]]);
+    beginningIsEqual[0] === 1;
+
+    // Setup to check middle bytes
+    signal startLineMask[DATA_BYTES];
+    signal middleMask[DATA_BYTES];
+    signal finalMask[DATA_BYTES];
+    startLineMask[0] <== inStartLine()(State[0].parsing_start);
+    middleMask[0]    <== inStartMiddle()(State[0].parsing_start);
+    finalMask[0]     <== inStartEnd()(State[0].parsing_start);
+
+
+    var middle_start_counter = 1;
+    var middle_end_counter = 1;
+    var final_end_counter = 1;
+    for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) {
+        State[data_idx]                     = HttpStateUpdate();
+        State[data_idx].byte                <== data[data_idx];
+        State[data_idx].parsing_start       <== State[data_idx - 1].next_parsing_start;
+        State[data_idx].parsing_header      <== State[data_idx - 1].next_parsing_header;
+        State[data_idx].parsing_field_name  <== State[data_idx - 1].next_parsing_field_name;
+        State[data_idx].parsing_field_value <== State[data_idx - 1].next_parsing_field_value;
+        State[data_idx].parsing_body        <== State[data_idx - 1].next_parsing_body;
+        State[data_idx].line_status         <== State[data_idx - 1].next_line_status;
+
+        // Check remaining beginning bytes
+        if(data_idx < BEGINNING_LENGTH) {
+            beginningIsEqual[data_idx] <== IsEqual()([data[data_idx], beginning[data_idx]]);
+            beginningIsEqual[data_idx] === 1;
+        }
+
+        // Set the masks based on parser state
+        startLineMask[data_idx] <== inStartLine()(State[data_idx].parsing_start);
+        middleMask[data_idx]    <== inStartMiddle()(State[data_idx].parsing_start);
+        finalMask[data_idx]     <== inStartEnd()(State[data_idx].parsing_start);
+
+        // Increment counters based on mask information
+        middle_start_counter += startLineMask[data_idx] - middleMask[data_idx] - finalMask[data_idx];
+        middle_end_counter   += startLineMask[data_idx] - finalMask[data_idx];
+        final_end_counter    += startLineMask[data_idx];
+    }
+
+    // Additionally verify beginning had correct length
+    BEGINNING_LENGTH === middle_start_counter - 1;
+
+    // Check middle is correct by substring match and length check
+    signal middleMatch <== SubstringMatchWithIndex(DATA_BYTES, MIDDLE_LENGTH)(data, middle, middle_start_counter);
+    middleMatch === 1;
+    MIDDLE_LENGTH === middle_end_counter - middle_start_counter - 1;
+
+    // Check final is correct by substring match and length check
+    signal finalMatch <== SubstringMatchWithIndex(DATA_BYTES, FINAL_LENGTH)(data, final, middle_end_counter);
+    finalMatch === 1;
+    // -2 here for the CRLF
+    FINAL_LENGTH === final_end_counter - middle_end_counter - 2;
+
+    // ------------------------------------------------------------------------------------------------------------------ //
+    // ~ Write out to next NIVC step (Lock Header)
+    for (var i = 0 ; i < DATA_BYTES ; i++) {
+        // add plaintext http input to step_out
+        // step_out[i] <== step_in[50 + i]; // AGAIN, NEED TO ACCOUNT FOR AES VARIABLES POSSIBLY
+        step_out[i] <== step_in[i];
+
+        // add parser state
+        step_out[DATA_BYTES + i * 5]     <== State[i].next_parsing_start;
+        step_out[DATA_BYTES + i * 5 + 1] <== State[i].next_parsing_header;
+        step_out[DATA_BYTES + i * 5 + 2] <== State[i].next_parsing_field_name;
+        step_out[DATA_BYTES + i * 5 + 3] <== State[i].next_parsing_field_value;
+        step_out[DATA_BYTES + i * 5 + 4] <== State[i].next_parsing_body;
+    }
+    // Pad remaining with zeros
+    for (var i = TOTAL_BYTES_HTTP_STATE ; i < TOTAL_BYTES_ACROSS_NIVC ; i++ ) {
+        step_out[i] <== 0;
+    }
+}