feat: manifest digest verification (#83)
* feat: `PolynomialDigest`

* WIP: working to get through NIVC

* feat: HTTP circuit digesting

* feat: ChaCha circuit digesting

* feat: JSON circuit digesting

* fix: `JSONExtraction`

* IT WORKS

* feat: TS init digest

* feat: separate sequence/value
Autoparallel authored Dec 13, 2024
1 parent 76ebd9a commit 3e69566
Showing 12 changed files with 514 additions and 352 deletions.
26 changes: 20 additions & 6 deletions CHANGELOG.md
@@ -12,13 +12,27 @@ and this project adheres to [Semantic Versioning](http://semver.org/).
#### Circuit Builds
#### Artifacts
- **Circuit sizes:**
- `plaintext_authentication_1024b` (with `--O1` build):
- non-linear constraints: `383,300`
- linear constraints: `27,418`
- R1CS file: `83.9MB`
- Graph file: `20.7MB`
- `http_verification_1024b` (with `--O1` build):
- non-linear constraints: `128,653`
- linear constraints: `77,400`
- Theoretical storage size: `(128,653 + 77,400) * 3 * 32 bytes = 19,781,088 bytes ≈ 19.7 MB`
- R1CS file: `46.9MB`
- Graph file: N/A
- **WARNING:** Seems to not build with `--O2` flag. Need to investigate.
- non-linear constraints: `121,835`
- linear constraints: `64,974`
- R1CS file: `25.7MB`
- Graph file: `5MB`
- **WARNING:** Extremely slow build with `--O2` flag. Need to investigate.
- `json_extraction_1024b` (with `--O1` build):
- non-linear constraints: `460,102`
- linear constraints: `225,781`
- R1CS file: `95.3MB`
- Graph file: `13.1MB`
- **Total size:** `243.7MB`
- **Circuit param file sizes (SNARK):**
- `aux_params`: `112.5MB`
- `prover_key`: `100.7MB`
- `verifier_key`: `321.3MB`

### Notes

4 changes: 2 additions & 2 deletions circuits.json
@@ -15,8 +15,8 @@
]
},
"json_extraction_1024b": {
"file": "json/parser/hash_parser",
"template": "ParserHasher",
"file": "json/extraction",
"template": "JSONExtraction",
"params": [
1024,
10
19 changes: 13 additions & 6 deletions circuits/chacha20/nivc/chacha20_nivc.circom
@@ -7,6 +7,7 @@ include "../chacha-qr.circom";
include "../../utils/bits.circom";
include "../../utils/hash.circom";
include "../../utils/array.circom";
include "circomlib/circuits/poseidon.circom";


/** ChaCha20 in counter mode */
@@ -35,13 +36,13 @@ template ChaCha20_NIVC(DATA_BYTES) {
// in => N 32-bit words => N 4 byte words
signal input plainText[DATA_BYTES];

// step_in should be the ciphertext digest
// step_in should be the ciphertext digest + the HTTP digests + JSON seq digest
signal input step_in[1];

// step_out should be the plaintext digest
signal output step_out[1];

signal isPadding[DATA_BYTES];
signal isPadding[DATA_BYTES]; // == 1 when this byte is padding
signal plaintextBits[DATA_BYTES / 4][32];
component toBits[DATA_BYTES / 4];
for (var i = 0 ; i < DATA_BYTES / 4 ; i++) {
@@ -141,10 +142,16 @@
}
}

signal ciphertext_digest <== DataHasher(DATA_BYTES)(bigEndianCiphertext);

signal ciphertext_hash <== DataHasher(DATA_BYTES)(bigEndianCiphertext);
step_in[0] === ciphertext_hash;
signal zeroed_plaintext[DATA_BYTES];
for(var i = 0 ; i < DATA_BYTES ; i++) {
// Set any padding bytes (presumably at the end) to zero so they do not accumulate into the polynomial digest
zeroed_plaintext[i] <== (1 - isPadding[i]) * plainText[i];
}
signal plaintext_digest <== PolynomialDigest(DATA_BYTES)(zeroed_plaintext, ciphertext_digest);
signal plaintext_digest_hashed <== Poseidon(1)([plaintext_digest]);

signal plaintext_hash <== DataHasher(DATA_BYTES)(plainText);
step_out[0] <== plaintext_hash;
// TODO: I'm not sure we need to subtract the CT digest
step_out[0] <== step_in[0] - ciphertext_digest + plaintext_digest_hashed;
}
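
The new tail of `ChaCha20_NIVC` swaps the old plaintext `DataHasher` check for a polynomial digest keyed by the ciphertext digest, then folds it into the running accumulator as `step_out = step_in - ciphertext_digest + Poseidon(plaintext_digest)`. A minimal TypeScript sketch of that arithmetic follows; the BN254 scalar modulus, the standalone `polynomialDigest` helper, and the `poseidon1` import from `poseidon-lite` are stand-ins mirroring the repo's test utilities, not its exact code.

```typescript
import { poseidon1 } from "poseidon-lite";

// Assumed BN254 scalar field modulus used by the circuits.
const P = BigInt(
  "21888242871839275222246405745257275088548364400416034343698204186575808495617"
);
const mod = (a: bigint): bigint => ((a % P) + P) % P;

// Polynomial digest: sum_i bytes[i] * challenge^i over the scalar field.
// Padding bytes (marked as -1 in the tests) are zeroed first, as in the circuit.
function polynomialDigest(bytes: number[], challenge: bigint): bigint {
  let acc = 0n;
  let monomial = 1n;
  for (const b of bytes) {
    const byte = b === -1 ? 0n : BigInt(b);
    acc = mod(acc + byte * monomial);
    monomial = mod(monomial * challenge);
  }
  return acc;
}

// step_out rule enforced by the updated circuit:
// step_out = step_in - ciphertext_digest + Poseidon(plaintext_digest)
function chachaStepOut(
  stepIn: bigint,
  ciphertextDigest: bigint,
  paddedPlaintext: number[]
): bigint {
  const plaintextDigest = polynomialDigest(paddedPlaintext, ciphertextDigest);
  return mod(stepIn - ciphertextDigest + poseidon1([plaintextDigest]));
}
```

With `step_in = 0`, as in the updated test below, this reduces to `Poseidon(plaintext_digest) - ciphertext_digest` mod p, which is exactly the expected `step_out` the test builds with `modAdd`.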
48 changes: 32 additions & 16 deletions circuits/http/verification.circom
@@ -7,19 +7,24 @@ include "../utils/hash.circom";
template HTTPVerification(DATA_BYTES, MAX_NUMBER_OF_HEADERS) {
signal input step_in[1];
signal output step_out[1];

signal input ciphertext_digest;

// Authenticate the plaintext we are passing in
signal input data[DATA_BYTES];
// TODO: we don't need this if we do a poly digest of the plaintext in authentication circuit
signal data_hash <== DataHasher(DATA_BYTES)(data);
data_hash === step_in[0];
signal isPadding[DATA_BYTES]; // == 1 when this byte is padding
signal zeroed_data[DATA_BYTES];
for (var i = 0 ; i < DATA_BYTES ; i++) {
isPadding[i] <== IsEqual()([data[i], -1]);
zeroed_data[i] <== (1 - isPadding[i]) * data[i];
}
signal data_digest <== PolynomialDigest(DATA_BYTES)(zeroed_data, ciphertext_digest);

signal input main_digests[MAX_NUMBER_OF_HEADERS + 1]; // Contains digests of start line and all intended headers (up to `MAX_NUMBER_OF_HEADERS`)
signal contained[MAX_NUMBER_OF_HEADERS + 1];
signal not_contained[MAX_NUMBER_OF_HEADERS + 1];
var num_to_match = MAX_NUMBER_OF_HEADERS + 1;
for(var i = 0 ; i < MAX_NUMBER_OF_HEADERS + 1 ; i++) {
contained[i] <== IsZero()(main_digests[i]);
num_to_match -= contained[i];
not_contained[i] <== IsZero()(main_digests[i]);
num_to_match -= not_contained[i];
}

component State[DATA_BYTES];
@@ -55,7 +60,7 @@ template HTTPVerification(DATA_BYTES, MAX_NUMBER_OF_HEADERS) {
is_line_change[i] <== Contains(2)(data[i + 1], [10, 13]); // capture if we hit an end line sequence
was_cleared[i] <== IsZero()(main_monomials[i]);
not_body_and_not_line_change[i] <== (1 - State[i + 1].parsing_body) * (1 - is_line_change[i]);
rescaled_or_was_cleared[i] <== (main_monomials[i] * step_in[0] + was_cleared[i]);
rescaled_or_was_cleared[i] <== (main_monomials[i] * ciphertext_digest + was_cleared[i]);
main_monomials[i + 1] <== not_body_and_not_line_change[i] * rescaled_or_was_cleared[i];
}

@@ -80,27 +85,38 @@ template HTTPVerification(DATA_BYTES, MAX_NUMBER_OF_HEADERS) {

// BODY
signal body_monomials[DATA_BYTES];
body_monomials[0] <== 0;
signal body_accum[DATA_BYTES];
body_accum[0] <== 0;
signal body_switch[DATA_BYTES -1];
signal body_digest[DATA_BYTES];
body_digest[0] <== 0;
body_monomials[0] <== 0;
body_accum[0] <== 0;
body_digest[0] <== 0;
for(var i = 0 ; i < DATA_BYTES - 1 ; i++) {
body_accum[i + 1] <== body_accum[i] + State[i + 1].parsing_body;
body_switch[i] <== IsEqual()([body_accum[i + 1], 1]);
body_monomials[i + 1] <== body_monomials[i] * step_in[0] + body_switch[i];
body_digest[i + 1] <== body_digest[i] + body_monomials[i + 1] * data[i + 1];
body_monomials[i + 1] <== body_monomials[i] * ciphertext_digest + body_switch[i];
body_digest[i + 1] <== body_digest[i] + body_monomials[i + 1] * data[i + 1];
}

// TODO: This, for now, passes back out the hash of body_digest and the plaintext_hash so it can be properly verified in the JSON
step_out[0] <== PoseidonChainer()([body_digest[DATA_BYTES - 1], step_in[0]]);

// Verify machine ends in a valid state
State[DATA_BYTES - 1].next_parsing_start === 0;
State[DATA_BYTES - 1].next_parsing_header === 0;
State[DATA_BYTES - 1].next_parsing_field_name === 0;
State[DATA_BYTES - 1].next_parsing_field_value === 0;
State[DATA_BYTES - 1].next_parsing_body === 1;
State[DATA_BYTES - 1].next_line_status === 0;

// TODO: Need to subtract all the header digests here and also wrap them in poseidon. We can use the ones from the input to make this cheaper since they're verified in this circuit!
signal body_digest_hashed <== Poseidon(1)([body_digest[DATA_BYTES - 1]]);
signal data_digest_hashed <== Poseidon(1)([data_digest]);
signal option_hash[MAX_NUMBER_OF_HEADERS + 1];
signal main_digests_hashed[MAX_NUMBER_OF_HEADERS + 1];
var accumulated_main_digests_hashed = 0;
for(var i = 0 ; i < MAX_NUMBER_OF_HEADERS + 1 ; i++) {
option_hash[i] <== Poseidon(1)([(1 - not_contained[i]) * main_digests[i]]);
main_digests_hashed[i] <== (1 - not_contained[i]) * option_hash[i];
accumulated_main_digests_hashed += main_digests_hashed[i];
}

step_out[0] <== step_in[0] + body_digest_hashed - accumulated_main_digests_hashed - data_digest_hashed; // TODO: data_digest is really plaintext_digest from before, consider changing names
}
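
Outside the circuit, the same fold can be written directly: the accumulator gains the Poseidon-wrapped body digest and loses the Poseidon-wrapped plaintext digest and each non-zero start-line/header digest. A TypeScript sketch of that relation (same assumed field modulus and `poseidon-lite` helper as above; not an exported repo function):

```typescript
import { poseidon1 } from "poseidon-lite";

const P = BigInt(
  "21888242871839275222246405745257275088548364400416034343698204186575808495617"
);
const mod = (a: bigint): bigint => ((a % P) + P) % P;

// step_out = step_in + Poseidon(body_digest)
//                    - sum_i Poseidon(main_digests[i])  (zero-padded entries contribute nothing)
//                    - Poseidon(plaintext_digest)        ("data_digest" in the circuit)
function httpStepOut(
  stepIn: bigint,
  bodyDigest: bigint,
  mainDigests: bigint[],    // start-line + header digests, zero-padded to MAX_NUMBER_OF_HEADERS + 1
  plaintextDigest: bigint
): bigint {
  let accumulatedMainDigestsHashed = 0n;
  for (const digest of mainDigests) {
    if (digest !== 0n) {
      accumulatedMainDigestsHashed = mod(accumulatedMainDigestsHashed + poseidon1([digest]));
    }
  }
  return mod(
    stepIn + poseidon1([bodyDigest]) - accumulatedMainDigestsHashed - poseidon1([plaintextDigest])
  );
}
```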
65 changes: 45 additions & 20 deletions circuits/json/extraction.circom
@@ -5,15 +5,13 @@ include "hash_machine.circom";

template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT) {
signal input data[DATA_BYTES];
signal input polynomial_input;
signal input sequence_digest;
signal input ciphertext_digest;
signal input sequence_digest;
signal input value_digest;

signal input step_in[1];
signal output step_out[1];

// TODO: Change this
step_out[0] <== step_in[0];

//--------------------------------------------------------------------------------------------//
// Initialize the parser
component State[DATA_BYTES];
@@ -23,18 +21,18 @@ template JSONExtraction(DATA_BYTES, MAX_STACK_HEIGHT) {
State[0].tree_hash[i] <== [0,0];
}
State[0].byte <== data[0];
State[0].polynomial_input <== polynomial_input;
State[0].polynomial_input <== ciphertext_digest;
State[0].monomial <== 0;
State[0].parsing_string <== 0;
State[0].parsing_number <== 0;

// Set up monomials for stack/tree digesting
signal monomials[4 * MAX_STACK_HEIGHT];
signal monomials[3 * MAX_STACK_HEIGHT];
monomials[0] <== 1;
for(var i = 1 ; i < 4 * MAX_STACK_HEIGHT ; i++) {
monomials[i] <== monomials[i - 1] * polynomial_input;
for(var i = 1 ; i < 3 * MAX_STACK_HEIGHT ; i++) {
monomials[i] <== monomials[i - 1] * ciphertext_digest;
}
signal intermediate_digest[DATA_BYTES][4 * MAX_STACK_HEIGHT];
signal intermediate_digest[DATA_BYTES][3 * MAX_STACK_HEIGHT];
signal state_digest[DATA_BYTES];

// Debugging
@@ -50,29 +48,39 @@
// log("xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx");

var total_matches = 0;
signal is_matched[DATA_BYTES];
signal sequence_is_matched[DATA_BYTES];
signal value_is_matched[DATA_BYTES];
signal sequence_and_value_matched[DATA_BYTES];
for(var data_idx = 1; data_idx < DATA_BYTES; data_idx++) {
State[data_idx] = StateUpdateHasher(MAX_STACK_HEIGHT);
State[data_idx].byte <== data[data_idx];
State[data_idx].polynomial_input <== polynomial_input;
State[data_idx].polynomial_input <== ciphertext_digest;
State[data_idx].stack <== State[data_idx - 1].next_stack;
State[data_idx].parsing_string <== State[data_idx - 1].next_parsing_string;
State[data_idx].parsing_number <== State[data_idx - 1].next_parsing_number;
State[data_idx].monomial <== State[data_idx - 1].next_monomial;
State[data_idx].tree_hash <== State[data_idx - 1].next_tree_hash;

// Digest the whole stack and tree hash
// Digest the whole stack and key tree hash
var accumulator = 0;
for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) {
intermediate_digest[data_idx][4 * i] <== State[data_idx].next_stack[i][0] * monomials[4 * i];
intermediate_digest[data_idx][4 * i + 1] <== State[data_idx].next_stack[i][1] * monomials[4 * i + 1];
intermediate_digest[data_idx][4 * i + 2] <== State[data_idx].next_tree_hash[i][0] * monomials[4 * i + 2];
intermediate_digest[data_idx][4 * i + 3] <== State[data_idx].next_tree_hash[i][1] * monomials[4 * i + 3];
accumulator += intermediate_digest[data_idx][4 * i] + intermediate_digest[data_idx][4 * i + 1] + intermediate_digest[data_idx][4 * i + 2] + intermediate_digest[data_idx][4 * i + 3];
intermediate_digest[data_idx][3 * i] <== State[data_idx].next_stack[i][0] * monomials[3 * i];
intermediate_digest[data_idx][3 * i + 1] <== State[data_idx].next_stack[i][1] * monomials[3 * i + 1];
intermediate_digest[data_idx][3 * i + 2] <== State[data_idx].next_tree_hash[i][0] * monomials[3 * i + 2];
accumulator += intermediate_digest[data_idx][3 * i] + intermediate_digest[data_idx][3 * i + 1] + intermediate_digest[data_idx][3 * i + 2];
}
state_digest[data_idx] <== accumulator;
is_matched[data_idx] <== IsEqual()([state_digest[data_idx], sequence_digest]);
total_matches += is_matched[data_idx];
sequence_is_matched[data_idx] <== IsEqual()([state_digest[data_idx], sequence_digest]);

// Now check for if the value digest appears
var value_digest_in_stack = 0;
for(var i = 0 ; i < MAX_STACK_HEIGHT ; i++) {
// Only a single value can be present, and it sits at index 1, so we can simply accumulate
value_digest_in_stack += State[data_idx].next_tree_hash[i][1];
}
value_is_matched[data_idx] <== IsEqual()([value_digest, value_digest_in_stack]);
sequence_and_value_matched[data_idx] <== sequence_is_matched[data_idx] * value_is_matched[data_idx];
total_matches += sequence_and_value_matched[data_idx];

// Debugging
// for(var i = 0; i<MAX_STACK_HEIGHT; i++) {
@@ -98,4 +106,21 @@
State[DATA_BYTES - 1].next_stack[i] === [0,0];
State[DATA_BYTES - 1].next_tree_hash[i] === [0,0];
}

// Verify we have now processed all the data properly
// TODO: This data is now the HTTP body, consider renaming
signal isPadding[DATA_BYTES]; // == 1 when this byte is padding
signal zeroed_data[DATA_BYTES];
for (var i = 0 ; i < DATA_BYTES ; i++) {
isPadding[i] <== IsEqual()([data[i], -1]);
zeroed_data[i] <== (1 - isPadding[i]) * data[i];
}
signal data_digest <== PolynomialDigest(DATA_BYTES)(zeroed_data, ciphertext_digest);
signal sequence_digest_hashed <== Poseidon(1)([sequence_digest]);
signal data_digest_hashed <== Poseidon(1)([data_digest]);

0 === step_in[0] - sequence_digest_hashed - data_digest_hashed;

// Set the output to the digest of the intended value
step_out[0] <== value_digest;
}
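
The JSON step is now a pure check-and-hand-off: the incoming accumulator must equal the Poseidon-wrapped sequence digest plus the Poseidon-wrapped digest of the zero-padded body data, and the circuit emits the claimed value digest. A TypeScript sketch of that relation (same assumed modulus and `poseidon-lite` helper as above; `dataDigest` stands for the polynomial digest of the zeroed body bytes under `ciphertext_digest`):

```typescript
import { poseidon1 } from "poseidon-lite";

const P = BigInt(
  "21888242871839275222246405745257275088548364400416034343698204186575808495617"
);
const mod = (a: bigint): bigint => ((a % P) + P) % P;

// Enforces: step_in === Poseidon(sequence_digest) + Poseidon(data_digest)
// and hands the claimed value digest forward as step_out.
function jsonExtractionStepOut(
  stepIn: bigint,
  sequenceDigest: bigint,
  dataDigest: bigint,
  valueDigest: bigint
): bigint {
  const expectedStepIn = mod(poseidon1([sequenceDigest]) + poseidon1([dataDigest]));
  if (stepIn !== expectedStepIn) {
    throw new Error("step_in does not match sequence digest + data digest");
  }
  return valueDigest; // step_out
}
```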
64 changes: 14 additions & 50 deletions circuits/test/chacha20/chacha20-nivc.test.ts
@@ -1,7 +1,8 @@
import { WitnessTester } from "circomkit";
import { circomkit, toByte, toUint32Array, uintArray32ToBits } from "../common";
import { circomkit, PolynomialDigest, toByte, toUint32Array, uintArray32ToBits, modAdd } from "../common";
import { DataHasher } from "../common/poseidon";
import { assert } from "chai";
import { poseidon1 } from "poseidon-lite";


describe("chacha20-nivc", () => {
@@ -55,9 +56,13 @@ describe("chacha20-nivc", () => {
nonce: toInput(Buffer.from(nonceBytes)),
counter: counterBits,
plainText: plaintextBytes,
step_in: DataHasher(ciphertextBytes)
step_in: 0
}, (["step_out"]));
assert.deepEqual(w.step_out, DataHasher(plaintextBytes));
// Output
let ciphertext_digest = DataHasher(ciphertextBytes);
let plaintext_digest_hashed = poseidon1([PolynomialDigest(plaintextBytes, ciphertext_digest)]);
let output = modAdd(plaintext_digest_hashed - ciphertext_digest, BigInt(0));
assert.deepEqual(w.step_out, output);
});
});

@@ -105,57 +110,16 @@ describe("chacha20-nivc", () => {
let paddedPlaintextBytes = plaintextBytes.concat(Array(totalLength - plaintextBytes.length).fill(-1));
const counterBits = uintArray32ToBits([1])[0]
let w = await circuit.compute({
key: toInput(Buffer.from(keyBytes)),
nonce: toInput(Buffer.from(nonceBytes)),
counter: counterBits,
plainText: paddedPlaintextBytes,
step_in: DataHasher(ciphertextBytes)
}, (["step_out"]));
assert.deepEqual(w.step_out, DataHasher(paddedPlaintextBytes));
});
});

describe("wrong ciphertext hash", () => {
it("should fail", async () => {
circuit = await circomkit.WitnessTester(`ChaCha20`, {
file: "chacha20/nivc/chacha20_nivc",
template: "ChaCha20_NIVC",
params: [128] // number of bytes in plaintext
});
// Test case from RFC https://www.rfc-editor.org/rfc/rfc7539.html#section-2.4.2
// The input encoding here is not the most intuitive. Inputs are serialized as little endian,
// i.e. "e4e7f110" is serialized as "10 f1 e7 e4". So the way I am reading in inputs is
// to ensure that every 32-bit word is byte reversed before being turned into bits.
// I think this should be easy when we compute the witness in Rust.
let keyBytes = [
0x00, 0x01, 0x02, 0x03,
0x04, 0x05, 0x06, 0x07,
0x08, 0x09, 0x0a, 0x0b,
0x0c, 0x0d, 0x0e, 0x0f,
0x10, 0x11, 0x12, 0x13,
0x14, 0x15, 0x16, 0x17,
0x18, 0x19, 0x1a, 0x1b,
0x1c, 0x1d, 0x1e, 0x1f
];

let nonceBytes =
[
0x00, 0x00, 0x00, 0x00,
0x00, 0x00, 0x00, 0x4a,
0x00, 0x00, 0x00, 0x00
];
let plaintextBytes =
toByte("Ladies and Gentlemen of the class of '99: If I could offer you only one tip ");
let totalLength = 128;
let paddedPlaintextBytes = plaintextBytes.concat(Array(totalLength - plaintextBytes.length).fill(-1));
const counterBits = uintArray32ToBits([1])[0]
await circuit.expectFail({
key: toInput(Buffer.from(keyBytes)),
nonce: toInput(Buffer.from(nonceBytes)),
counter: counterBits,
plainText: paddedPlaintextBytes,
step_in: 0
});
}, (["step_out"]));
let ciphertext_digest = DataHasher(ciphertextBytes);
let plaintext_digest = poseidon1([PolynomialDigest(plaintextBytes, ciphertext_digest)]);
let output = modAdd(plaintext_digest - ciphertext_digest, BigInt(0));
assert.deepEqual(w.step_out, output);
});
});
});
@@ -175,4 +139,4 @@ export function fromInput(bits: number[]) {
buffer.writeUInt32LE(uint32Array[i], i * 4);
}
return buffer;
}
}
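
The updated expectations rely on a `modAdd` helper from `../common` to reduce the (possibly negative) BigInt arithmetic into the circuit's field before comparing against `step_out`. Its implementation is not shown in this diff; a plausible sketch, assuming the BN254 scalar field:

```typescript
// Assumed BN254 scalar field modulus used by the circuits.
const P = BigInt(
  "21888242871839275222246405745257275088548364400416034343698204186575808495617"
);

// Adds two values and reduces into [0, P); the extra "+ P" keeps intermediates
// such as `plaintext_digest_hashed - ciphertext_digest` from staying negative.
export function modAdd(a: bigint, b: bigint): bigint {
  return (((a + b) % P) + P) % P;
}
```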