diff --git a/packages/circuits/email-verifier.circom b/packages/circuits/email-verifier.circom index f6388434..ab3c95cf 100644 --- a/packages/circuits/email-verifier.circom +++ b/packages/circuits/email-verifier.circom @@ -9,6 +9,7 @@ include "./lib/sha.circom"; include "./utils/array.circom"; include "./utils/regex.circom"; include "./utils/hash.circom"; +include "./helpers/remove-soft-line-breaks.circom"; /// @title EmailVerifier @@ -29,7 +30,7 @@ include "./utils/hash.circom"; /// @input bodyHashIndex Index of the body hash `bh` in the emailHeader. /// @input precomputedSHA[32] Precomputed SHA-256 hash of the email body till the bodyHashIndex. /// @output pubkeyHash Poseidon hash of the pubkey - Poseidon(n/2)(n/2 chunks of pubkey with k*2 bits per chunk). -template EmailVerifier(maxHeadersLength, maxBodyLength, n, k, ignoreBodyHashCheck) { +template EmailVerifier(maxHeadersLength, maxBodyLength, n, k, ignoreBodyHashCheck, removeSoftLineBreaks) { assert(maxHeadersLength % 64 == 0); assert(maxBodyLength % 64 == 0); assert(n * k > 2048); // to support 2048 bit RSA @@ -122,6 +123,19 @@ template EmailVerifier(maxHeadersLength, maxBodyLength, n, k, ignoreBodyHashChec } computedBodyHashInts[i].out === headerBodyHash[i]; } + + if (removeSoftLineBreaks == 1) { + signal input decodedEmailBodyIn[maxBodyLength]; + signal output decodedEmailBodyOut[maxBodyLength]; + component qpEncodingChecker = RemoveSoftLineBreaks(maxBodyLength); + + qpEncodingChecker.encoded <== emailBody; + qpEncodingChecker.decoded <== decodedEmailBodyIn; + + qpEncodingChecker.isValid === 1; + + decodedEmailBodyOut <== qpEncodingChecker.decoded; + } } diff --git a/packages/circuits/helpers/remove-soft-line-breaks.circom b/packages/circuits/helpers/remove-soft-line-breaks.circom index 0b3e6c8c..5951576b 100644 --- a/packages/circuits/helpers/remove-soft-line-breaks.circom +++ b/packages/circuits/helpers/remove-soft-line-breaks.circom @@ -7,109 +7,108 @@ include "../utils/hash.circom"; template RemoveSoftLineBreaks(maxLength) { signal input encoded[maxLength]; signal input decoded[maxLength]; - signal output is_valid; + signal output isValid; // Helper signals signal r; signal processed[maxLength]; - signal is_equals[maxLength]; - signal is_cr[maxLength]; - signal is_lf[maxLength]; - signal temp_soft_break[maxLength - 2]; - signal is_soft_break[maxLength]; - signal should_zero[maxLength]; - signal is_valid_char[maxLength]; - signal r_enc[maxLength]; - signal sum_enc[maxLength]; - signal r_dec[maxLength]; - signal sum_dec[maxLength]; + signal isEquals[maxLength]; + signal isCr[maxLength]; + signal isLf[maxLength]; + signal tempSoftBreak[maxLength - 2]; + signal isSoftBreak[maxLength]; + signal shouldZero[maxLength]; + signal rEnc[maxLength]; + signal sumEnc[maxLength]; + signal rDec[maxLength]; + signal sumDec[maxLength]; // Helper components - component mux_enc[maxLength]; + component muxEnc[maxLength]; // Deriving r from Poseidon hash - component r_hasher = PoseidonModular(2 * maxLength); + component rHasher = PoseidonModular(2 * maxLength); for (var i = 0; i < maxLength; i++) { - r_hasher.in[i] <== encoded[i]; + rHasher.in[i] <== encoded[i]; } for (var i = 0; i < maxLength; i++) { - r_hasher.in[maxLength + i] <== decoded[i]; + rHasher.in[maxLength + i] <== decoded[i]; } - r <== r_hasher.out; + r <== rHasher.out; // Check for '=' (61 in ASCII) for (var i = 0; i < maxLength; i++) { - is_equals[i] <== IsEqual()([encoded[i], 61]); + isEquals[i] <== IsEqual()([encoded[i], 61]); } // Check for '\r' (13 in ASCII) for (var i = 0; i < maxLength - 1; i++) { - is_cr[i] <== IsEqual()([encoded[i + 1], 13]); + isCr[i] <== IsEqual()([encoded[i + 1], 13]); } - is_cr[maxLength - 1] <== 0; + isCr[maxLength - 1] <== 0; // Check for '\n' (10 in ASCII) for (var i = 0; i < maxLength - 2; i++) { - is_lf[i] <== IsEqual()([encoded[i + 2], 10]); + isLf[i] <== IsEqual()([encoded[i + 2], 10]); } - is_lf[maxLength - 2] <== 0; - is_lf[maxLength - 1] <== 0; + isLf[maxLength - 2] <== 0; + isLf[maxLength - 1] <== 0; // Identify soft line breaks for (var i = 0; i < maxLength - 2; i++) { - temp_soft_break[i] <== is_equals[i] * is_cr[i]; - is_soft_break[i] <== temp_soft_break[i] * is_lf[i]; + tempSoftBreak[i] <== isEquals[i] * isCr[i]; + isSoftBreak[i] <== tempSoftBreak[i] * isLf[i]; } // Handle the last two characters - is_soft_break[maxLength - 2] <== 0; - is_soft_break[maxLength - 1] <== 0; + isSoftBreak[maxLength - 2] <== 0; + isSoftBreak[maxLength - 1] <== 0; // Determine which characters should be zeroed for (var i = 0; i < maxLength; i++) { if (i == 0) { - should_zero[i] <== is_soft_break[i]; + shouldZero[i] <== isSoftBreak[i]; } else if (i == 1) { - should_zero[i] <== is_soft_break[i] + is_soft_break[i-1]; + shouldZero[i] <== isSoftBreak[i] + isSoftBreak[i-1]; } else if (i == maxLength - 1) { - should_zero[i] <== is_soft_break[i-1] + is_soft_break[i-2]; + shouldZero[i] <== isSoftBreak[i-1] + isSoftBreak[i-2]; } else { - should_zero[i] <== is_soft_break[i] + is_soft_break[i-1] + is_soft_break[i-2]; + shouldZero[i] <== isSoftBreak[i] + isSoftBreak[i-1] + isSoftBreak[i-2]; } } // Process the encoded input for (var i = 0; i < maxLength; i++) { - processed[i] <== (1 - should_zero[i]) * encoded[i]; + processed[i] <== (1 - shouldZero[i]) * encoded[i]; } // Calculate powers of r for encoded - r_enc[0] <== 1; + rEnc[0] <== 1; for (var i = 1; i < maxLength; i++) { - mux_enc[i] = Mux1(); - mux_enc[i].c[0] <== r_enc[i - 1] * r; - mux_enc[i].c[1] <== r_enc[i - 1]; - mux_enc[i].s <== should_zero[i]; - r_enc[i] <== mux_enc[i].out; + muxEnc[i] = Mux1(); + muxEnc[i].c[0] <== rEnc[i - 1] * r; + muxEnc[i].c[1] <== rEnc[i - 1]; + muxEnc[i].s <== shouldZero[i]; + rEnc[i] <== muxEnc[i].out; } // Calculate powers of r for decoded - r_dec[0] <== 1; + rDec[0] <== 1; for (var i = 1; i < maxLength; i++) { - r_dec[i] <== r_dec[i - 1] * r; + rDec[i] <== rDec[i - 1] * r; } // Calculate rlc for processed - sum_enc[0] <== processed[0]; + sumEnc[0] <== processed[0]; for (var i = 1; i < maxLength; i++) { - sum_enc[i] <== sum_enc[i - 1] + r_enc[i] * processed[i]; + sumEnc[i] <== sumEnc[i - 1] + rEnc[i] * processed[i]; } // Calculate rlc for decoded - sum_dec[0] <== decoded[0]; + sumDec[0] <== decoded[0]; for (var i = 1; i < maxLength; i++) { - sum_dec[i] <== sum_dec[i - 1] + r_dec[i] * decoded[i]; + sumDec[i] <== sumDec[i - 1] + rDec[i] * decoded[i]; } // Check if rlc for decoded is equal to rlc for encoded - is_valid <== IsEqual()([sum_enc[maxLength - 1], sum_dec[maxLength - 1]]); + isValid <== IsEqual()([sumEnc[maxLength - 1], sumDec[maxLength - 1]]); } \ No newline at end of file diff --git a/packages/circuits/tests/email-verifier.test.ts b/packages/circuits/tests/email-verifier.test.ts index b87a0223..6030d928 100644 --- a/packages/circuits/tests/email-verifier.test.ts +++ b/packages/circuits/tests/email-verifier.test.ts @@ -216,3 +216,45 @@ describe("EmailVerifier : Without body check", () => { await circuit.checkConstraints(witness); }); }); + +describe('EmailVerifier : With soft line breaks', () => { + jest.setTimeout(10 * 60 * 1000); // 10 minutes + + let dkimResult: DKIMVerificationResult; + let circuit: any; + + beforeAll(async () => { + const rawEmail = fs.readFileSync( + path.join(__dirname, './test-emails/lorem_ipsum.eml'), + 'utf8' + ); + dkimResult = await verifyDKIMSignature(rawEmail); + + circuit = await wasm_tester( + path.join( + __dirname, + './test-circuits/email-verifier-with-soft-line-breaks-test.circom' + ), + { + recompile: true, + include: path.join(__dirname, '../../../node_modules'), + output: path.join(__dirname, "./compiled-test-circuits"), + } + ); + }); + + it('should verify email when removeSoftLineBreaks is true', async function () { + const emailVerifierInputs = generateEmailVerifierInputsFromDKIMResult( + dkimResult, + { + maxHeadersLength: 640, + maxBodyLength: 1408, + ignoreBodyHashCheck: false, + removeSoftLineBreaks: true, + } + ); + + const witness = await circuit.calculateWitness(emailVerifierInputs); + await circuit.checkConstraints(witness); + }); +}); diff --git a/packages/circuits/tests/test-circuits/email-verifier-no-body-test.circom b/packages/circuits/tests/test-circuits/email-verifier-no-body-test.circom index 9ff2f64b..622de30f 100644 --- a/packages/circuits/tests/test-circuits/email-verifier-no-body-test.circom +++ b/packages/circuits/tests/test-circuits/email-verifier-no-body-test.circom @@ -2,4 +2,4 @@ pragma circom 2.1.6; include "../../email-verifier.circom"; -component main { public [ pubkey ] } = EmailVerifier(640, 768, 121, 17, 1); +component main { public [ pubkey ] } = EmailVerifier(640, 768, 121, 17, 1, 0); diff --git a/packages/circuits/tests/test-circuits/email-verifier-test.circom b/packages/circuits/tests/test-circuits/email-verifier-test.circom index 58772343..f027d66d 100644 --- a/packages/circuits/tests/test-circuits/email-verifier-test.circom +++ b/packages/circuits/tests/test-circuits/email-verifier-test.circom @@ -2,4 +2,4 @@ pragma circom 2.1.6; include "../../email-verifier.circom"; -component main { public [ pubkey ] } = EmailVerifier(640, 768, 121, 17, 0); +component main { public [ pubkey ] } = EmailVerifier(640, 768, 121, 17, 0, 0); diff --git a/packages/circuits/tests/test-circuits/email-verifier-with-soft-line-breaks-test.circom b/packages/circuits/tests/test-circuits/email-verifier-with-soft-line-breaks-test.circom new file mode 100644 index 00000000..f7605912 --- /dev/null +++ b/packages/circuits/tests/test-circuits/email-verifier-with-soft-line-breaks-test.circom @@ -0,0 +1,5 @@ +pragma circom 2.1.6; + +include "../../email-verifier.circom"; + +component main { public [ pubkey ] } = EmailVerifier(640, 1408, 121, 17, 0, 1); diff --git a/packages/helpers/src/input-generators.ts b/packages/helpers/src/input-generators.ts index 42cd26c9..0a79b8bc 100644 --- a/packages/helpers/src/input-generators.ts +++ b/packages/helpers/src/input-generators.ts @@ -12,6 +12,7 @@ type CircuitInput = { emailBodyLength?: string; precomputedSHA?: string[]; bodyHashIndex?: string; + decodedEmailBodyIn?: string[]; }; type InputGenerationArgs = { @@ -19,8 +20,31 @@ type InputGenerationArgs = { shaPrecomputeSelector?: string; maxHeadersLength?: number; // Max length of the email header including padding maxBodyLength?: number; // Max length of the email body after shaPrecomputeSelector including padding + removeSoftLineBreaks?: boolean; }; +function removeSoftLineBreaks(body: string[]): string[] { + const result = []; + let i = 0; + while (i < body.length) { + if (i + 2 < body.length && + body[i] === '61' && // '=' character + body[i + 1] === '13' && // '\r' character + body[i + 2] === '10') { // '\n' character + // Skip the soft line break sequence + i += 3; // Move past the soft line break + } else { + result.push(body[i]); + i++; + } + } + // Pad the result with zeros to make it the same length as the body + while (result.length < body.length) { + result.push('0'); + } + return result; +} + /** * * @description Generate circuit inputs for the EmailVerifier circuit from raw email content @@ -97,6 +121,10 @@ export function generateEmailVerifierInputsFromDKIMResult( circuitInputs.precomputedSHA = Uint8ArrayToCharArray(precomputedSha); circuitInputs.bodyHashIndex = bodyHashIndex.toString(); circuitInputs.emailBody = Uint8ArrayToCharArray(bodyRemaining); + + if (params.removeSoftLineBreaks) { + circuitInputs.decodedEmailBodyIn = removeSoftLineBreaks(circuitInputs.emailBody); + } } return circuitInputs;