Skip to content

Commit

Permalink
Merge pull request #202 from zkemail/feat/remove-qp-enc
Browse files Browse the repository at this point in the history
Feat: Circuit for Removing the Quoted Printable Encoding in Email Body
  • Loading branch information
Divide-By-0 authored Jul 30, 2024
2 parents ffc2960 + 0eaa2f8 commit 0bc2538
Show file tree
Hide file tree
Showing 12 changed files with 672 additions and 3 deletions.
19 changes: 18 additions & 1 deletion packages/circuits/email-verifier.circom
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ include "./lib/sha.circom";
include "./utils/array.circom";
include "./utils/regex.circom";
include "./utils/hash.circom";
include "./helpers/remove-soft-line-breaks.circom";


/// @title EmailVerifier
Expand All @@ -20,6 +21,7 @@ include "./utils/hash.circom";
/// @param n Number of bits per chunk the RSA key is split into. Recommended to be 121.
/// @param k Number of chunks the RSA key is split into. Recommended to be 17.
/// @param ignoreBodyHashCheck Set 1 to skip body hash check in case data to prove/extract is only in the headers.
/// @param removeSoftLineBreaks Set 1 to remove soft line breaks from the email body.
/// @input emailHeader[maxHeadersLength] Email headers that are signed (ones in `DKIM-Signature` header) as ASCII int[], padded as per SHA-256 block size.
/// @input emailHeaderLength Length of the email header including the SHA-256 padding.
/// @input pubkey[k] RSA public key split into k chunks of n bits each.
Expand All @@ -28,8 +30,10 @@ include "./utils/hash.circom";
/// @input emailBodyLength Length of the email body including the SHA-256 padding.
/// @input bodyHashIndex Index of the body hash `bh` in the emailHeader.
/// @input precomputedSHA[32] Precomputed SHA-256 hash of the email body till the bodyHashIndex.
/// @input decodedEmailBodyIn[maxBodyLength] Decoded email body without soft line breaks.
/// @output pubkeyHash Poseidon hash of the pubkey - Poseidon(n/2)(n/2 chunks of pubkey with k*2 bits per chunk).
template EmailVerifier(maxHeadersLength, maxBodyLength, n, k, ignoreBodyHashCheck) {
/// @output decodedEmailBodyOut[maxBodyLength] Decoded email body with soft line breaks removed.
template EmailVerifier(maxHeadersLength, maxBodyLength, n, k, ignoreBodyHashCheck, removeSoftLineBreaks) {
assert(maxHeadersLength % 64 == 0);
assert(maxBodyLength % 64 == 0);
assert(n * k > 2048); // to support 2048 bit RSA
Expand Down Expand Up @@ -122,6 +126,19 @@ template EmailVerifier(maxHeadersLength, maxBodyLength, n, k, ignoreBodyHashChec
}
computedBodyHashInts[i].out === headerBodyHash[i];
}

if (removeSoftLineBreaks == 1) {
signal input decodedEmailBodyIn[maxBodyLength];
signal output decodedEmailBodyOut[maxBodyLength];
component qpEncodingChecker = RemoveSoftLineBreaks(maxBodyLength);

qpEncodingChecker.encoded <== emailBody;
qpEncodingChecker.decoded <== decodedEmailBodyIn;

qpEncodingChecker.isValid === 1;

decodedEmailBodyOut <== qpEncodingChecker.decoded;
}
}


Expand Down
121 changes: 121 additions & 0 deletions packages/circuits/helpers/remove-soft-line-breaks.circom
Original file line number Diff line number Diff line change
@@ -0,0 +1,121 @@
pragma circom 2.1.6;

include "circomlib/circuits/comparators.circom";
include "circomlib/circuits/mux1.circom";
include "../utils/hash.circom";

/// @title RemoveSoftLineBreaks
/// @notice Verifies that `decoded` is `encoded` with every soft line break removed
/// @dev A soft line break is the three-byte sequence "=\r\n" (ASCII 61, 13, 10) in `encoded`.
/// @dev The check compares two random linear combinations (RLC) using a challenge `r`
///      derived with PoseidonModular over both inputs. The j-th surviving character of
///      `encoded` and the j-th character of `decoded` are both weighted by r^(j+1).
/// @dev Every entry of `decoded` contributes to its sum, so entries past the surviving
///      characters are presumably expected to be 0 - confirm against the caller.
/// @dev NOTE(review): boundary handling indexes `maxLength - 2`; presumably maxLength >= 3
///      is expected - confirm.
/// @param maxLength The maximum length of the input strings
/// @input encoded An array of ASCII values representing the input string with potential soft line breaks
/// @input decoded An array of ASCII values representing the expected output after removing soft line breaks
/// @output isValid A signal that is 1 if the decoded input correctly represents the encoded input with soft line breaks removed, 0 otherwise
template RemoveSoftLineBreaks(maxLength) {
    signal input encoded[maxLength];
    signal input decoded[maxLength];
    signal output isValid;

    // Helper signals
    signal r;
    signal processed[maxLength];
    signal isEquals[maxLength];
    signal isCr[maxLength];
    signal isLf[maxLength];
    signal tempSoftBreak[maxLength - 2];
    signal isSoftBreak[maxLength];
    signal shouldZero[maxLength];
    signal rEnc[maxLength];
    signal sumEnc[maxLength];
    signal rDec[maxLength];
    signal sumDec[maxLength];

    // Helper components
    component muxEnc[maxLength];

    // Derive the challenge r by hashing both arrays (encoded first, then decoded)
    component rHasher = PoseidonModular(2 * maxLength);
    for (var i = 0; i < maxLength; i++) {
        rHasher.in[i] <== encoded[i];
    }
    for (var i = 0; i < maxLength; i++) {
        rHasher.in[maxLength + i] <== decoded[i];
    }
    r <== rHasher.out;

    // isEquals[i] = 1 when encoded[i] is '=' (61 in ASCII)
    for (var i = 0; i < maxLength; i++) {
        isEquals[i] <== IsEqual()([encoded[i], 61]);
    }

    // isCr[i] = 1 when the NEXT character, encoded[i + 1], is '\r' (13 in ASCII)
    for (var i = 0; i < maxLength - 1; i++) {
        isCr[i] <== IsEqual()([encoded[i + 1], 13]);
    }
    isCr[maxLength - 1] <== 0;

    // isLf[i] = 1 when encoded[i + 2] is '\n' (10 in ASCII)
    for (var i = 0; i < maxLength - 2; i++) {
        isLf[i] <== IsEqual()([encoded[i + 2], 10]);
    }
    isLf[maxLength - 2] <== 0;
    isLf[maxLength - 1] <== 0;

    // isSoftBreak[i] = 1 when "=\r\n" starts at index i
    for (var i = 0; i < maxLength - 2; i++) {
        tempSoftBreak[i] <== isEquals[i] * isCr[i];
        isSoftBreak[i] <== tempSoftBreak[i] * isLf[i];
    }
    // A three-byte sequence cannot start in the last two positions
    isSoftBreak[maxLength - 2] <== 0;
    isSoftBreak[maxLength - 1] <== 0;

    // shouldZero[i] = 1 when index i is any of the three bytes of a soft line break,
    // i.e. a break starts at i, i - 1 or i - 2. The terms are mutually exclusive
    // (encoded[i] cannot be '=', '\r' and '\n' at once), so the sum stays 0 or 1.
    for (var i = 0; i < maxLength; i++) {
        if (i == 0) {
            shouldZero[i] <== isSoftBreak[i];
        } else if (i == 1) {
            shouldZero[i] <== isSoftBreak[i] + isSoftBreak[i-1];
        } else if (i == maxLength - 1) {
            shouldZero[i] <== isSoftBreak[i-1] + isSoftBreak[i-2];
        } else {
            shouldZero[i] <== isSoftBreak[i] + isSoftBreak[i-1] + isSoftBreak[i-2];
        }
    }

    // Zero out the bytes that belong to soft line breaks
    for (var i = 0; i < maxLength; i++) {
        processed[i] <== (1 - shouldZero[i]) * encoded[i];
    }

    // Powers of r for encoded: the exponent advances only on surviving characters.
    // Index 0 goes through the same mux as every other index so that an input that
    // BEGINS with a soft line break does not consume an exponent; previously rEnc[0]
    // was fixed to 1 and such inputs were weighted one power of r too high, making a
    // correct decoding fail the check.
    muxEnc[0] = Mux1();
    muxEnc[0].c[0] <== r;
    muxEnc[0].c[1] <== 1;
    muxEnc[0].s <== shouldZero[0];
    rEnc[0] <== muxEnc[0].out;
    for (var i = 1; i < maxLength; i++) {
        muxEnc[i] = Mux1();
        muxEnc[i].c[0] <== rEnc[i - 1] * r;
        muxEnc[i].c[1] <== rEnc[i - 1];
        muxEnc[i].s <== shouldZero[i];
        rEnc[i] <== muxEnc[i].out;
    }

    // Powers of r for decoded: rDec[i] = r^(i + 1), matching the encoded-side weights
    rDec[0] <== r;
    for (var i = 1; i < maxLength; i++) {
        rDec[i] <== rDec[i - 1] * r;
    }

    // Calculate rlc for processed
    sumEnc[0] <== rEnc[0] * processed[0];
    for (var i = 1; i < maxLength; i++) {
        sumEnc[i] <== sumEnc[i - 1] + rEnc[i] * processed[i];
    }

    // Calculate rlc for decoded
    sumDec[0] <== rDec[0] * decoded[0];
    for (var i = 1; i < maxLength; i++) {
        sumDec[i] <== sumDec[i - 1] + rDec[i] * decoded[i];
    }

    // Check if rlc for decoded is equal to rlc for encoded
    isValid <== IsEqual()([sumEnc[maxLength - 1], sumDec[maxLength - 1]]);
}
42 changes: 42 additions & 0 deletions packages/circuits/tests/email-verifier.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -216,3 +216,45 @@ describe("EmailVerifier : Without body check", () => {
await circuit.checkConstraints(witness);
});
});

describe('EmailVerifier : With soft line breaks', () => {
  // Raise the Jest timeout to 10 minutes.
  jest.setTimeout(10 * 60 * 1000);

  let dkimResult: DKIMVerificationResult;
  let circuit: any;

  // Verify the DKIM signature of the fixture email once, then compile the test circuit.
  beforeAll(async () => {
    const emailPath = path.join(__dirname, './test-emails/lorem_ipsum.eml');
    const emailContents = fs.readFileSync(emailPath, 'utf8');
    dkimResult = await verifyDKIMSignature(emailContents);

    const circuitPath = path.join(
      __dirname,
      './test-circuits/email-verifier-with-soft-line-breaks-test.circom'
    );
    const compileOptions = {
      recompile: true,
      include: path.join(__dirname, '../../../node_modules'),
      output: path.join(__dirname, './compiled-test-circuits'),
    };
    circuit = await wasm_tester(circuitPath, compileOptions);
  });

  it('should verify email when removeSoftLineBreaks is true', async () => {
    // Build circuit inputs with soft-line-break removal enabled and the body hash check kept on.
    const inputParams = {
      maxHeadersLength: 640,
      maxBodyLength: 1408,
      ignoreBodyHashCheck: false,
      removeSoftLineBreaks: true,
    };
    const circuitInputs = generateEmailVerifierInputsFromDKIMResult(
      dkimResult,
      inputParams
    );

    // The witness must be computable and satisfy every circuit constraint.
    const witness = await circuit.calculateWitness(circuitInputs);
    await circuit.checkConstraints(witness);
  });
});
Loading

0 comments on commit 0bc2538

Please sign in to comment.