Merge pull request #29 from zkemail/audit/duplicate-code

veredise audit fix: Remove duplicate code and abstract multiple occurrences of code into separate functions
zkemail · Jan 13, 2025 · d066c09 · d066c09
2 parents d294871 + ddd35f8
commit d066c09
Show file tree

Hide file tree

Showing 3 changed files with 39 additions and 49 deletions.
diff --git a/lib/src/headers/mod.nr b/lib/src/headers/mod.nr
@@ -4,7 +4,7 @@ pub mod body_hash;
 pub mod email_address;
 
 /**
- * Constrain a sequence in a header to match the specific header field
+ * Constrain a sequence in a header to be within the correct bounds
  *
  * @param MAX_HEADER_LENGTH - The maximum length of the email header
  * @param MAX_HEADER_FIELD_LENGTH - The maximum length of the header field
@@ -13,18 +13,13 @@ pub mod email_address;
  * @param header_field_sequence - The sequence of the header field
  * @param header_field_name - The name of the header field
  */
-pub fn constrain_header_field<let MAX_HEADER_LENGTH: u32, let MAX_HEADER_FIELD_LENGTH: u32, let HEADER_FIELD_NAME_LENGTH: u32>(
+fn check_header_field_bounds<let MAX_HEADER_LENGTH: u32, let MAX_HEADER_FIELD_LENGTH: u32, let HEADER_FIELD_NAME_LENGTH: u32>(
     header: BoundedVec<u8, MAX_HEADER_LENGTH>,
     header_field_sequence: Sequence,
     header_field_name: [u8; HEADER_FIELD_NAME_LENGTH],
 ) {
-    // check that the sequence is within bounds
-    assert(
-        header_field_sequence.index + header_field_sequence.length <= header.len(),
-        "Header field out of bounds",
-    );
     // check the range of the sequence is within the header (so we can use get_unchecked)
-    let end_index = header_field_sequence.index + header_field_sequence.length;
+    let end_index = header_field_sequence.end_index();
     assert(end_index <= header.len(), "Header field out of bounds of header");
 
     // if the sequence is not the start, check for a newline
@@ -38,11 +33,13 @@ pub fn constrain_header_field<let MAX_HEADER_LENGTH: u32, let MAX_HEADER_FIELD_L
             "Header field must start with CRLF",
         );
     }
+
     // if the sequence is not the end, check for a newline
     if end_index != header.len() {
         assert(header.get_unchecked(end_index) == CR, "Header field must end with CRLF");
         assert(header.get_unchecked(end_index + 1) == LF, "Header field must end with CRLF");
     }
+
     // check that the header field name matches the expected name
     for i in 0..HEADER_FIELD_NAME_LENGTH {
         assert(
@@ -54,68 +51,63 @@ pub fn constrain_header_field<let MAX_HEADER_LENGTH: u32, let MAX_HEADER_FIELD_L
         header.get_unchecked(header_field_sequence.index + HEADER_FIELD_NAME_LENGTH) == 0x3a,
         "Header field name must be followed by a colon",
     );
+}
+
+/**
+ * Constrain a sequence in a header to match the specific header field
+ *
+ * @param MAX_HEADER_LENGTH - The maximum length of the email header
+ * @param MAX_HEADER_FIELD_LENGTH - The maximum length of the header field
+ * @param HEADER_FIELD_NAME_LENGTH - The length of the header field name
+ * @param header - The email header as validated in the DKIM signature
+ * @param header_field_sequence - The sequence of the header field
+ * @param header_field_name - The name of the header field
+ */
+pub fn constrain_header_field<let MAX_HEADER_LENGTH: u32, let MAX_HEADER_FIELD_LENGTH: u32, let HEADER_FIELD_NAME_LENGTH: u32>(
+    header: BoundedVec<u8, MAX_HEADER_LENGTH>,
+    header_field_sequence: Sequence,
+    header_field_name: [u8; HEADER_FIELD_NAME_LENGTH],
+) {
+    // constrain beginning of header field
+    check_header_field_bounds::<MAX_HEADER_LENGTH, MAX_HEADER_FIELD_LENGTH, HEADER_FIELD_NAME_LENGTH>(
+        header,
+        header_field_sequence,
+        header_field_name,
+    );
+
     // check the header field is uninterrupted
     let start_index = header_field_sequence.index + HEADER_FIELD_NAME_LENGTH + 1;
     for i in 0..MAX_HEADER_FIELD_LENGTH {
         // is it safe enough to cut this constraint cost in half by not checking lf? i think so
         let index = start_index + i;
-        if (index < header_field_sequence.index + header_field_sequence.length) {
+        if (index < header_field_sequence.end_index()) {
             assert(header.get_unchecked(index) != CR, "Header field must not contain newlines");
         }
     }
 }
 
 /**
- * contrain_header_field with checks for the last occurence of "<" inside the loop to save constraints
+ * constrain_header_field with checks for the last occurence of "<" inside the loop to save constraints
  */
 pub fn constrain_header_field_detect_last_angle_bracket<let MAX_HEADER_LENGTH: u32, let MAX_HEADER_FIELD_LENGTH: u32, let HEADER_FIELD_NAME_LENGTH: u32>(
     header: BoundedVec<u8, MAX_HEADER_LENGTH>,
     header_field_sequence: Sequence,
     header_field_name: [u8; HEADER_FIELD_NAME_LENGTH],
 ) -> u32 {
-    // check that the sequence is within bounds
-    assert(
-        header_field_sequence.index + header_field_sequence.length <= header.len(),
-        "Header field out of bounds",
+    // constrain beginning of header field
+    check_header_field_bounds::<MAX_HEADER_LENGTH, MAX_HEADER_FIELD_LENGTH, HEADER_FIELD_NAME_LENGTH>(
+        header,
+        header_field_sequence,
+        header_field_name,
     );
-    // check the range of the sequence is within the header (so we can use get_unchecked)
-    let end_index = header_field_sequence.index + header_field_sequence.length;
-    assert(end_index <= header.len(), "Header field out of bounds of header");
 
-    // if the sequence is not the start, check for a newline
-    if header_field_sequence.index != 0 {
-        assert(
-            header.get_unchecked(header_field_sequence.index - 2) == CR,
-            "Header field must start with CRLF",
-        );
-        assert(
-            header.get_unchecked(header_field_sequence.index - 1) == LF,
-            "Header field must start with CRLF",
-        );
-    }
-    // if the sequence is not the end, check for a newline
-    if end_index != header.len() {
-        assert(header.get_unchecked(end_index) == CR, "Header field must end with CRLF");
-        assert(header.get_unchecked(end_index + 1) == LF, "Header field must end with CRLF");
-    }
-    // check that the header field name matches the expected name
-    for i in 0..HEADER_FIELD_NAME_LENGTH {
-        assert(
-            header.get_unchecked(header_field_sequence.index + i) == header_field_name[i],
-            "Header field name does not match",
-        );
-    }
-    assert(
-        header.get_unchecked(header_field_sequence.index + HEADER_FIELD_NAME_LENGTH) == 0x3a,
-        "Header field name must be followed by a colon",
-    );
     // check the header field is uninterrupted
     let mut last_angle_bracket = 0;
     let start_index = header_field_sequence.index + HEADER_FIELD_NAME_LENGTH + 1;
     for i in (HEADER_FIELD_NAME_LENGTH + 1)..MAX_HEADER_FIELD_LENGTH {
         // is it safe enough to cut this constraint cost in half by not checking lf? i think so
         let index = start_index + i;
-        if (index < header_field_sequence.index + header_field_sequence.length) {
+        if (index < header_field_sequence.end_index()) {
             let byte = header.get_unchecked(index);
             assert(byte != CR, "Header field must not contain newlines");
             if byte == 0x3c {

diff --git a/lib/src/lib.nr b/lib/src/lib.nr
@@ -55,9 +55,6 @@ let KEY_LIMBS: u32>(
     pubkey: RSAPubkey<KEY_LIMBS_2048>,
     signature: [Field; KEY_LIMBS_2048],
 ) {
-    // check the body and header lengths are within bounds
-    assert(header.len() <= MAX_EMAIL_HEADER_LENGTH);
-
     // ~ 86,553 constraints with 2048-bit RSA & 1024 bit max header length
     // verify the dkim signature over the header
     pubkey.verify_dkim_signature(header, signature);

diff --git a/lib/src/remove_soft_line_breaks.nr b/lib/src/remove_soft_line_breaks.nr
@@ -1,3 +1,4 @@
+use crate::{CR, LF};
 use nodash::array::pack_bytes;
 use std::hash::poseidon2::Poseidon2;
 
@@ -29,7 +30,7 @@ pub fn find_zeroes<let N: u32>(encoded: [u8; N]) -> [bool; N] {
     // identify soft line breaks
     let mut is_break: [bool; N] = [false; N];
     for i in 0..N - 2 {
-        is_break[i] = (encoded[i] == 0x3D) & (encoded[i + 1] == 0x0D) & (encoded[i + 2] == 0x0A);
+        is_break[i] = (encoded[i] == 0x3D) & (encoded[i + 1] == CR) & (encoded[i + 2] == LF);
     }
 
     // find indexes of chars to zero