From e5f1dbf36b9b89f97591fb80c8f39f61685ee4fd Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 8 Apr 2020 20:10:16 +0800 Subject: [PATCH 01/43] Added asm code generation for montgomery multiplication and squaring --- algebra-core/build.rs | 253 ++++++++++++++++++++++++++++++ algebra-core/src/fields/macros.rs | 103 ++++++------ algebra-core/src/lib.rs | 5 +- 3 files changed, 314 insertions(+), 47 deletions(-) create mode 100644 algebra-core/build.rs diff --git a/algebra-core/build.rs b/algebra-core/build.rs new file mode 100644 index 000000000..90bca415f --- /dev/null +++ b/algebra-core/build.rs @@ -0,0 +1,253 @@ +use std::env; +use std::fs; +use std::path::Path; + +const MAX_LIMBS: usize = 8; + +fn main() { + let out_dir = env::var_os("OUT_DIR").unwrap(); + let dest_path = Path::new(&out_dir).join("assembly.rs"); + + let macro_string = generate_macro_string(MAX_LIMBS); + + fs::write( + &dest_path, + macro_string + ).unwrap(); + + println!("cargo:rerun-if-changed=build.rs"); +} + +// // Different strategies for limbs <= 6, 7 <= limbs <= 12, limbs > 12? +// fn generate_mul_add_1_asm (limbs: usize) -> String { +// let mut asm_string = String::from(""); +// for i in 0..limbs { + + +fn generate_macro_string (max_limbs:usize) -> std::string::String { + let mut macro_string = String::from( + "macro_rules! asm_mul { + ($a:expr, $b:expr, $limbs:expr, $modulus:expr, $inverse:expr) => { + match $limbs { + "); + for i in 2..(max_limbs+1) { + let mut rs = String::from(""); + for k in 0..i { + rs = format!("{}{}", rs, format!("\"r{}\", ", 8+k)); + } + let limb_specialisation = format!( + " {} => {{ + unsafe {{ + asm!({} + : + : \"r\"(&mut $a), // $0 + \"r\"(&$b), // $1 + \"r\"(&$modulus), // $2 + \"i\"(0u64), // $3 + \"i\"($inverse) // $4 + : \"rcx\", \"rbx\", \"rdx\", \"rax\", {} \"cc\", \"memory\" + ); + }} + }} + + ", i, generate_asm_mul_string(i), rs);//ASM_STR, rs);// + macro_string = format!("{}{}", macro_string, limb_specialisation); + } + macro_string = format!("{}{}", macro_string, + "x => panic!(\"asm_mul (no-carry): number of limbs supported is 2 up to 8. You had {}\", x) + }; + } +}"); + macro_string = format!("{}{}", macro_string, + "macro_rules! asm_square { + ($a:expr, $limbs:expr, $modulus:expr, $inverse:expr) => { + match $limbs { + "); + for i in 2..(max_limbs+1) { + let mut rs = String::from(""); + for k in 0..i { + rs = format!("{}{}", rs, format!("\"r{}\", ", 8+k)); + } + let limb_specialisation = format!(" + {} => {{ + unsafe {{ + asm!({} + : + : \"r\"(&mut $a), // $0 + \"r\"(&$modulus), // $1 + \"i\"(0u64), // $2 + \"i\"($inverse) // $3 + : \"rcx\", \"rbx\", \"rdx\", \"rax\", {} \"cc\", \"memory\" + ); + }} + }} + + ", i, generate_asm_square_string(i), rs);//ASM_STR, rs);// + macro_string = format!("{}{}", macro_string, limb_specialisation); + } + macro_string = format!("{}{}", macro_string, + "x => panic!(\"asm_mul (no-carry): number of limbs supported is 2 up to 8. 
You had {}\", x) + }; + } + }"); + macro_string +} + +fn generate_asm_square_string (limbs: usize) -> String { + let mut asm_string = String::from(""); + for i in 0..limbs { + // First inner loop + if i == 0 { + asm_string = format!("{}{}", asm_string,"\" + movq 0($0), %rdx + xorq %rcx, %rcx + mulxq 0($0), %r8, %r9"); + for j in 1..limbs-1 { + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($0), %rax, %r{} + adcxq %rax, %r{}", + j*8, 8 + ((j+1) % limbs), 8+j)); + } + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($0), %rax, %rcx + mov $2, %rdx + adcxq %rax, %r{} + adcxq %rdx, %rcx // %rcx is carry1", + (limbs-1)*8, 8+limbs-1)); + } else { + asm_string = format!("{}{}", asm_string, format!(" + movq {}($0), %rdx", i * 8)); + for j in 0..limbs-1 { + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($0), %rax, %rbx + adcxq %rax, %r{} + adoxq %rbx, %r{}", + j * 8, + 8 + ((j+i) % limbs), + 8 + ((j+i+1) % limbs))); + } + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($0), %rax, %rcx + mov $2, %rdx + adcxq %rax, %r{} + adoxq %rdx, %rcx + adcxq %rdx, %rcx", + (limbs-1) * 8, + 8 + ((i+limbs-1) % limbs))); + } + // Second inner loop + asm_string = format!("{}{}", asm_string, format!(" + movq $3, %rdx + mulxq %r{}, %rdx, %rax // wrapping_mul", 8+i)); + asm_string = format!("{}{}", asm_string, format!(" + mulxq 0($1), %rax, %rbx + adcxq %r{}, %rax // put junk in rax + adoxq %rbx, %r{}", + 8 + (i % limbs), + 8 + ((i+1) % limbs))); + for j in 1..limbs-1 { + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($1), %rax, %rbx + adcxq %rax, %r{} + adoxq %rbx, %r{}", + j * 8, + 8 + ((j+i) % limbs), + 8 + ((j+i+1) % limbs))); + } + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($1), %rax, %r{2} + mov $2, %rdx + adcxq %rax, %r{} + adoxq %rcx, %r{2} + adcxq %rdx, %r{2}", + (limbs-1)*8, + 8 + ((i+limbs-1) % limbs), + 8 + ((i) % limbs))); + } + for i in 0..limbs { + asm_string = format!("{}{}", asm_string, format!(" + movq %r{}, {}($0)", 8+(i % limbs), i*8)); + } + format!("{}{}",asm_string, "\"") +} + +// For now, generated code only works for up to 8/10 limbss +// In the future, we can try to implement data movement to and from an address +// for higher number of limbs +fn generate_asm_mul_string (limbs: usize) -> String { + let mut asm_string = String::from(""); + for i in 0..limbs { + // First inner loop + if i == 0 { + asm_string = format!("{}{}", asm_string,"\" + movq 0($0), %rdx + xorq %rcx, %rcx + mulxq 0($1), %r8, %r9"); + for j in 1..limbs-1 { + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($1), %rax, %r{} + adcxq %rax, %r{}", + j*8, 8 + ((j+1) % limbs), 8+j)); + } + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($1), %rax, %rcx + mov $3, %rdx + adcxq %rax, %r{} + adcxq %rdx, %rcx // %rcx is carry1", + (limbs-1)*8, 8+limbs-1)); + } else { + asm_string = format!("{}{}", asm_string, format!(" + movq {}($0), %rdx", i * 8)); + for j in 0..limbs-1 { + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($1), %rax, %rbx + adcxq %rax, %r{} + adoxq %rbx, %r{}", + j * 8, + 8 + ((j+i) % limbs), + 8 + ((j+i+1) % limbs))); + } + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($1), %rax, %rcx + mov $3, %rdx + adcxq %rax, %r{} + adoxq %rdx, %rcx + adcxq %rdx, %rcx", + (limbs-1) * 8, + 8 + ((i+limbs-1) % limbs))); + } + // Second inner loop + asm_string = format!("{}{}", asm_string, format!(" + movq $4, %rdx + mulxq %r{}, %rdx, %rax // wrapping_mul", 8+i)); + asm_string = 
format!("{}{}", asm_string, format!(" + mulxq 0($2), %rax, %rbx + adcxq %r{}, %rax // put junk in rax + adoxq %rbx, %r{}", + 8 + (i % limbs), + 8 + ((i+1) % limbs))); + for j in 1..limbs-1 { + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($2), %rax, %rbx + adcxq %rax, %r{} + adoxq %rbx, %r{}", + j * 8, + 8 + ((j+i) % limbs), + 8 + ((j+i+1) % limbs))); + } + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($2), %rax, %r{2} + mov $3, %rdx + adcxq %rax, %r{} + adoxq %rcx, %r{2} + adcxq %rdx, %r{2}", + (limbs-1)*8, + 8 + ((i+limbs-1) % limbs), + 8 + ((i) % limbs))); + } + for i in 0..limbs { + asm_string = format!("{}{}", asm_string, format!(" + movq %r{}, {}($0)", 8+(i % limbs), i*8)); + } + format!("{}{}",asm_string, "\"") +} diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index cdcacdca9..d382c8c56 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -1,6 +1,7 @@ macro_rules! impl_Fp { ($Fp:ident, $FpParameters:ident, $limbs:expr) => { pub trait $FpParameters: FpParameters {} + include!(concat!(env!("OUT_DIR"), "/assembly.rs")); #[derive(Derivative)] #[derivative( @@ -451,24 +452,30 @@ macro_rules! impl_field_mul_assign { } let no_carry:bool = !(first_bit_set || all_bits_set); + // No-carry optimisation applied to CIOS if no_carry { - let mut r = [0u64; $limbs]; - let mut carry1 = 0u64; - let mut carry2 = 0u64; - - for i in 0..$limbs { - r[0] = fa::mac(r[0], (self.0).0[0], (other.0).0[i], &mut carry1); - let k = r[0].wrapping_mul(P::INV); - fa::mac_discard(r[0], k, P::MODULUS.0[0], &mut carry2); - for j in 1..$limbs { - r[j] = fa::mac_with_carry(r[j], (self.0).0[j], (other.0).0[i], &mut carry1); - r[j - 1] = fa::mac_with_carry(r[j], k, P::MODULUS.0[j], &mut carry2); + if $limbs <= 8 {//== 4 {// + asm_mul!((self.0).0, (other.0).0, $limbs, P::MODULUS.0, P::INV); + self.reduce(); + } else { + let mut r = [0u64; $limbs]; + let mut carry1 = 0u64; + let mut carry2 = 0u64; + + for i in 0..$limbs { + r[0] = fa::mac(r[0], (self.0).0[0], (other.0).0[i], &mut carry1); + let k = r[0].wrapping_mul(P::INV); + fa::mac_discard(r[0], k, P::MODULUS.0[0], &mut carry2); + for j in 1..$limbs { + r[j] = fa::mac_with_carry(r[j], (self.0).0[j], (other.0).0[i], &mut carry1); + r[j - 1] = fa::mac_with_carry(r[j], k, P::MODULUS.0[j], &mut carry2); + } + r[$limbs - 1] = carry1 + carry2; } - r[$limbs - 1] = carry1 + carry2; + (self.0).0 = r; + self.reduce(); } - (self.0).0 = r; - self.reduce(); // Alternative implementation } else { let mut r = [0u64; $limbs * 2]; @@ -528,44 +535,50 @@ macro_rules! 
impl_field_square_in_place { #[inline] #[unroll_for_loops] fn square_in_place(&mut self) -> &mut Self { - let mut r = [0u64; $limbs*2]; + if $limbs <= 8 { + asm_square!((self.0).0, $limbs, P::MODULUS.0, P::INV); + self.reduce(); + self + } else { + let mut r = [0u64; $limbs*2]; - let mut carry = 0; - for i in 0..$limbs { - if i < $limbs-1 { - for j in 0..$limbs { - if j >= (i+1) { r[i+j] = fa::mac_with_carry(r[i+j], (self.0).0[i], (self.0).0[j], &mut carry); } + let mut carry = 0; + for i in 0..$limbs { + if i < $limbs-1 { + for j in 0..$limbs { + if j >= (i+1) { r[i+j] = fa::mac_with_carry(r[i+j], (self.0).0[i], (self.0).0[j], &mut carry); } + } + r[$limbs+i] = carry; + carry = 0; } - r[$limbs+i] = carry; - carry = 0; } - } - r[$limbs*2-1] = r[$limbs*2-2] >> 63; - for i in 0..$limbs { r[$limbs*2-2-i] = (r[$limbs*2-2-i] << 1) | (r[$limbs*2-3-i] >> 63); } - for i in 3..$limbs { r[$limbs+1-i] = (r[$limbs+1-i] << 1) | (r[$limbs-i] >> 63); } - r[1] = r[1] << 1; + r[$limbs*2-1] = r[$limbs*2-2] >> 63; + for i in 0..$limbs { r[$limbs*2-2-i] = (r[$limbs*2-2-i] << 1) | (r[$limbs*2-3-i] >> 63); } + for i in 3..$limbs { r[$limbs+1-i] = (r[$limbs+1-i] << 1) | (r[$limbs-i] >> 63); } + r[1] = r[1] << 1; - for i in 0..$limbs { - r[2*i] = fa::mac_with_carry(r[2*i], (self.0).0[i], (self.0).0[i], &mut carry); - r[2*i+1] = fa::adc(r[2*i+1], 0, &mut carry); - } - // Montgomery reduction - let mut _carry2 = 0; - for i in 0..$limbs { - let k = r[i].wrapping_mul(P::INV); - let mut carry = 0; - fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); - for j in 1..$limbs { - r[j+i] = fa::mac_with_carry(r[j+i], k, P::MODULUS.0[j], &mut carry); + for i in 0..$limbs { + r[2*i] = fa::mac_with_carry(r[2*i], (self.0).0[i], (self.0).0[i], &mut carry); + r[2*i+1] = fa::adc(r[2*i+1], 0, &mut carry); + } + // Montgomery reduction + let mut _carry2 = 0; + for i in 0..$limbs { + let k = r[i].wrapping_mul(P::INV); + let mut carry = 0; + fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); + for j in 1..$limbs { + r[j+i] = fa::mac_with_carry(r[j+i], k, P::MODULUS.0[j], &mut carry); + } + r[$limbs+i] = fa::adc(r[$limbs+i], _carry2, &mut carry); + _carry2 = carry; } - r[$limbs+i] = fa::adc(r[$limbs+i], _carry2, &mut carry); - _carry2 = carry; - } - (self.0).0.copy_from_slice(&r[$limbs..]); - self.reduce(); - self + (self.0).0.copy_from_slice(&r[$limbs..]); + self.reduce(); + self + } } } } diff --git a/algebra-core/src/lib.rs b/algebra-core/src/lib.rs index 612300f03..5971dbb58 100644 --- a/algebra-core/src/lib.rs +++ b/algebra-core/src/lib.rs @@ -4,8 +4,9 @@ #![deny(non_shorthand_field_patterns, unused_attributes, unused_imports)] #![deny(unused_extern_crates, renamed_and_removed_lints, unused_allocation)] #![deny(unused_comparisons, bare_trait_objects, const_err, unused_must_use)] -#![deny(unused_mut, unused_unsafe, private_in_public, unsafe_code)] -#![forbid(unsafe_code)] +#![deny(unused_mut, unused_unsafe, private_in_public)]//, unsafe_code)] +// #![forbid(unsafe_code)] +#![feature(asm)] #[cfg(all(test, not(feature = "std")))] #[macro_use] From cc3d14f51c7b9aad8912f85094bfd08330b4a132 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Fri, 10 Apr 2020 15:04:12 +0800 Subject: [PATCH 02/43] add data movement --- algebra-core/Cargo.toml | 4 + algebra-core/build.rs | 244 +------------------------ algebra-core/field-assembly/Cargo.toml | 9 + algebra-core/field-assembly/src/lib.rs | 170 +++++++++++++++++ algebra-core/src/fields/macros.rs | 10 +- 5 files changed, 193 insertions(+), 
244 deletions(-) create mode 100644 algebra-core/field-assembly/Cargo.toml create mode 100644 algebra-core/field-assembly/src/lib.rs diff --git a/algebra-core/Cargo.toml b/algebra-core/Cargo.toml index 335144dff..8c303bd1e 100644 --- a/algebra-core/Cargo.toml +++ b/algebra-core/Cargo.toml @@ -18,6 +18,7 @@ categories = ["cryptography"] include = ["Cargo.toml", "src", "README.md", "LICENSE-APACHE", "LICENSE-MIT"] license = "MIT/Apache-2.0" edition = "2018" +build = "build.rs" ################################# Dependencies ################################ @@ -29,6 +30,9 @@ rand = { version = "0.7", default-features = false } rayon = { version = "1", optional = true } unroll = "0.1.4" +[build-dependencies] +field-assembly = { path = "./field-assembly" } + [dev-dependencies] rand_xorshift = "0.2" diff --git a/algebra-core/build.rs b/algebra-core/build.rs index 90bca415f..9c663ad07 100644 --- a/algebra-core/build.rs +++ b/algebra-core/build.rs @@ -2,252 +2,18 @@ use std::env; use std::fs; use std::path::Path; -const MAX_LIMBS: usize = 8; +use field_assembly::generate_macro_string; + +const NUM_LIMBS: usize = 16; fn main() { let out_dir = env::var_os("OUT_DIR").unwrap(); - let dest_path = Path::new(&out_dir).join("assembly.rs"); - - let macro_string = generate_macro_string(MAX_LIMBS); + let dest_path = Path::new(&out_dir).join("field_assembly.rs"); fs::write( &dest_path, - macro_string + generate_macro_string(NUM_LIMBS) ).unwrap(); println!("cargo:rerun-if-changed=build.rs"); } - -// // Different strategies for limbs <= 6, 7 <= limbs <= 12, limbs > 12? -// fn generate_mul_add_1_asm (limbs: usize) -> String { -// let mut asm_string = String::from(""); -// for i in 0..limbs { - - -fn generate_macro_string (max_limbs:usize) -> std::string::String { - let mut macro_string = String::from( - "macro_rules! asm_mul { - ($a:expr, $b:expr, $limbs:expr, $modulus:expr, $inverse:expr) => { - match $limbs { - "); - for i in 2..(max_limbs+1) { - let mut rs = String::from(""); - for k in 0..i { - rs = format!("{}{}", rs, format!("\"r{}\", ", 8+k)); - } - let limb_specialisation = format!( - " {} => {{ - unsafe {{ - asm!({} - : - : \"r\"(&mut $a), // $0 - \"r\"(&$b), // $1 - \"r\"(&$modulus), // $2 - \"i\"(0u64), // $3 - \"i\"($inverse) // $4 - : \"rcx\", \"rbx\", \"rdx\", \"rax\", {} \"cc\", \"memory\" - ); - }} - }} - - ", i, generate_asm_mul_string(i), rs);//ASM_STR, rs);// - macro_string = format!("{}{}", macro_string, limb_specialisation); - } - macro_string = format!("{}{}", macro_string, - "x => panic!(\"asm_mul (no-carry): number of limbs supported is 2 up to 8. You had {}\", x) - }; - } -}"); - macro_string = format!("{}{}", macro_string, - "macro_rules! asm_square { - ($a:expr, $limbs:expr, $modulus:expr, $inverse:expr) => { - match $limbs { - "); - for i in 2..(max_limbs+1) { - let mut rs = String::from(""); - for k in 0..i { - rs = format!("{}{}", rs, format!("\"r{}\", ", 8+k)); - } - let limb_specialisation = format!(" - {} => {{ - unsafe {{ - asm!({} - : - : \"r\"(&mut $a), // $0 - \"r\"(&$modulus), // $1 - \"i\"(0u64), // $2 - \"i\"($inverse) // $3 - : \"rcx\", \"rbx\", \"rdx\", \"rax\", {} \"cc\", \"memory\" - ); - }} - }} - - ", i, generate_asm_square_string(i), rs);//ASM_STR, rs);// - macro_string = format!("{}{}", macro_string, limb_specialisation); - } - macro_string = format!("{}{}", macro_string, - "x => panic!(\"asm_mul (no-carry): number of limbs supported is 2 up to 8. 
You had {}\", x) - }; - } - }"); - macro_string -} - -fn generate_asm_square_string (limbs: usize) -> String { - let mut asm_string = String::from(""); - for i in 0..limbs { - // First inner loop - if i == 0 { - asm_string = format!("{}{}", asm_string,"\" - movq 0($0), %rdx - xorq %rcx, %rcx - mulxq 0($0), %r8, %r9"); - for j in 1..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($0), %rax, %r{} - adcxq %rax, %r{}", - j*8, 8 + ((j+1) % limbs), 8+j)); - } - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($0), %rax, %rcx - mov $2, %rdx - adcxq %rax, %r{} - adcxq %rdx, %rcx // %rcx is carry1", - (limbs-1)*8, 8+limbs-1)); - } else { - asm_string = format!("{}{}", asm_string, format!(" - movq {}($0), %rdx", i * 8)); - for j in 0..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($0), %rax, %rbx - adcxq %rax, %r{} - adoxq %rbx, %r{}", - j * 8, - 8 + ((j+i) % limbs), - 8 + ((j+i+1) % limbs))); - } - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($0), %rax, %rcx - mov $2, %rdx - adcxq %rax, %r{} - adoxq %rdx, %rcx - adcxq %rdx, %rcx", - (limbs-1) * 8, - 8 + ((i+limbs-1) % limbs))); - } - // Second inner loop - asm_string = format!("{}{}", asm_string, format!(" - movq $3, %rdx - mulxq %r{}, %rdx, %rax // wrapping_mul", 8+i)); - asm_string = format!("{}{}", asm_string, format!(" - mulxq 0($1), %rax, %rbx - adcxq %r{}, %rax // put junk in rax - adoxq %rbx, %r{}", - 8 + (i % limbs), - 8 + ((i+1) % limbs))); - for j in 1..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, %rbx - adcxq %rax, %r{} - adoxq %rbx, %r{}", - j * 8, - 8 + ((j+i) % limbs), - 8 + ((j+i+1) % limbs))); - } - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, %r{2} - mov $2, %rdx - adcxq %rax, %r{} - adoxq %rcx, %r{2} - adcxq %rdx, %r{2}", - (limbs-1)*8, - 8 + ((i+limbs-1) % limbs), - 8 + ((i) % limbs))); - } - for i in 0..limbs { - asm_string = format!("{}{}", asm_string, format!(" - movq %r{}, {}($0)", 8+(i % limbs), i*8)); - } - format!("{}{}",asm_string, "\"") -} - -// For now, generated code only works for up to 8/10 limbss -// In the future, we can try to implement data movement to and from an address -// for higher number of limbs -fn generate_asm_mul_string (limbs: usize) -> String { - let mut asm_string = String::from(""); - for i in 0..limbs { - // First inner loop - if i == 0 { - asm_string = format!("{}{}", asm_string,"\" - movq 0($0), %rdx - xorq %rcx, %rcx - mulxq 0($1), %r8, %r9"); - for j in 1..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, %r{} - adcxq %rax, %r{}", - j*8, 8 + ((j+1) % limbs), 8+j)); - } - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, %rcx - mov $3, %rdx - adcxq %rax, %r{} - adcxq %rdx, %rcx // %rcx is carry1", - (limbs-1)*8, 8+limbs-1)); - } else { - asm_string = format!("{}{}", asm_string, format!(" - movq {}($0), %rdx", i * 8)); - for j in 0..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, %rbx - adcxq %rax, %r{} - adoxq %rbx, %r{}", - j * 8, - 8 + ((j+i) % limbs), - 8 + ((j+i+1) % limbs))); - } - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, %rcx - mov $3, %rdx - adcxq %rax, %r{} - adoxq %rdx, %rcx - adcxq %rdx, %rcx", - (limbs-1) * 8, - 8 + ((i+limbs-1) % limbs))); - } - // Second inner loop - asm_string = format!("{}{}", asm_string, format!(" - movq $4, %rdx - mulxq %r{}, %rdx, %rax // wrapping_mul", 8+i)); - asm_string = 
format!("{}{}", asm_string, format!(" - mulxq 0($2), %rax, %rbx - adcxq %r{}, %rax // put junk in rax - adoxq %rbx, %r{}", - 8 + (i % limbs), - 8 + ((i+1) % limbs))); - for j in 1..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($2), %rax, %rbx - adcxq %rax, %r{} - adoxq %rbx, %r{}", - j * 8, - 8 + ((j+i) % limbs), - 8 + ((j+i+1) % limbs))); - } - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($2), %rax, %r{2} - mov $3, %rdx - adcxq %rax, %r{} - adoxq %rcx, %r{2} - adcxq %rdx, %r{2}", - (limbs-1)*8, - 8 + ((i+limbs-1) % limbs), - 8 + ((i) % limbs))); - } - for i in 0..limbs { - asm_string = format!("{}{}", asm_string, format!(" - movq %r{}, {}($0)", 8+(i % limbs), i*8)); - } - format!("{}{}",asm_string, "\"") -} diff --git a/algebra-core/field-assembly/Cargo.toml b/algebra-core/field-assembly/Cargo.toml new file mode 100644 index 000000000..c8879c640 --- /dev/null +++ b/algebra-core/field-assembly/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "field-assembly" +version = "0.1.0" +authors = ["jon-chuang <9093549+jon-chuang@users.noreply.github.com>"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs new file mode 100644 index 000000000..bbe29462d --- /dev/null +++ b/algebra-core/field-assembly/src/lib.rs @@ -0,0 +1,170 @@ +extern crate std; + +const MAX_REGS: usize = 8; + +pub fn generate_macro_string (num_limbs:usize) -> std::string::String { + let mut macro_string = String::from( + "macro_rules! asm_mul { + ($limbs:expr, $a:expr, $b:expr, $modulus:expr, $inverse:expr) => { + match $limbs {"); + macro_string = generate_matches(num_limbs, macro_string, true); + + macro_string = format!("{}{}", macro_string, + "macro_rules! 
asm_square { + ($limbs:expr, $a:expr, $modulus:expr, $inverse:expr) => { + match $limbs {"); + macro_string = generate_matches(num_limbs, macro_string, false); + macro_string +} + +fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) -> String { + for i in 2..(num_limbs+1) { + let mut rs_clobber = String::from(""); + let (mut b_declare, mut regs_declare, mut b, mut regs) = (" // $3", String::from(""), "$0", ""); + + // logic to format macro based on how many limbs there are, whether it is a mul + for k in 0..i { rs_clobber = format!("{}{}", rs_clobber, format!("\"r{}\", ", 8+k)); } + let mut limb_specialisation = format!(" + {} => {{", i); + if is_mul { + b_declare = ", // $3 + \"r\"(&$b)"; + b = "$4"; + regs_declare = String::from(" // $4"); + } + if i > MAX_REGS { + let extra_reg = if i <= 2*MAX_REGS { 2*(i-MAX_REGS) } else { i }; + limb_specialisation = format!("{}{}", limb_specialisation, format!(" + let mut regs = [0u64; {}];", extra_reg)); + if is_mul { regs = "$5"; } else { regs = "$4";} + regs_declare = format!(", // ${} + \"r\"(&mut regs) // {}", 3+(is_mul as usize), regs); + } + + // Actual asm declaration + limb_specialisation = format!("{}{}", limb_specialisation, format!(" + unsafe {{ + asm!({asm_string} + : + : \"r\"(&mut $a), // $0 + \"r\"(&$modulus), // $1 + \"i\"(0u64), // $2 + \"i\"($inverse){b_declare}{regs_declare} + : \"rcx\", \"rbx\", \"rdx\", \"rax\", {rs_clobber}\"cc\", \"memory\" + ); + }} + }}", + asm_string = generate_asm_mul_string(i, "$0", b, regs), + rs_clobber=rs_clobber, + b_declare=b_declare, + regs_declare=regs_declare)); + macro_string = format!("{}{}", macro_string, limb_specialisation); + } + macro_string = format!("{}{}", macro_string, format!(" + x => panic!(\"asm_mul (no-carry): number of limbs supported is 2 up to {}. 
You had {{}}.\", x) + }}; + }} +}} + +", num_limbs)); + macro_string +} + +fn generate_asm_mul_string (limbs: usize, a: &str, b: &str, regs: &str) -> String { + let extra_reg = if limbs <= MAX_REGS { 0 } else { limbs - MAX_REGS }; + let reg_max = std::cmp::min(limbs, MAX_REGS); + let block_size = if limbs <= MAX_REGS { 0 } else if limbs <= 2*MAX_REGS { limbs-MAX_REGS } else { MAX_REGS }; + let n_spill_blocks = 1 + limbs / MAX_REGS; + + let mut asm_string = String::from(""); + let mut store = ""; + + for i in 0..limbs { + // First inner loop + if i == 0 { + asm_string = format!("{}{}", asm_string,format!("\" + movq 0({a}), %rdx + xorq %rcx, %rcx + mulxq 0({b}), %r8, %r9", + a=a, b=b)); + for j in 1..limbs-1 { + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}({b}), %rax, %r{} + adcxq %rax, %r{}", + j*8, 8 + ((j+1) % limbs), 8+j, b=b)); + } + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}({b}), %rax, %rcx + mov $2, %rbx + adcxq %rax, %r{} + adcxq %rbx, %rcx // %rcx is carry1", + (limbs-1)*8, 8+limbs-1, b=b)); + } else { + asm_string = format!("{}{}", asm_string, format!(" + movq {}($0), %rdx", i * 8)); + for j in 0..limbs-1 { + let index_lo = (j+i) % reg_max; + let index_hi = (j+i+1) % reg_max; + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}({b}), %rax, %rbx + adcxq %rax, %r{} + adoxq %rbx, %r{}", + j * 8, 8 + index_lo, 8 + index_hi, b=b)); + // Store the lower word if register spills + if index_lo < extra_reg { + let reg_index = (j+i) % limbs; + let block_index = reg_index / MAX_REGS; + asm_string = format!("{}{}", asm_string, format!(" + mov %r{r}, {}({regs}) + mov {}({regs}), %r{r}", + 8 * (block_index*block_size + index_lo), + 8 * (((block_index+block_size) % n_spill_blocks) + index_lo), + r = 8 + index_lo, regs=regs)); + } + } + let index_lo = (i+limbs-1) % reg_max; + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}({b}), %rax, %rcx + mov $2, %rbx + adcxq %rax, %r{} + adoxq %rbx, %rcx + adcxq %rbx, %rcx", + (limbs-1) * 8, + 8 + index_lo, + b=b)); + } + // Second inner loop + asm_string = format!("{}{}", asm_string, format!(" + movq $3, %rdx + mulxq %r{}, %rdx, %rax // wrapping_mul", 8+i)); + asm_string = format!("{}{}", asm_string, format!(" + mulxq 0($1), %rax, %rbx + adcxq %r{}, %rax // put junk in rax + adoxq %rbx, %r{}", + 8 + (i % limbs), + 8 + ((i+1) % limbs))); + for j in 1..limbs-1 { + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($1), %rax, %rbx + adcxq %rax, %r{} + adoxq %rbx, %r{}", + j * 8, + 8 + ((j+i) % limbs), + 8 + ((j+i+1) % limbs))); + } + asm_string = format!("{}{}", asm_string, format!(" + mulxq {}($1), %rax, %r{2} + mov $2, %rbx + adcxq %rax, %r{} + adoxq %rcx, %r{2} + adcxq %rbx, %r{2}", + (limbs-1)*8, + 8 + ((i+limbs-1) % limbs), + 8 + ((i) % limbs))); + } + for i in 0..limbs { + asm_string = format!("{}{}", asm_string, format!(" + movq %r{}, {}($0)", 8+(i % limbs), i*8)); + } + format!("{}{}",asm_string, "\"") +} diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index 2aa76f123..5e2dc9c01 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -1,7 +1,7 @@ macro_rules! impl_Fp { ($Fp:ident, $FpParameters:ident, $limbs:expr) => { pub trait $FpParameters: FpParameters {} - include!(concat!(env!("OUT_DIR"), "/assembly.rs")); + include!(concat!(env!("OUT_DIR"), "/field_assembly.rs")); #[derive(Derivative)] #[derivative( @@ -438,6 +438,7 @@ macro_rules! 
impl_Fp { /// [here](https://hackmd.io/@zkteam/modular_multiplication) if /// `P::MODULUS` has (a) a non-zero MSB, and (b) at least one /// zero bit in the rest of the modulus. + macro_rules! impl_field_mul_assign { ($limbs:expr) => { #[inline] @@ -451,11 +452,10 @@ macro_rules! impl_field_mul_assign { } let no_carry: bool = !(first_bit_set || all_bits_set); - // No-carry optimisation applied to CIOS if no_carry { - if $limbs <= 8 {//== 4 {// - asm_mul!((self.0).0, (other.0).0, $limbs, P::MODULUS.0, P::INV); + if $limbs <= 8 {//== 4 {//true {// + asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); self.reduce(); } else { let mut r = [0u64; $limbs]; @@ -537,7 +537,7 @@ macro_rules! impl_field_square_in_place { #[unroll_for_loops] fn square_in_place(&mut self) -> &mut Self { if $limbs <= 8 { - asm_square!((self.0).0, $limbs, P::MODULUS.0, P::INV); + asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); self.reduce(); self } else { From 53e78e7b99e207c9974179d5f22f8588049b86c1 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Fri, 10 Apr 2020 18:51:37 +0800 Subject: [PATCH 03/43] Failed attempt at data movement through swaps and solving corner cases --- algebra-core/field-assembly/src/lib.rs | 236 +++++++++++++++++++------ 1 file changed, 180 insertions(+), 56 deletions(-) diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index bbe29462d..b667b5ea1 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -1,8 +1,12 @@ extern crate std; -const MAX_REGS: usize = 8; +const MAX_REGS: usize = 5; +// Only works for up to pub fn generate_macro_string (num_limbs:usize) -> std::string::String { + if (num_limbs > 2 * MAX_REGS) || (MAX_REGS < 4) { + panic!("Number of limbs must be <= {} and MAX_REGS >= 4", 2*MAX_REGS); + } let mut macro_string = String::from( "macro_rules! 
asm_mul { ($limbs:expr, $a:expr, $b:expr, $modulus:expr, $inverse:expr) => { @@ -19,13 +23,12 @@ pub fn generate_macro_string (num_limbs:usize) -> std::string::String { fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) -> String { for i in 2..(num_limbs+1) { - let mut rs_clobber = String::from(""); - let (mut b_declare, mut regs_declare, mut b, mut regs) = (" // $3", String::from(""), "$0", ""); - - // logic to format macro based on how many limbs there are, whether it is a mul - for k in 0..i { rs_clobber = format!("{}{}", rs_clobber, format!("\"r{}\", ", 8+k)); } let mut limb_specialisation = format!(" {} => {{", i); + // logic to format macro based on how many limbs there are, whether it is a mul + let (mut b_declare, mut regs_declare, mut b, mut regs) = (" // $3", String::from(""), "$0", ""); + let mut rs_clobber = String::from(""); + for k in 0..i { rs_clobber = format!("{}{}", rs_clobber, format!("\"r{}\", ", 8+k)); } if is_mul { b_declare = ", // $3 \"r\"(&$b)"; @@ -70,14 +73,9 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - macro_string } -fn generate_asm_mul_string (limbs: usize, a: &str, b: &str, regs: &str) -> String { - let extra_reg = if limbs <= MAX_REGS { 0 } else { limbs - MAX_REGS }; - let reg_max = std::cmp::min(limbs, MAX_REGS); - let block_size = if limbs <= MAX_REGS { 0 } else if limbs <= 2*MAX_REGS { limbs-MAX_REGS } else { MAX_REGS }; - let n_spill_blocks = 1 + limbs / MAX_REGS; - +fn generate_asm_mul_string (limbs: usize, a: &str, b: &str, spill: &str) -> String { let mut asm_string = String::from(""); - let mut store = ""; + let spilled = std::collections::HashMap::new(); for i in 0..limbs { // First inner loop @@ -87,84 +85,210 @@ fn generate_asm_mul_string (limbs: usize, a: &str, b: &str, regs: &str) -> Strin xorq %rcx, %rcx mulxq 0({b}), %r8, %r9", a=a, b=b)); + if is_spill(limbs, 0) { + asm_string = spill_swap(asm_string, 0, limbs, spill); + spilled.insert("%r8", 0); + } + for j in 1..limbs-1 { + asm_string = format!("{}{}", asm_string, format!(" - mulxq {}({b}), %rax, %r{} - adcxq %rax, %r{}", - j*8, 8 + ((j+1) % limbs), 8+j, b=b)); + mulxq {}({b}), %rax, {} + adcxq %rax, {}", + j*8, reg_from_index(limbs, (j+1) % limbs), + reg_from_index(limbs, j), b=b)); + if is_spill(limbs, j) { + asm_string = spill_swap(asm_string, j, limbs, spill); + spilled.insert(®_from_index(limbs, (j+1) % limbs), j); + } } asm_string = format!("{}{}", asm_string, format!(" mulxq {}({b}), %rax, %rcx mov $2, %rbx - adcxq %rax, %r{} + adcxq %rax, {} adcxq %rbx, %rcx // %rcx is carry1", - (limbs-1)*8, 8+limbs-1, b=b)); + (limbs-1)*8, reg_from_index(limbs, limbs-1), b=b)); + if is_spill(limbs, limbs-1) { + asm_string = spill_swap(asm_string, limbs-1, limbs, spill); + spilled.insert(®_from_index(limbs, limbs-1), limbs-1); + } } else { asm_string = format!("{}{}", asm_string, format!(" movq {}($0), %rdx", i * 8)); for j in 0..limbs-1 { - let index_lo = (j+i) % reg_max; - let index_hi = (j+i+1) % reg_max; asm_string = format!("{}{}", asm_string, format!(" mulxq {}({b}), %rax, %rbx - adcxq %rax, %r{} - adoxq %rbx, %r{}", - j * 8, 8 + index_lo, 8 + index_hi, b=b)); - // Store the lower word if register spills - if index_lo < extra_reg { - let reg_index = (j+i) % limbs; - let block_index = reg_index / MAX_REGS; - asm_string = format!("{}{}", asm_string, format!(" - mov %r{r}, {}({regs}) - mov {}({regs}), %r{r}", - 8 * (block_index*block_size + index_lo), - 8 * (((block_index+block_size) % n_spill_blocks) + index_lo), - r = 8 + 
index_lo, regs=regs)); + adcxq %rax, {} + adoxq %rbx, {}", + j * 8, reg_from_index(limbs, (j+i) % limbs), reg_from_index(limbs, (j+i+1) % limbs), b=b)); + if is_spill(limbs, (j+i) % limbs) { + asm_string = spill_swap(asm_string, (j+i) % limbs, limbs, spill); + spilled.insert(®_from_index(limbs, (j+i) % limbs), (j+i) % limbs); } } - let index_lo = (i+limbs-1) % reg_max; asm_string = format!("{}{}", asm_string, format!(" mulxq {}({b}), %rax, %rcx mov $2, %rbx - adcxq %rax, %r{} + adcxq %rax, {} adoxq %rbx, %rcx adcxq %rbx, %rcx", - (limbs-1) * 8, - 8 + index_lo, - b=b)); + (limbs-1) * 8, reg_from_index(limbs, (i+limbs-1) % limbs), b=b)); + if is_spill(limbs, (i+limbs-1) % limbs) { asm_string = spill_swap(asm_string, (i+limbs-1) % limbs, limbs, spill); } } - // Second inner loop + // Second inner loop asm_string = format!("{}{}", asm_string, format!(" movq $3, %rdx mulxq %r{}, %rdx, %rax // wrapping_mul", 8+i)); asm_string = format!("{}{}", asm_string, format!(" mulxq 0($1), %rax, %rbx - adcxq %r{}, %rax // put junk in rax - adoxq %rbx, %r{}", - 8 + (i % limbs), - 8 + ((i+1) % limbs))); + adcxq {}, %rax // put junk in rax + adoxq %rbx, {}", + reg_from_index(limbs, i % limbs), + reg_from_index(limbs, (i+1) % limbs))); for j in 1..limbs-1 { asm_string = format!("{}{}", asm_string, format!(" mulxq {}($1), %rax, %rbx - adcxq %rax, %r{} - adoxq %rbx, %r{}", + adcxq %rax, {} + adoxq %rbx, {}", j * 8, - 8 + ((j+i) % limbs), - 8 + ((j+i+1) % limbs))); + reg_from_index(limbs, (j+i) % limbs), + reg_from_index(limbs, (j+i+1) % limbs))); + if i == limbs-1 { + if is_spill(limbs, (j+i) % limbs) { asm_string = final_swap(asm_string, (j+i) % limbs, limbs, spill, a);} + } else { + if is_spill(limbs, (j+i) % limbs) { asm_string = spill_swap(asm_string, (j+i) % limbs, limbs, spill); } + } + } asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, %r{2} + mulxq {}($1), %rax, {2} mov $2, %rbx - adcxq %rax, %r{} - adoxq %rcx, %r{2} - adcxq %rbx, %r{2}", + adcxq %rax, {} + adoxq %rcx, {2} + adcxq %rbx, {2}", (limbs-1)*8, - 8 + ((i+limbs-1) % limbs), - 8 + ((i) % limbs))); + reg_from_index(limbs, (i+limbs-1) % limbs), + reg_from_index(limbs, i % limbs))); + if i == limbs-1 { + if is_spill(limbs, (i+limbs-1) % limbs) { asm_string = final_swap(asm_string, (i+limbs-1) % limbs, limbs, spill, a); } + if is_spill(limbs, i % limbs) { asm_string = final_swap(asm_string, i % limbs, limbs, spill, a); } + } else { + if is_spill(limbs, (i+limbs-1) % limbs) { asm_string = spill_swap(asm_string, (i+limbs-1) % limbs, limbs, spill); } + if is_spill(limbs, i % limbs) { asm_string = spill_swap(asm_string, i % limbs, limbs, spill); } + } } for i in 0..limbs { - asm_string = format!("{}{}", asm_string, format!(" - movq %r{}, {}($0)", 8+(i % limbs), i*8)); + if !is_spill(limbs, i) { + asm_string = format!("{}{}", asm_string, format!(" + movq %r{}, {}($0)", 8+(i % limbs), i*8)); + } + } + format!("{}{}", asm_string, "\"") +} + +fn reg_from_index (limbs: usize, index: usize) -> String { + let index = get_index(limbs, index); + if index < 8 { + format!("%r{}", index+8) + } else { + match index { + 8 => String::from("%rsi"), + 9 => String::from("%rdi"), + _ => panic!("More than 10 registers is not supported") + } + } +} + +fn is_spill(limbs: usize, index: usize) -> bool { + let half = 1 + (MAX_REGS / 2); + if limbs <= MAX_REGS { false } else if limbs <= (MAX_REGS+3) { + if limbs == (MAX_REGS+1) { + index % half == 0 + } else if limbs == (MAX_REGS+2) { + (index % half == 0) | (index % half == 1) + } else { + (index % (half 
+ 1) == 0) | (index % (half + 1) == 1) | (index % (half + 1) == 2) + } + } else { true } +} + +fn get_index(limbs: usize, index: usize) -> usize { + let half = 1 + (MAX_REGS / 2); + if limbs <= MAX_REGS { index } else if limbs <= (MAX_REGS+3) { + if limbs == (MAX_REGS+1) { + if is_spill(limbs, index) { index % half } else { + if index > half { index - 1 } else { index } + } + } else if limbs == (MAX_REGS+2) { + if is_spill(limbs, index) { index % half } else { + if index > half { index - 2 } else { index } + } + } else { + if is_spill(limbs, index) { index % (half + 1) } else { + if index > (half + 1) { index - 3 } else { index } + } + } + } else { index % MAX_REGS } +} + +fn get_spill_index(limbs: usize, index: usize) -> usize { + let half = 1 + (MAX_REGS / 2); + if limbs <= MAX_REGS { + panic!("no spill for {} limbs", limbs); + } else if limbs <= (MAX_REGS+3) { + if limbs == (MAX_REGS+1) { + if index >= half { 1 } else { index } + } else if limbs == (MAX_REGS+2) { + if index >= half { index - half + 2 } else { index } + } else { + if index >= (half + 1) { index - (half + 1) + 3 } else { index } + } + } else if index >= MAX_REGS { + (index - MAX_REGS) + (limbs % MAX_REGS) + } else { + index + } +} + +fn swap_spill_index(limbs: usize, index: usize) -> usize { + let half = 1 + (MAX_REGS / 2); + if limbs <= MAX_REGS { + panic!("no spill for {} limbs", limbs); + } else if limbs <= (MAX_REGS+3) { + if limbs == (MAX_REGS+1) { + if index >= half { 0 } else { 1 } + } else if limbs == (MAX_REGS+2) { + if index >= half { index - half } else { index + 2 } + } else { + if index >= (half + 1) { index - (half + 1) } else { index + 3 } + } + } else if index >= MAX_REGS { + index - MAX_REGS + } else { + index + (limbs % MAX_REGS) + } +} + +fn spill_swap (asm_string: String, index: usize, limbs: usize, spill: &str) -> String { + format!("{}{}", asm_string, format!(" + mov {r}, {}({spill}) + mov {}({spill}), {r}", + 8 * get_spill_index(limbs, index), + 8 * swap_spill_index(limbs, index), + r = reg_from_index(limbs, index), spill=spill)) + } + +fn final_swap (asm_string: String, index: usize, limbs: usize, spill: &str, a: &str) -> String { + if get_spill_index(limbs, index) < swap_spill_index(limbs, index) { + format!("{}{}", asm_string, format!(" + mov {r}, {}({a}) + mov {}({spill}), {r}", + 8 * index, + 8 * swap_spill_index(limbs, index), + r = reg_from_index(limbs, index), a=a, spill=spill)) + } else { + format!("{}{}", asm_string, format!(" + mov {r}, {}({a})", + 8 * index,r = reg_from_index(limbs, index), a=a)) } - format!("{}{}",asm_string, "\"") } From 1b4ed948e6c08d05b7643fc2b963027a5f37ee89 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Sat, 11 Apr 2020 04:14:33 +0800 Subject: [PATCH 04/43] data movement milestone - max 12 limbs --- algebra-core/field-assembly/Cargo.toml | 1 + algebra-core/field-assembly/src/lib.rs | 312 +++++++++++-------------- 2 files changed, 140 insertions(+), 173 deletions(-) diff --git a/algebra-core/field-assembly/Cargo.toml b/algebra-core/field-assembly/Cargo.toml index c8879c640..c71b11b70 100644 --- a/algebra-core/field-assembly/Cargo.toml +++ b/algebra-core/field-assembly/Cargo.toml @@ -7,3 +7,4 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +regex = "" diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index b667b5ea1..fe51711ec 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ 
b/algebra-core/field-assembly/src/lib.rs @@ -1,11 +1,13 @@ extern crate std; +extern crate regex; +use regex::Regex; -const MAX_REGS: usize = 5; +const MAX_REGS: usize = 6; // Only works for up to pub fn generate_macro_string (num_limbs:usize) -> std::string::String { - if (num_limbs > 2 * MAX_REGS) || (MAX_REGS < 4) { - panic!("Number of limbs must be <= {} and MAX_REGS >= 4", 2*MAX_REGS); + if (num_limbs > 2 * MAX_REGS) || (MAX_REGS < 6) { + panic!("Number of limbs must be <= {} and MAX_REGS >= 6", 2*MAX_REGS); } let mut macro_string = String::from( "macro_rules! asm_mul { @@ -26,22 +28,22 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - let mut limb_specialisation = format!(" {} => {{", i); // logic to format macro based on how many limbs there are, whether it is a mul - let (mut b_declare, mut regs_declare, mut b, mut regs) = (" // $3", String::from(""), "$0", ""); + let (mut b_declare, mut spills_declare, mut b, mut spills) = (" // $3", String::from(""), "$0", ""); let mut rs_clobber = String::from(""); - for k in 0..i { rs_clobber = format!("{}{}", rs_clobber, format!("\"r{}\", ", 8+k)); } + for k in 0..std::cmp::min(i, MAX_REGS) { rs_clobber = format!("{}{}", rs_clobber, format!("\"r{}\", ", 8+k)); } if is_mul { b_declare = ", // $3 \"r\"(&$b)"; b = "$4"; - regs_declare = String::from(" // $4"); + spills_declare = String::from(" // $4"); } if i > MAX_REGS { let extra_reg = if i <= 2*MAX_REGS { 2*(i-MAX_REGS) } else { i }; limb_specialisation = format!("{}{}", limb_specialisation, format!(" - let mut regs = [0u64; {}];", extra_reg)); - if is_mul { regs = "$5"; } else { regs = "$4";} - regs_declare = format!(", // ${} - \"r\"(&mut regs) // {}", 3+(is_mul as usize), regs); + let mut spills = [0u64; {}];", extra_reg)); + if is_mul { spills = "$5"; } else { spills = "$4";} + spills_declare = format!(", // ${} + \"r\"(&mut spills) // {}", 3+(is_mul as usize), spills); } // Actual asm declaration @@ -52,15 +54,15 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - : \"r\"(&mut $a), // $0 \"r\"(&$modulus), // $1 \"i\"(0u64), // $2 - \"i\"($inverse){b_declare}{regs_declare} + \"i\"($inverse){b_declare}{spills_declare} : \"rcx\", \"rbx\", \"rdx\", \"rax\", {rs_clobber}\"cc\", \"memory\" ); }} }}", - asm_string = generate_asm_mul_string(i, "$0", b, regs), + asm_string = transform_asm_mul_string(i, generate_asm_mul_string(i, "$0", b), spills, "$0"), rs_clobber=rs_clobber, b_declare=b_declare, - regs_declare=regs_declare)); + spills_declare=spills_declare)); macro_string = format!("{}{}", macro_string, limb_specialisation); } macro_string = format!("{}{}", macro_string, format!(" @@ -73,10 +75,8 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - macro_string } -fn generate_asm_mul_string (limbs: usize, a: &str, b: &str, spill: &str) -> String { +fn generate_asm_mul_string (limbs: usize, a: &str, b: &str) -> String { let mut asm_string = String::from(""); - let spilled = std::collections::HashMap::new(); - for i in 0..limbs { // First inner loop if i == 0 { @@ -85,210 +85,176 @@ fn generate_asm_mul_string (limbs: usize, a: &str, b: &str, spill: &str) -> Stri xorq %rcx, %rcx mulxq 0({b}), %r8, %r9", a=a, b=b)); - if is_spill(limbs, 0) { - asm_string = spill_swap(asm_string, 0, limbs, spill); - spilled.insert("%r8", 0); - } - for j in 1..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}({b}), %rax, {} - adcxq %rax, {}", - j*8, reg_from_index(limbs, (j+1) % limbs), - 
reg_from_index(limbs, j), b=b)); - if is_spill(limbs, j) { - asm_string = spill_swap(asm_string, j, limbs, spill); - spilled.insert(®_from_index(limbs, (j+1) % limbs), j); - } + mulxq {}({b}), %rax, %r{} + adcxq %rax, %r{}", + j*8, 8 + ((j+1) % limbs), 8+j, b=b)); } asm_string = format!("{}{}", asm_string, format!(" mulxq {}({b}), %rax, %rcx mov $2, %rbx - adcxq %rax, {} + adcxq %rax, %r{} adcxq %rbx, %rcx // %rcx is carry1", - (limbs-1)*8, reg_from_index(limbs, limbs-1), b=b)); - if is_spill(limbs, limbs-1) { - asm_string = spill_swap(asm_string, limbs-1, limbs, spill); - spilled.insert(®_from_index(limbs, limbs-1), limbs-1); - } + (limbs-1)*8, 8+limbs-1, b=b)); } else { asm_string = format!("{}{}", asm_string, format!(" movq {}($0), %rdx", i * 8)); for j in 0..limbs-1 { asm_string = format!("{}{}", asm_string, format!(" mulxq {}({b}), %rax, %rbx - adcxq %rax, {} - adoxq %rbx, {}", - j * 8, reg_from_index(limbs, (j+i) % limbs), reg_from_index(limbs, (j+i+1) % limbs), b=b)); - if is_spill(limbs, (j+i) % limbs) { - asm_string = spill_swap(asm_string, (j+i) % limbs, limbs, spill); - spilled.insert(®_from_index(limbs, (j+i) % limbs), (j+i) % limbs); + adcxq %rax, %r{} + adoxq %rbx, %r{}", + j * 8, 8 + ((j+i) % limbs), 8 + ((j+i+1) % limbs), b=b)); } - } asm_string = format!("{}{}", asm_string, format!(" mulxq {}({b}), %rax, %rcx mov $2, %rbx - adcxq %rax, {} + adcxq %rax, %r{} adoxq %rbx, %rcx adcxq %rbx, %rcx", - (limbs-1) * 8, reg_from_index(limbs, (i+limbs-1) % limbs), b=b)); - if is_spill(limbs, (i+limbs-1) % limbs) { asm_string = spill_swap(asm_string, (i+limbs-1) % limbs, limbs, spill); } + (limbs-1) * 8, + 8 + ((i+limbs-1) % limbs), + b=b)); } - // Second inner loop + // Second inner loop asm_string = format!("{}{}", asm_string, format!(" movq $3, %rdx mulxq %r{}, %rdx, %rax // wrapping_mul", 8+i)); asm_string = format!("{}{}", asm_string, format!(" mulxq 0($1), %rax, %rbx - adcxq {}, %rax // put junk in rax - adoxq %rbx, {}", - reg_from_index(limbs, i % limbs), - reg_from_index(limbs, (i+1) % limbs))); + adcxq %r{}, %rax // put junk in rax + adoxq %rbx, %r{}", + 8 + (i % limbs), + 8 + ((i+1) % limbs))); for j in 1..limbs-1 { asm_string = format!("{}{}", asm_string, format!(" mulxq {}($1), %rax, %rbx - adcxq %rax, {} - adoxq %rbx, {}", + adcxq %rax, %r{} + adoxq %rbx, %r{}", j * 8, - reg_from_index(limbs, (j+i) % limbs), - reg_from_index(limbs, (j+i+1) % limbs))); - if i == limbs-1 { - if is_spill(limbs, (j+i) % limbs) { asm_string = final_swap(asm_string, (j+i) % limbs, limbs, spill, a);} - } else { - if is_spill(limbs, (j+i) % limbs) { asm_string = spill_swap(asm_string, (j+i) % limbs, limbs, spill); } - } - + 8 + ((j+i) % limbs), + 8 + ((j+i+1) % limbs))); } asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, {2} + mulxq {}($1), %rax, %r{2} mov $2, %rbx - adcxq %rax, {} - adoxq %rcx, {2} - adcxq %rbx, {2}", + adcxq %rax, %r{} + adoxq %rcx, %r{2} + adcxq %rbx, %r{2}", (limbs-1)*8, - reg_from_index(limbs, (i+limbs-1) % limbs), - reg_from_index(limbs, i % limbs))); - if i == limbs-1 { - if is_spill(limbs, (i+limbs-1) % limbs) { asm_string = final_swap(asm_string, (i+limbs-1) % limbs, limbs, spill, a); } - if is_spill(limbs, i % limbs) { asm_string = final_swap(asm_string, i % limbs, limbs, spill, a); } - } else { - if is_spill(limbs, (i+limbs-1) % limbs) { asm_string = spill_swap(asm_string, (i+limbs-1) % limbs, limbs, spill); } - if is_spill(limbs, i % limbs) { asm_string = spill_swap(asm_string, i % limbs, limbs, spill); } - } + 8 + ((i+limbs-1) % limbs), + 8 + 
((i) % limbs))); } for i in 0..limbs { - if !is_spill(limbs, i) { - asm_string = format!("{}{}", asm_string, format!(" - movq %r{}, {}($0)", 8+(i % limbs), i*8)); - } + asm_string = format!("{}{}", asm_string, format!(" + movq %r{}, {}($0)", 8+(i % limbs), i*8)); } - format!("{}{}", asm_string, "\"") + format!("{}{}", asm_string, " + \"") } -fn reg_from_index (limbs: usize, index: usize) -> String { - let index = get_index(limbs, index); - if index < 8 { - format!("%r{}", index+8) + +fn get_registers (limbs: usize) -> (usize, Vec>) { + assert!(limbs <= 2*MAX_REGS); + + if limbs <= MAX_REGS { + (0, Vec::new()) + } else if limbs == MAX_REGS + 1 { + (1, vec![ + vec![MAX_REGS/2, MAX_REGS] + ]) + } else if limbs == MAX_REGS + 2 { + (2, vec![ + vec![MAX_REGS/2, MAX_REGS], + vec![MAX_REGS/2+1, MAX_REGS+1] + ]) + } else if limbs == MAX_REGS + 3 { + (3, vec![ + vec![MAX_REGS/2, MAX_REGS], + vec![MAX_REGS/2+1, MAX_REGS+1], + vec![MAX_REGS/2+2, MAX_REGS+2] + ]) } else { - match index { - 8 => String::from("%rsi"), - 9 => String::from("%rdi"), - _ => panic!("More than 10 registers is not supported") + let n_spills = limbs - MAX_REGS; + let mut values = Vec::new(); + for i in 0..n_spills { + values.push(vec![i, MAX_REGS+i]); } + (n_spills, values) } } -fn is_spill(limbs: usize, index: usize) -> bool { - let half = 1 + (MAX_REGS / 2); - if limbs <= MAX_REGS { false } else if limbs <= (MAX_REGS+3) { - if limbs == (MAX_REGS+1) { - index % half == 0 - } else if limbs == (MAX_REGS+2) { - (index % half == 0) | (index % half == 1) - } else { - (index % (half + 1) == 0) | (index % (half + 1) == 1) | (index % (half + 1) == 2) - } - } else { true } -} +// This is a compilation pass which converts abstract +// register numbers into x64 registers with spills +fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: &str) -> String { + // println!("{}", asm_string); + let (n_spills, spillable) = get_registers(limbs); + let mut lines = asm_string.split("\n"); -fn get_index(limbs: usize, index: usize) -> usize { - let half = 1 + (MAX_REGS / 2); - if limbs <= MAX_REGS { index } else if limbs <= (MAX_REGS+3) { - if limbs == (MAX_REGS+1) { - if is_spill(limbs, index) { index % half } else { - if index > half { index - 1 } else { index } - } - } else if limbs == (MAX_REGS+2) { - if is_spill(limbs, index) { index % half } else { - if index > half { index - 2 } else { index } - } - } else { - if is_spill(limbs, index) { index % (half + 1) } else { - if index > (half + 1) { index - 3 } else { index } - } - } - } else { index % MAX_REGS } -} + let re = Regex::new(r"%r\d+").unwrap(); + let number = Regex::new(r"\d+").unwrap(); -fn get_spill_index(limbs: usize, index: usize) -> usize { - let half = 1 + (MAX_REGS / 2); - if limbs <= MAX_REGS { - panic!("no spill for {} limbs", limbs); - } else if limbs <= (MAX_REGS+3) { - if limbs == (MAX_REGS+1) { - if index >= half { 1 } else { index } - } else if limbs == (MAX_REGS+2) { - if index >= half { index - half + 2 } else { index } - } else { - if index >= (half + 1) { index - (half + 1) + 3 } else { index } - } - } else if index >= MAX_REGS { - (index - MAX_REGS) + (limbs % MAX_REGS) - } else { - index - } -} + let mut line_number = 0; + let mut reg_sequence: Vec> = std::iter::repeat(vec![]).take(n_spills).collect::>(); -fn swap_spill_index(limbs: usize, index: usize) -> usize { - let half = 1 + (MAX_REGS / 2); - if limbs <= MAX_REGS { - panic!("no spill for {} limbs", limbs); - } else if limbs <= (MAX_REGS+3) { - if limbs == (MAX_REGS+1) { - if index >= half 
{ 0 } else { 1 } - } else if limbs == (MAX_REGS+2) { - if index >= half { index - half } else { index + 2 } - } else { - if index >= (half + 1) { index - (half + 1) } else { index + 3 } - } - } else if index >= MAX_REGS { - index - MAX_REGS - } else { - index + (limbs % MAX_REGS) - } -} + let mut edited_lines: Vec = Vec::new(); -fn spill_swap (asm_string: String, index: usize, limbs: usize, spill: &str) -> String { - format!("{}{}", asm_string, format!(" - mov {r}, {}({spill}) - mov {}({spill}), {r}", - 8 * get_spill_index(limbs, index), - 8 * swap_spill_index(limbs, index), - r = reg_from_index(limbs, index), spill=spill)) - } + for line in lines { + edited_lines.push(line.to_string()); + line_number += 1; + if re.is_match(&line.to_string()) { + let words = line.split(" "); + for word in words { + if re.is_match(&word.to_string()) { + let num = number.captures(word).unwrap(); + let reg_num = &num[0].parse::().unwrap(); + for i in 0..n_spills { + if spillable[i].contains(&(*reg_num-8)) { + reg_sequence[i].push((line_number, *reg_num-8)); + } } } } } } -fn final_swap (asm_string: String, index: usize, limbs: usize, spill: &str, a: &str) -> String { - if get_spill_index(limbs, index) < swap_spill_index(limbs, index) { - format!("{}{}", asm_string, format!(" - mov {r}, {}({a}) - mov {}({spill}), {r}", - 8 * index, - 8 * swap_spill_index(limbs, index), - r = reg_from_index(limbs, index), a=a, spill=spill)) - } else { - format!("{}{}", asm_string, format!(" - mov {r}, {}({a})", - 8 * index,r = reg_from_index(limbs, index), a=a)) + let mut swap_sequence: Vec> = std::iter::repeat(vec![]).take(n_spills).collect::>(); + for i in 0..n_spills { + let length = reg_sequence[i].len(); + if length > 0 { + for j in 0..reg_sequence[i].len()-1 { + if reg_sequence[i][j].1 != reg_sequence[i][j+1].1 { + swap_sequence[i].push((reg_sequence[i][j].0, + reg_sequence[i][j].1, + reg_sequence[i][j+1].1)); + } + } + swap_sequence[i].push((reg_sequence[i][length-1].0, + reg_sequence[i][length-1].1, + reg_sequence[i][length-1].1)); + } + let length = swap_sequence[i].len(); + if length > 1 { + for j in 0..length { + let swap = &swap_sequence[i][j]; + if j < length - 3 { + let index1 = if swap.1 >= MAX_REGS { n_spills + i } else { i }; + let index2 = if swap.2 >= MAX_REGS { n_spills + i } else { i }; + edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" + movq %r{reg}, {index1}({dest}) + movq {index2}({spills}), %r{reg}", + reg=8+spillable[i][0], index1=index1*8, index2=index2*8, + dest=if j!=length-4 {spills} else {a}, spills=spills)); + } + } + let swap = &swap_sequence[i][length-3]; + let index1 = if swap.1 >= MAX_REGS { n_spills + i } else { i }; + edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" + movq %r{reg}, {index1}({dest})", + reg=8+spillable[i][0], index1=index1*8, dest=a)); + edited_lines[&swap_sequence[i][length-2].0-1] = "".to_string(); + edited_lines[&swap_sequence[i][length-1].0-1] = "".to_string(); + } + } + let mut interspersed = edited_lines[..(edited_lines.len())].join("\n"); + for i in 0..n_spills { + interspersed = interspersed.replace(&format!("%r{}", 8+spillable[i][1]), &format!("%r{}", 8+spillable[i][0])); } + interspersed } From d3c1414e0411685f01956f2d93c74efc7d241860 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Sat, 11 Apr 2020 04:18:31 +0800 Subject: [PATCH 05/43] data movement milestone - max 12 limbs --- algebra-core/build.rs | 2 +- algebra-core/field-assembly/Cargo.toml | 1 + 
algebra-core/field-assembly/src/lib.rs | 312 +++++++++++-------------- algebra-core/src/fields/macros.rs | 4 +- 4 files changed, 143 insertions(+), 176 deletions(-) diff --git a/algebra-core/build.rs b/algebra-core/build.rs index 9c663ad07..a00a52027 100644 --- a/algebra-core/build.rs +++ b/algebra-core/build.rs @@ -4,7 +4,7 @@ use std::path::Path; use field_assembly::generate_macro_string; -const NUM_LIMBS: usize = 16; +const NUM_LIMBS: usize = 12; fn main() { let out_dir = env::var_os("OUT_DIR").unwrap(); diff --git a/algebra-core/field-assembly/Cargo.toml b/algebra-core/field-assembly/Cargo.toml index c8879c640..c71b11b70 100644 --- a/algebra-core/field-assembly/Cargo.toml +++ b/algebra-core/field-assembly/Cargo.toml @@ -7,3 +7,4 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] +regex = "" diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index b667b5ea1..fe51711ec 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -1,11 +1,13 @@ extern crate std; +extern crate regex; +use regex::Regex; -const MAX_REGS: usize = 5; +const MAX_REGS: usize = 6; // Only works for up to pub fn generate_macro_string (num_limbs:usize) -> std::string::String { - if (num_limbs > 2 * MAX_REGS) || (MAX_REGS < 4) { - panic!("Number of limbs must be <= {} and MAX_REGS >= 4", 2*MAX_REGS); + if (num_limbs > 2 * MAX_REGS) || (MAX_REGS < 6) { + panic!("Number of limbs must be <= {} and MAX_REGS >= 6", 2*MAX_REGS); } let mut macro_string = String::from( "macro_rules! asm_mul { @@ -26,22 +28,22 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - let mut limb_specialisation = format!(" {} => {{", i); // logic to format macro based on how many limbs there are, whether it is a mul - let (mut b_declare, mut regs_declare, mut b, mut regs) = (" // $3", String::from(""), "$0", ""); + let (mut b_declare, mut spills_declare, mut b, mut spills) = (" // $3", String::from(""), "$0", ""); let mut rs_clobber = String::from(""); - for k in 0..i { rs_clobber = format!("{}{}", rs_clobber, format!("\"r{}\", ", 8+k)); } + for k in 0..std::cmp::min(i, MAX_REGS) { rs_clobber = format!("{}{}", rs_clobber, format!("\"r{}\", ", 8+k)); } if is_mul { b_declare = ", // $3 \"r\"(&$b)"; b = "$4"; - regs_declare = String::from(" // $4"); + spills_declare = String::from(" // $4"); } if i > MAX_REGS { let extra_reg = if i <= 2*MAX_REGS { 2*(i-MAX_REGS) } else { i }; limb_specialisation = format!("{}{}", limb_specialisation, format!(" - let mut regs = [0u64; {}];", extra_reg)); - if is_mul { regs = "$5"; } else { regs = "$4";} - regs_declare = format!(", // ${} - \"r\"(&mut regs) // {}", 3+(is_mul as usize), regs); + let mut spills = [0u64; {}];", extra_reg)); + if is_mul { spills = "$5"; } else { spills = "$4";} + spills_declare = format!(", // ${} + \"r\"(&mut spills) // {}", 3+(is_mul as usize), spills); } // Actual asm declaration @@ -52,15 +54,15 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - : \"r\"(&mut $a), // $0 \"r\"(&$modulus), // $1 \"i\"(0u64), // $2 - \"i\"($inverse){b_declare}{regs_declare} + \"i\"($inverse){b_declare}{spills_declare} : \"rcx\", \"rbx\", \"rdx\", \"rax\", {rs_clobber}\"cc\", \"memory\" ); }} }}", - asm_string = generate_asm_mul_string(i, "$0", b, regs), + asm_string = transform_asm_mul_string(i, generate_asm_mul_string(i, "$0", b), spills, "$0"), rs_clobber=rs_clobber, 
b_declare=b_declare, - regs_declare=regs_declare)); + spills_declare=spills_declare)); macro_string = format!("{}{}", macro_string, limb_specialisation); } macro_string = format!("{}{}", macro_string, format!(" @@ -73,10 +75,8 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - macro_string } -fn generate_asm_mul_string (limbs: usize, a: &str, b: &str, spill: &str) -> String { +fn generate_asm_mul_string (limbs: usize, a: &str, b: &str) -> String { let mut asm_string = String::from(""); - let spilled = std::collections::HashMap::new(); - for i in 0..limbs { // First inner loop if i == 0 { @@ -85,210 +85,176 @@ fn generate_asm_mul_string (limbs: usize, a: &str, b: &str, spill: &str) -> Stri xorq %rcx, %rcx mulxq 0({b}), %r8, %r9", a=a, b=b)); - if is_spill(limbs, 0) { - asm_string = spill_swap(asm_string, 0, limbs, spill); - spilled.insert("%r8", 0); - } - for j in 1..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}({b}), %rax, {} - adcxq %rax, {}", - j*8, reg_from_index(limbs, (j+1) % limbs), - reg_from_index(limbs, j), b=b)); - if is_spill(limbs, j) { - asm_string = spill_swap(asm_string, j, limbs, spill); - spilled.insert(®_from_index(limbs, (j+1) % limbs), j); - } + mulxq {}({b}), %rax, %r{} + adcxq %rax, %r{}", + j*8, 8 + ((j+1) % limbs), 8+j, b=b)); } asm_string = format!("{}{}", asm_string, format!(" mulxq {}({b}), %rax, %rcx mov $2, %rbx - adcxq %rax, {} + adcxq %rax, %r{} adcxq %rbx, %rcx // %rcx is carry1", - (limbs-1)*8, reg_from_index(limbs, limbs-1), b=b)); - if is_spill(limbs, limbs-1) { - asm_string = spill_swap(asm_string, limbs-1, limbs, spill); - spilled.insert(®_from_index(limbs, limbs-1), limbs-1); - } + (limbs-1)*8, 8+limbs-1, b=b)); } else { asm_string = format!("{}{}", asm_string, format!(" movq {}($0), %rdx", i * 8)); for j in 0..limbs-1 { asm_string = format!("{}{}", asm_string, format!(" mulxq {}({b}), %rax, %rbx - adcxq %rax, {} - adoxq %rbx, {}", - j * 8, reg_from_index(limbs, (j+i) % limbs), reg_from_index(limbs, (j+i+1) % limbs), b=b)); - if is_spill(limbs, (j+i) % limbs) { - asm_string = spill_swap(asm_string, (j+i) % limbs, limbs, spill); - spilled.insert(®_from_index(limbs, (j+i) % limbs), (j+i) % limbs); + adcxq %rax, %r{} + adoxq %rbx, %r{}", + j * 8, 8 + ((j+i) % limbs), 8 + ((j+i+1) % limbs), b=b)); } - } asm_string = format!("{}{}", asm_string, format!(" mulxq {}({b}), %rax, %rcx mov $2, %rbx - adcxq %rax, {} + adcxq %rax, %r{} adoxq %rbx, %rcx adcxq %rbx, %rcx", - (limbs-1) * 8, reg_from_index(limbs, (i+limbs-1) % limbs), b=b)); - if is_spill(limbs, (i+limbs-1) % limbs) { asm_string = spill_swap(asm_string, (i+limbs-1) % limbs, limbs, spill); } + (limbs-1) * 8, + 8 + ((i+limbs-1) % limbs), + b=b)); } - // Second inner loop + // Second inner loop asm_string = format!("{}{}", asm_string, format!(" movq $3, %rdx mulxq %r{}, %rdx, %rax // wrapping_mul", 8+i)); asm_string = format!("{}{}", asm_string, format!(" mulxq 0($1), %rax, %rbx - adcxq {}, %rax // put junk in rax - adoxq %rbx, {}", - reg_from_index(limbs, i % limbs), - reg_from_index(limbs, (i+1) % limbs))); + adcxq %r{}, %rax // put junk in rax + adoxq %rbx, %r{}", + 8 + (i % limbs), + 8 + ((i+1) % limbs))); for j in 1..limbs-1 { asm_string = format!("{}{}", asm_string, format!(" mulxq {}($1), %rax, %rbx - adcxq %rax, {} - adoxq %rbx, {}", + adcxq %rax, %r{} + adoxq %rbx, %r{}", j * 8, - reg_from_index(limbs, (j+i) % limbs), - reg_from_index(limbs, (j+i+1) % limbs))); - if i == limbs-1 { - if is_spill(limbs, (j+i) % limbs) { asm_string = 
final_swap(asm_string, (j+i) % limbs, limbs, spill, a);} - } else { - if is_spill(limbs, (j+i) % limbs) { asm_string = spill_swap(asm_string, (j+i) % limbs, limbs, spill); } - } - + 8 + ((j+i) % limbs), + 8 + ((j+i+1) % limbs))); } asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, {2} + mulxq {}($1), %rax, %r{2} mov $2, %rbx - adcxq %rax, {} - adoxq %rcx, {2} - adcxq %rbx, {2}", + adcxq %rax, %r{} + adoxq %rcx, %r{2} + adcxq %rbx, %r{2}", (limbs-1)*8, - reg_from_index(limbs, (i+limbs-1) % limbs), - reg_from_index(limbs, i % limbs))); - if i == limbs-1 { - if is_spill(limbs, (i+limbs-1) % limbs) { asm_string = final_swap(asm_string, (i+limbs-1) % limbs, limbs, spill, a); } - if is_spill(limbs, i % limbs) { asm_string = final_swap(asm_string, i % limbs, limbs, spill, a); } - } else { - if is_spill(limbs, (i+limbs-1) % limbs) { asm_string = spill_swap(asm_string, (i+limbs-1) % limbs, limbs, spill); } - if is_spill(limbs, i % limbs) { asm_string = spill_swap(asm_string, i % limbs, limbs, spill); } - } + 8 + ((i+limbs-1) % limbs), + 8 + ((i) % limbs))); } for i in 0..limbs { - if !is_spill(limbs, i) { - asm_string = format!("{}{}", asm_string, format!(" - movq %r{}, {}($0)", 8+(i % limbs), i*8)); - } + asm_string = format!("{}{}", asm_string, format!(" + movq %r{}, {}($0)", 8+(i % limbs), i*8)); } - format!("{}{}", asm_string, "\"") + format!("{}{}", asm_string, " + \"") } -fn reg_from_index (limbs: usize, index: usize) -> String { - let index = get_index(limbs, index); - if index < 8 { - format!("%r{}", index+8) + +fn get_registers (limbs: usize) -> (usize, Vec>) { + assert!(limbs <= 2*MAX_REGS); + + if limbs <= MAX_REGS { + (0, Vec::new()) + } else if limbs == MAX_REGS + 1 { + (1, vec![ + vec![MAX_REGS/2, MAX_REGS] + ]) + } else if limbs == MAX_REGS + 2 { + (2, vec![ + vec![MAX_REGS/2, MAX_REGS], + vec![MAX_REGS/2+1, MAX_REGS+1] + ]) + } else if limbs == MAX_REGS + 3 { + (3, vec![ + vec![MAX_REGS/2, MAX_REGS], + vec![MAX_REGS/2+1, MAX_REGS+1], + vec![MAX_REGS/2+2, MAX_REGS+2] + ]) } else { - match index { - 8 => String::from("%rsi"), - 9 => String::from("%rdi"), - _ => panic!("More than 10 registers is not supported") + let n_spills = limbs - MAX_REGS; + let mut values = Vec::new(); + for i in 0..n_spills { + values.push(vec![i, MAX_REGS+i]); } + (n_spills, values) } } -fn is_spill(limbs: usize, index: usize) -> bool { - let half = 1 + (MAX_REGS / 2); - if limbs <= MAX_REGS { false } else if limbs <= (MAX_REGS+3) { - if limbs == (MAX_REGS+1) { - index % half == 0 - } else if limbs == (MAX_REGS+2) { - (index % half == 0) | (index % half == 1) - } else { - (index % (half + 1) == 0) | (index % (half + 1) == 1) | (index % (half + 1) == 2) - } - } else { true } -} +// This is a compilation pass which converts abstract +// register numbers into x64 registers with spills +fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: &str) -> String { + // println!("{}", asm_string); + let (n_spills, spillable) = get_registers(limbs); + let mut lines = asm_string.split("\n"); -fn get_index(limbs: usize, index: usize) -> usize { - let half = 1 + (MAX_REGS / 2); - if limbs <= MAX_REGS { index } else if limbs <= (MAX_REGS+3) { - if limbs == (MAX_REGS+1) { - if is_spill(limbs, index) { index % half } else { - if index > half { index - 1 } else { index } - } - } else if limbs == (MAX_REGS+2) { - if is_spill(limbs, index) { index % half } else { - if index > half { index - 2 } else { index } - } - } else { - if is_spill(limbs, index) { index % (half + 1) } else { - 
if index > (half + 1) { index - 3 } else { index } - } - } - } else { index % MAX_REGS } -} + let re = Regex::new(r"%r\d+").unwrap(); + let number = Regex::new(r"\d+").unwrap(); -fn get_spill_index(limbs: usize, index: usize) -> usize { - let half = 1 + (MAX_REGS / 2); - if limbs <= MAX_REGS { - panic!("no spill for {} limbs", limbs); - } else if limbs <= (MAX_REGS+3) { - if limbs == (MAX_REGS+1) { - if index >= half { 1 } else { index } - } else if limbs == (MAX_REGS+2) { - if index >= half { index - half + 2 } else { index } - } else { - if index >= (half + 1) { index - (half + 1) + 3 } else { index } - } - } else if index >= MAX_REGS { - (index - MAX_REGS) + (limbs % MAX_REGS) - } else { - index - } -} + let mut line_number = 0; + let mut reg_sequence: Vec> = std::iter::repeat(vec![]).take(n_spills).collect::>(); -fn swap_spill_index(limbs: usize, index: usize) -> usize { - let half = 1 + (MAX_REGS / 2); - if limbs <= MAX_REGS { - panic!("no spill for {} limbs", limbs); - } else if limbs <= (MAX_REGS+3) { - if limbs == (MAX_REGS+1) { - if index >= half { 0 } else { 1 } - } else if limbs == (MAX_REGS+2) { - if index >= half { index - half } else { index + 2 } - } else { - if index >= (half + 1) { index - (half + 1) } else { index + 3 } - } - } else if index >= MAX_REGS { - index - MAX_REGS - } else { - index + (limbs % MAX_REGS) - } -} + let mut edited_lines: Vec = Vec::new(); -fn spill_swap (asm_string: String, index: usize, limbs: usize, spill: &str) -> String { - format!("{}{}", asm_string, format!(" - mov {r}, {}({spill}) - mov {}({spill}), {r}", - 8 * get_spill_index(limbs, index), - 8 * swap_spill_index(limbs, index), - r = reg_from_index(limbs, index), spill=spill)) - } + for line in lines { + edited_lines.push(line.to_string()); + line_number += 1; + if re.is_match(&line.to_string()) { + let words = line.split(" "); + for word in words { + if re.is_match(&word.to_string()) { + let num = number.captures(word).unwrap(); + let reg_num = &num[0].parse::().unwrap(); + for i in 0..n_spills { + if spillable[i].contains(&(*reg_num-8)) { + reg_sequence[i].push((line_number, *reg_num-8)); + } } } } } } -fn final_swap (asm_string: String, index: usize, limbs: usize, spill: &str, a: &str) -> String { - if get_spill_index(limbs, index) < swap_spill_index(limbs, index) { - format!("{}{}", asm_string, format!(" - mov {r}, {}({a}) - mov {}({spill}), {r}", - 8 * index, - 8 * swap_spill_index(limbs, index), - r = reg_from_index(limbs, index), a=a, spill=spill)) - } else { - format!("{}{}", asm_string, format!(" - mov {r}, {}({a})", - 8 * index,r = reg_from_index(limbs, index), a=a)) + let mut swap_sequence: Vec> = std::iter::repeat(vec![]).take(n_spills).collect::>(); + for i in 0..n_spills { + let length = reg_sequence[i].len(); + if length > 0 { + for j in 0..reg_sequence[i].len()-1 { + if reg_sequence[i][j].1 != reg_sequence[i][j+1].1 { + swap_sequence[i].push((reg_sequence[i][j].0, + reg_sequence[i][j].1, + reg_sequence[i][j+1].1)); + } + } + swap_sequence[i].push((reg_sequence[i][length-1].0, + reg_sequence[i][length-1].1, + reg_sequence[i][length-1].1)); + } + let length = swap_sequence[i].len(); + if length > 1 { + for j in 0..length { + let swap = &swap_sequence[i][j]; + if j < length - 3 { + let index1 = if swap.1 >= MAX_REGS { n_spills + i } else { i }; + let index2 = if swap.2 >= MAX_REGS { n_spills + i } else { i }; + edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" + movq %r{reg}, {index1}({dest}) + movq {index2}({spills}), %r{reg}", + reg=8+spillable[i][0], 
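
To make the register classes concrete: with MAX_REGS = 6, get_registers partitions the virtual registers roughly as below. Each inner vector is one class whose members time-share a single physical register plus a spill slot (illustrative checks, not code from the patch):

    assert_eq!(get_registers(6), (0, vec![]));                       // six limbs fit in %r8..%r13 directly
    assert_eq!(get_registers(7), (1, vec![vec![3, 6]]));             // virtual regs 3 and 6 share %r11
    assert_eq!(get_registers(8), (2, vec![vec![3, 6], vec![4, 7]])); // two shared registers, two spill slots
    assert_eq!(get_registers(12).0, 6);                              // at twelve limbs every register is shared
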
index1=index1*8, index2=index2*8, + dest=if j!=length-4 {spills} else {a}, spills=spills)); + } + } + let swap = &swap_sequence[i][length-3]; + let index1 = if swap.1 >= MAX_REGS { n_spills + i } else { i }; + edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" + movq %r{reg}, {index1}({dest})", + reg=8+spillable[i][0], index1=index1*8, dest=a)); + edited_lines[&swap_sequence[i][length-2].0-1] = "".to_string(); + edited_lines[&swap_sequence[i][length-1].0-1] = "".to_string(); + } + } + let mut interspersed = edited_lines[..(edited_lines.len())].join("\n"); + for i in 0..n_spills { + interspersed = interspersed.replace(&format!("%r{}", 8+spillable[i][1]), &format!("%r{}", 8+spillable[i][0])); } + interspersed } diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index 5e2dc9c01..b758b3b4b 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -454,7 +454,7 @@ macro_rules! impl_field_mul_assign { // No-carry optimisation applied to CIOS if no_carry { - if $limbs <= 8 {//== 4 {//true {// + if $limbs <= 13 {//== 4 {//true {// asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); self.reduce(); } else { @@ -536,7 +536,7 @@ macro_rules! impl_field_square_in_place { #[inline] #[unroll_for_loops] fn square_in_place(&mut self) -> &mut Self { - if $limbs <= 8 { + if $limbs <= 12 { asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); self.reduce(); self From a4dfc3b010e673c32e711fd47efe97ed155c7997 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Sat, 11 Apr 2020 11:35:55 +0800 Subject: [PATCH 06/43] 18 limbs --- algebra-core/build.rs | 2 +- algebra-core/field-assembly/src/lib.rs | 60 +++++++++++++++++++++----- algebra-core/src/fields/macros.rs | 4 +- 3 files changed, 52 insertions(+), 14 deletions(-) diff --git a/algebra-core/build.rs b/algebra-core/build.rs index a00a52027..5d2463640 100644 --- a/algebra-core/build.rs +++ b/algebra-core/build.rs @@ -4,7 +4,7 @@ use std::path::Path; use field_assembly::generate_macro_string; -const NUM_LIMBS: usize = 12; +const NUM_LIMBS: usize = 18; fn main() { let out_dir = env::var_os("OUT_DIR").unwrap(); diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index fe51711ec..541e7ed31 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -6,8 +6,8 @@ const MAX_REGS: usize = 6; // Only works for up to pub fn generate_macro_string (num_limbs:usize) -> std::string::String { - if (num_limbs > 2 * MAX_REGS) || (MAX_REGS < 6) { - panic!("Number of limbs must be <= {} and MAX_REGS >= 6", 2*MAX_REGS); + if (num_limbs > 3 * MAX_REGS) {//|| (MAX_REGS < 6) { + panic!("Number of limbs must be <= {} and MAX_REGS >= 6", 3*MAX_REGS); } let mut macro_string = String::from( "macro_rules! 
asm_mul { @@ -156,7 +156,7 @@ fn generate_asm_mul_string (limbs: usize, a: &str, b: &str) -> String { fn get_registers (limbs: usize) -> (usize, Vec>) { - assert!(limbs <= 2*MAX_REGS); + assert!(limbs <= 3*MAX_REGS); if limbs <= MAX_REGS { (0, Vec::new()) @@ -175,18 +175,29 @@ fn get_registers (limbs: usize) -> (usize, Vec>) { vec![MAX_REGS/2+1, MAX_REGS+1], vec![MAX_REGS/2+2, MAX_REGS+2] ]) - } else { + } else if limbs <= MAX_REGS * 2 { let n_spills = limbs - MAX_REGS; let mut values = Vec::new(); for i in 0..n_spills { values.push(vec![i, MAX_REGS+i]); } (n_spills, values) + } else { // (if limbs <= MAX_REGS * 3) + let mut values = Vec::new(); + for i in 0..MAX_REGS { + if i < limbs - 2*MAX_REGS { + values.push(vec![i, MAX_REGS+i, 2*MAX_REGS+i]); + } else { + values.push(vec![i, MAX_REGS+i]); + } + } + (MAX_REGS, values) } } // This is a compilation pass which converts abstract -// register numbers into x64 registers with spills +// register numbers into x64 registers with spills. +// Rather hacky at this stage fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: &str) -> String { // println!("{}", asm_string); let (n_spills, spillable) = get_registers(limbs); @@ -220,9 +231,9 @@ fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: if length > 0 { for j in 0..reg_sequence[i].len()-1 { if reg_sequence[i][j].1 != reg_sequence[i][j+1].1 { - swap_sequence[i].push((reg_sequence[i][j].0, - reg_sequence[i][j].1, - reg_sequence[i][j+1].1)); + swap_sequence[i].push((reg_sequence[i][j].0, // line number + reg_sequence[i][j].1, // current reg index + reg_sequence[i][j+1].1)); // next reg index } } swap_sequence[i].push((reg_sequence[i][length-1].0, @@ -230,12 +241,12 @@ fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: reg_sequence[i][length-1].1)); } let length = swap_sequence[i].len(); - if length > 1 { + if length > 1 && spillable[i].len() <= 2 { for j in 0..length { let swap = &swap_sequence[i][j]; if j < length - 3 { let index1 = if swap.1 >= MAX_REGS { n_spills + i } else { i }; - let index2 = if swap.2 >= MAX_REGS { n_spills + i } else { i }; + let index2 = if swap.2 >= MAX_REGS { n_spills + i } else { i }; edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" movq %r{reg}, {index1}({dest}) movq {index2}({spills}), %r{reg}", @@ -250,11 +261,38 @@ fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: reg=8+spillable[i][0], index1=index1*8, dest=a)); edited_lines[&swap_sequence[i][length-2].0-1] = "".to_string(); edited_lines[&swap_sequence[i][length-1].0-1] = "".to_string(); + } else { + for j in 0..length { + let swap = &swap_sequence[i][j]; + if j < length - 4 { + edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" + movq %r{reg}, {index1}({dest}) + movq {index2}({spills}), %r{reg}", + reg=8+spillable[i][0], index1=swap.1*8, index2=swap.2*8, + dest=if j!=length-5 && j!=length-6 {spills} else {a}, spills=spills)); + } + } + let swap = &swap_sequence[i][length-4]; + edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" + movq %r{reg}, {index1}({dest})", + reg=8+spillable[i][0], index1=swap.1*8, dest=a)); + edited_lines[&swap_sequence[i][length-3].0-1] = "".to_string(); + edited_lines[&swap_sequence[i][length-2].0-1] = "".to_string(); + edited_lines[&swap_sequence[i][length-1].0-1] = "".to_string(); + } + } + let length = edited_lines.len(); + for i in 0..limbs+1 { + if edited_lines[length-1-i] == "" { + 
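
Past 2*MAX_REGS limbs a class can now hold three virtual registers, so one physical register stands in for three limbs and needs two spill slots; for example with MAX_REGS = 6 (again just an illustrative check, not part of the patch):

    let (shared, classes) = get_registers(14);
    assert_eq!(shared, 6);                   // every physical register is shared
    assert_eq!(classes[0], vec![0, 6, 12]);  // %r8 covers virtual regs 0, 6 and 12
    assert_eq!(classes[5], vec![5, 11]);     // %r13 still only covers two
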
edited_lines.remove(length-1-i); } } - let mut interspersed = edited_lines[..(edited_lines.len())].join("\n"); + let mut interspersed = edited_lines[..].join("\n"); for i in 0..n_spills { interspersed = interspersed.replace(&format!("%r{}", 8+spillable[i][1]), &format!("%r{}", 8+spillable[i][0])); + if spillable[i].len() == 3 { + interspersed = interspersed.replace(&format!("%r{}", 8+spillable[i][2]), &format!("%r{}", 8+spillable[i][0])); + } } interspersed } diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index b758b3b4b..c78e8469f 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -454,7 +454,7 @@ macro_rules! impl_field_mul_assign { // No-carry optimisation applied to CIOS if no_carry { - if $limbs <= 13 {//== 4 {//true {// + if $limbs <= 18 { asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); self.reduce(); } else { @@ -536,7 +536,7 @@ macro_rules! impl_field_square_in_place { #[inline] #[unroll_for_loops] fn square_in_place(&mut self) -> &mut Self { - if $limbs <= 12 { + if $limbs <= 18 { asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); self.reduce(); self From 80e7d6aec144ef499c8b8df15879e0642d847ec9 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 15 Apr 2020 19:24:21 +0800 Subject: [PATCH 07/43] break up into: assembler and arithmetic --- algebra-core/field-assembly/src/arithmetic.rs | 50 ++++++ algebra-core/field-assembly/src/assembler.rs | 79 +++++++++ algebra-core/field-assembly/src/lib.rs | 166 +++++++----------- 3 files changed, 196 insertions(+), 99 deletions(-) create mode 100644 algebra-core/field-assembly/src/arithmetic.rs create mode 100644 algebra-core/field-assembly/src/assembler.rs diff --git a/algebra-core/field-assembly/src/arithmetic.rs b/algebra-core/field-assembly/src/arithmetic.rs new file mode 100644 index 000000000..22c44b02a --- /dev/null +++ b/algebra-core/field-assembly/src/arithmetic.rs @@ -0,0 +1,50 @@ +use crate::assembler::*; +use std::rc::Rc; + +// Computes [rcx, r(8+limbs), ..., r8] = 0($a) * [r(8+limbs), ..., r8] +pub fn mul_1 (asm: &mut Assembler, a: &str, b: &Vec<&str>) { + asm.movq(a, RDX); + asm.xorq(RCX, RCX); + asm.mulxq(b[0], R[0], R[1]); + for j in 1..asm.limbs-1 { + asm.mulxq(b[j], RAX, R[((j + 1) % asm.limbs)]); + asm.adcxq(RAX, R[j]); + } + asm.mulxq(b[asm.limbs-1], RAX, RCX); + asm.movq("$2", RBX); + asm.adcxq(RAX, R[asm.limbs-1]); + asm.adcxq(RBX, RCX); +} + + +pub fn mul_add_1 (asm: &mut Assembler, a: &Vec<&str>, b: &Vec<&str>, i: usize) { + asm.movq(a[i], RDX); + for j in 0..asm.limbs-1 { + asm.mulxq(b[j], RAX, RBX); + asm.adcxq(RAX, R[(j+i) % asm.limbs]); + asm.adoxq(RBX, R[(j+i+1) % asm.limbs]); + } + asm.mulxq(b[asm.limbs-1], RAX, RCX); + asm.movq("$2", RBX); + asm.adcxq(RAX, R[(i+asm.limbs-1) % asm.limbs]); + asm.adoxq(RBX, RCX); + asm.adcxq(RBX, RCX); +} + +pub fn mul_add_shift_1 (asm: &mut Assembler, a: &Vec<&str>, i: usize) { + asm.movq("$3", RDX); + asm.mulxq(R[i], RDX, RAX); + asm.mulxq(a[0], RAX, RBX); + asm.adcxq(R[i % asm.limbs], RAX); + asm.adoxq(RBX, R[(i+1) % asm.limbs]); + for j in 1..asm.limbs-1 { + asm.mulxq(a[j], RAX, RBX); + asm.adcxq(RAX, R[(j+i) % asm.limbs]); + asm.adoxq(RBX, R[(j+i+1) % asm.limbs]); + } + asm.mulxq(a[asm.limbs-1], RAX, R[i % asm.limbs]); + asm.movq("$2", RBX); + asm.adcxq(RAX, R[(i+asm.limbs-1) % asm.limbs]); + asm.adoxq(RCX, R[i % asm.limbs]); + asm.adcxq(RBX, R[i % asm.limbs]); +} diff --git a/algebra-core/field-assembly/src/assembler.rs 
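
For reference, the computation that mul_1, mul_add_1 and mul_add_shift_1 unroll into mulxq/adcxq/adoxq chains is the no-carry CIOS Montgomery product. A plain-Rust sketch of one full product follows; it is illustrative only (function and variable names are mine), and the final conditional subtraction corresponds to the reduce() call in macros.rs:

    // x + y*z + carry over 64-bit limbs, returned as (low, high); the sum cannot overflow u128.
    fn mac(x: u64, y: u64, z: u64, carry: u64) -> (u64, u64) {
        let wide = x as u128 + (y as u128) * (z as u128) + carry as u128;
        (wide as u64, (wide >> 64) as u64)
    }

    // a, b, q are little-endian limbs of the operands and the modulus; inv = -q[0]^{-1} mod 2^64.
    fn mont_mul_no_carry(a: &[u64], b: &[u64], q: &[u64], inv: u64) -> Vec<u64> {
        let n = a.len();
        let mut t = vec![0u64; n];
        for i in 0..n {
            // First inner loop (mul_1 / mul_add_1): t += a[i] * b, top word kept in carry1 (%rcx above).
            let mut carry = 0u64;
            for j in 0..n {
                let (lo, hi) = mac(t[j], a[i], b[j], carry);
                t[j] = lo;
                carry = hi;
            }
            let carry1 = carry;
            // Second inner loop (mul_add_shift_1): fold in m*q and shift the window down one limb.
            let m = t[0].wrapping_mul(inv);                 // the "wrapping_mul" in the asm comments
            let (_junk, mut carry) = mac(t[0], m, q[0], 0); // low word is zero by construction, discarded
            for j in 1..n {
                let (lo, hi) = mac(t[j], m, q[j], carry);
                t[j - 1] = lo;
                carry = hi;
            }
            t[n - 1] = carry1.wrapping_add(carry);          // the no-carry condition rules out overflow here
        }
        t // still possibly >= q; the caller subtracts q once via reduce()
    }
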
b/algebra-core/field-assembly/src/assembler.rs new file mode 100644 index 000000000..6c419ee2c --- /dev/null +++ b/algebra-core/field-assembly/src/assembler.rs @@ -0,0 +1,79 @@ +use std::rc::Rc; + +pub const RAX: &'static str = "%rax"; +pub const RBX: &'static str = "%rbx"; +pub const RCX: &'static str = "%rcx"; +pub const RDX: &'static str = "%rdx"; +pub const RDI: &'static str = "%rdi"; +pub const RSI: &'static str = "%rsi"; +pub const R: [&'static str; 18] = ["%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", + "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", "%r24", "%r25"]; + +pub struct Assembler { + pub limbs: usize, + asm_string: Rc, + +} + +impl<'a> Assembler { + pub fn new (limbs: usize) -> Assembler { + Assembler { + limbs: limbs, + asm_string: Rc::new(String::new()), + } + } + + pub fn get_asm_string (&mut self) -> String { + Rc::make_mut(&mut self.asm_string).to_string() + } + + pub fn begin (&mut self) { + self.asm_string = Rc::new("\"".to_string()); + } + pub fn end (&mut self) { + self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), " + \"".to_string())); + } + + pub fn comment (&mut self, comment: &str) { + self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" // {}", comment))); + } + + pub fn mulxq (&mut self, a: &str, b: &str, c: &str) { + self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" + mulxq {}, {}, {}", a, b, c))); + } + + pub fn adcxq (&mut self, a: &str, b: &str) { + self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" + adcxq {}, {}", a, b))); + } + + pub fn adoxq (&mut self, a: &str, b: &str) { + self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" + adoxq {}, {}", a, b))); + } + + pub fn movq (&mut self, a: &str, b: &str) { + self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" + movq {}, {}", a, b))); + } + + pub fn xorq (&mut self, a: &str, b: &str) { + self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" + xorq {}, {}", a, b))); + } +} + +macro_rules! 
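
A quick usage sketch of the builder above (not part of the patch): each intrinsic call appends one instruction to the internal string, and get_asm_string() returns the quoted template that later goes inside asm!(). Note that only %r8 through %r15 exist on x86-64, so only the first eight entries of R are usable; a later patch in this series trims the table accordingly.

    let mut asm = Assembler::new(4);
    asm.begin();                      // opens the quoted template
    asm.xorq(RCX, RCX);               // appends: xorq %rcx, %rcx
    asm.movq("0($0)", RDX);           // appends: movq 0($0), %rdx
    asm.mulxq("0($1)", R[0], R[1]);   // appends: mulxq 0($1), %r8, %r9
    asm.end();                        // closes the quoted template
    let template = asm.get_asm_string();
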
generate_array { + ($a_0:ident, $a_1:ident, $a:ident, $range:expr) => { + let mut $a_0 = Vec::new(); + let mut $a_1 = Vec::new(); + for i in 0..$range { + $a_0.push(format!("{}({})", i*8, $a)); + } + for i in 0..$range { + $a_1.push(&*$a_0[i]); + } + } +} diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index 541e7ed31..d1f3fa8bc 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -2,11 +2,19 @@ extern crate std; extern crate regex; use regex::Regex; -const MAX_REGS: usize = 6; +#[macro_use] +pub mod assembler; +pub mod arithmetic; + +use assembler::*; +use arithmetic::*; + +use std::rc::Rc; + +const MAX_REGS: usize = 7; -// Only works for up to pub fn generate_macro_string (num_limbs:usize) -> std::string::String { - if (num_limbs > 3 * MAX_REGS) {//|| (MAX_REGS < 6) { + if (num_limbs > 3 * MAX_REGS) { panic!("Number of limbs must be <= {} and MAX_REGS >= 6", 3*MAX_REGS); } let mut macro_string = String::from( @@ -23,6 +31,30 @@ pub fn generate_macro_string (num_limbs:usize) -> std::string::String { macro_string } +fn generate_asm_mul_string (limbs: usize, a: &str, b: &str) -> String { + let modulus = "$1"; + generate_array!(a0, a1, a, limbs); + generate_array!(b0, b1, b, limbs); + generate_array!(m, m1, modulus, limbs); + + let mut asm = Assembler::new(limbs); + + asm.begin(); + for i in 0..limbs { + if i == 0 { + arithmetic::mul_1(&mut asm, a1[0], &b1); + } else { + arithmetic::mul_add_1(&mut asm, &a1, &b1, i); + } + arithmetic::mul_add_shift_1(&mut asm, &m1, i); + } + for i in 0..asm.limbs { + asm.movq(R[i], a1[i]); + } + asm.end(); + asm.get_asm_string() +} + fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) -> String { for i in 2..(num_limbs+1) { let mut limb_specialisation = format!(" @@ -37,14 +69,14 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - b = "$4"; spills_declare = String::from(" // $4"); } - if i > MAX_REGS { - let extra_reg = if i <= 2*MAX_REGS { 2*(i-MAX_REGS) } else { i }; - limb_specialisation = format!("{}{}", limb_specialisation, format!(" - let mut spills = [0u64; {}];", extra_reg)); - if is_mul { spills = "$5"; } else { spills = "$4";} - spills_declare = format!(", // ${} - \"r\"(&mut spills) // {}", 3+(is_mul as usize), spills); - } + // if i > MAX_REGS { + // let extra_reg = if i <= 2*MAX_REGS { 2*(i-MAX_REGS) } else { i }; + // limb_specialisation = format!("{}{}", limb_specialisation, format!(" + // let mut spills = [0u64; {}];", extra_reg)); + // if is_mul { spills = "$5"; } else { spills = "$4";} + // spills_declare = format!(", // ${} + // \"r\"(&mut spills) // {}", 3+(is_mul as usize), spills); + // } // Actual asm declaration limb_specialisation = format!("{}{}", limb_specialisation, format!(" @@ -59,7 +91,7 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - ); }} }}", - asm_string = transform_asm_mul_string(i, generate_asm_mul_string(i, "$0", b), spills, "$0"), + asm_string = transform_asm_mul_string(i, generate_asm_mul_string(i, "$0", b), "%rsp", "$0"), rs_clobber=rs_clobber, b_declare=b_declare, spills_declare=spills_declare)); @@ -75,86 +107,8 @@ fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) - macro_string } -fn generate_asm_mul_string (limbs: usize, a: &str, b: &str) -> String { - let mut asm_string = String::from(""); - for i in 0..limbs { - // First inner loop - if i == 0 { - asm_string = format!("{}{}", asm_string,format!("\" - 
movq 0({a}), %rdx - xorq %rcx, %rcx - mulxq 0({b}), %r8, %r9", - a=a, b=b)); - for j in 1..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}({b}), %rax, %r{} - adcxq %rax, %r{}", - j*8, 8 + ((j+1) % limbs), 8+j, b=b)); - } - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}({b}), %rax, %rcx - mov $2, %rbx - adcxq %rax, %r{} - adcxq %rbx, %rcx // %rcx is carry1", - (limbs-1)*8, 8+limbs-1, b=b)); - } else { - asm_string = format!("{}{}", asm_string, format!(" - movq {}($0), %rdx", i * 8)); - for j in 0..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}({b}), %rax, %rbx - adcxq %rax, %r{} - adoxq %rbx, %r{}", - j * 8, 8 + ((j+i) % limbs), 8 + ((j+i+1) % limbs), b=b)); - } - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}({b}), %rax, %rcx - mov $2, %rbx - adcxq %rax, %r{} - adoxq %rbx, %rcx - adcxq %rbx, %rcx", - (limbs-1) * 8, - 8 + ((i+limbs-1) % limbs), - b=b)); - } - // Second inner loop - asm_string = format!("{}{}", asm_string, format!(" - movq $3, %rdx - mulxq %r{}, %rdx, %rax // wrapping_mul", 8+i)); - asm_string = format!("{}{}", asm_string, format!(" - mulxq 0($1), %rax, %rbx - adcxq %r{}, %rax // put junk in rax - adoxq %rbx, %r{}", - 8 + (i % limbs), - 8 + ((i+1) % limbs))); - for j in 1..limbs-1 { - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, %rbx - adcxq %rax, %r{} - adoxq %rbx, %r{}", - j * 8, - 8 + ((j+i) % limbs), - 8 + ((j+i+1) % limbs))); - } - asm_string = format!("{}{}", asm_string, format!(" - mulxq {}($1), %rax, %r{2} - mov $2, %rbx - adcxq %rax, %r{} - adoxq %rcx, %r{2} - adcxq %rbx, %r{2}", - (limbs-1)*8, - 8 + ((i+limbs-1) % limbs), - 8 + ((i) % limbs))); - } - for i in 0..limbs { - asm_string = format!("{}{}", asm_string, format!(" - movq %r{}, {}($0)", 8+(i % limbs), i*8)); - } - format!("{}{}", asm_string, " - \"") -} - - +// This hacky way of partitioning registers can be +// replaced by something more generic like graph colouring fn get_registers (limbs: usize) -> (usize, Vec>) { assert!(limbs <= 3*MAX_REGS); @@ -182,7 +136,7 @@ fn get_registers (limbs: usize) -> (usize, Vec>) { values.push(vec![i, MAX_REGS+i]); } (n_spills, values) - } else { // (if limbs <= MAX_REGS * 3) + } else { // if limbs <= MAX_REGS * 3 let mut values = Vec::new(); for i in 0..MAX_REGS { if i < limbs - 2*MAX_REGS { @@ -195,11 +149,19 @@ fn get_registers (limbs: usize) -> (usize, Vec>) { } } +// fn get_registers_ (limbs: usize) -> { +// if limbs > MAX_REGS { +// let mut values = Vec::new(); +// for i in 0..limbs-MAX_REGS { +// +// } +// (limbs-MAX_REGS, ) +// } +// } + // This is a compilation pass which converts abstract -// register numbers into x64 registers with spills. -// Rather hacky at this stage +// register numbers into x64 registers with spills. 
(Unfortunately, rather hacky) fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: &str) -> String { - // println!("{}", asm_string); let (n_spills, spillable) = get_registers(limbs); let mut lines = asm_string.split("\n"); @@ -211,6 +173,8 @@ fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: let mut edited_lines: Vec = Vec::new(); + // For every given register equivalence class, + // we collect a list of their occurance in sequential order for line in lines { edited_lines.push(line.to_string()); line_number += 1; @@ -225,6 +189,8 @@ fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: reg_sequence[i].push((line_number, *reg_num-8)); } } } } } } + // We then extract a sequence of swap points, + // where there are switches between the registers marked for the same colour let mut swap_sequence: Vec> = std::iter::repeat(vec![]).take(n_spills).collect::>(); for i in 0..n_spills { let length = reg_sequence[i].len(); @@ -239,7 +205,8 @@ fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: swap_sequence[i].push((reg_sequence[i][length-1].0, reg_sequence[i][length-1].1, reg_sequence[i][length-1].1)); - } + } + // Finally, we insert code to swap the registers to and from memory let length = swap_sequence[i].len(); if length > 1 && spillable[i].len() <= 2 { for j in 0..length { @@ -261,7 +228,7 @@ fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: reg=8+spillable[i][0], index1=index1*8, dest=a)); edited_lines[&swap_sequence[i][length-2].0-1] = "".to_string(); edited_lines[&swap_sequence[i][length-1].0-1] = "".to_string(); - } else { + } else { // If we have 3 virtual registers allocated to our given register for j in 0..length { let swap = &swap_sequence[i][j]; if j < length - 4 { @@ -281,6 +248,7 @@ fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: edited_lines[&swap_sequence[i][length-1].0-1] = "".to_string(); } } + // Remove lines marked for removal let length = edited_lines.len(); for i in 0..limbs+1 { if edited_lines[length-1-i] == "" { @@ -288,7 +256,7 @@ fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: } } let mut interspersed = edited_lines[..].join("\n"); - for i in 0..n_spills { + for i in 0..n_spills { // Replace virtual register numbers with physical registers interspersed = interspersed.replace(&format!("%r{}", 8+spillable[i][1]), &format!("%r{}", 8+spillable[i][0])); if spillable[i].len() == 3 { interspersed = interspersed.replace(&format!("%r{}", 8+spillable[i][2]), &format!("%r{}", 8+spillable[i][0])); From c46481b0e1bfb65e737eab67f94e8bac43715e40 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 16 Apr 2020 06:53:43 +0800 Subject: [PATCH 08/43] Add: context. 
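
In outline, a Context gathers everything one match arm needs: each add_declaration() becomes a positional "$N" operand (numbered in insertion order), add_asm() wraps the generated template in unsafe { asm!(...) }, and build() prints the operand and clobber lists and closes the arm. A sketch of how generate_matches drives it for the 4-limb squaring arm (asm_string being the generated template; this mirrors the code below rather than quoting it):

    let mut ctx = Context::new();
    ctx.add_declaration("a", "r", "&mut $a");          // token $0, emitted as "r"(&mut $a)
    ctx.add_declaration("modulus", "r", "&$modulus");  // token $1
    ctx.add_declaration("0", "i", "0u64");             // token $2
    ctx.add_declaration("inverse", "i", "$inverse");   // token $3
    ctx.add_limb(4);                                   // opens the "4 => {" arm
    ctx.add_asm(asm_string);                           // unsafe { asm!(template : : ...
    ctx.add_clobber_from_vec(vec!["rcx", "rbx", "rdx", "rax"]);
    for j in 0..4 { ctx.add_clobber(REG_CLOBBER[j]); } // r8..r11
    ctx.add_clobber_from_vec(vec!["cc", "memory"]);
    ctx.build();                                       // prints declarations and clobbers, closes the arm
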
--- algebra-core/build.rs | 2 +- algebra-core/field-assembly/src/arithmetic.rs | 31 +- algebra-core/field-assembly/src/assembler.rs | 18 +- algebra-core/field-assembly/src/context.rs | 121 +++++++ algebra-core/field-assembly/src/lib.rs | 294 +++++------------- algebra-core/src/fields/macros.rs | 6 +- 6 files changed, 230 insertions(+), 242 deletions(-) create mode 100644 algebra-core/field-assembly/src/context.rs diff --git a/algebra-core/build.rs b/algebra-core/build.rs index 5d2463640..f828e910d 100644 --- a/algebra-core/build.rs +++ b/algebra-core/build.rs @@ -4,7 +4,7 @@ use std::path::Path; use field_assembly::generate_macro_string; -const NUM_LIMBS: usize = 18; +const NUM_LIMBS: usize = 8; fn main() { let out_dir = env::var_os("OUT_DIR").unwrap(); diff --git a/algebra-core/field-assembly/src/arithmetic.rs b/algebra-core/field-assembly/src/arithmetic.rs index 22c44b02a..d455538e1 100644 --- a/algebra-core/field-assembly/src/arithmetic.rs +++ b/algebra-core/field-assembly/src/arithmetic.rs @@ -1,23 +1,22 @@ use crate::assembler::*; -use std::rc::Rc; +// TODO: Replace assembler with macro to write movq instead of asm.movq // Computes [rcx, r(8+limbs), ..., r8] = 0($a) * [r(8+limbs), ..., r8] -pub fn mul_1 (asm: &mut Assembler, a: &str, b: &Vec<&str>) { +pub fn mul_1 (asm: &mut Assembler, a: &str, b: &Vec<&str>, zero: &str) { asm.movq(a, RDX); - asm.xorq(RCX, RCX); asm.mulxq(b[0], R[0], R[1]); for j in 1..asm.limbs-1 { asm.mulxq(b[j], RAX, R[((j + 1) % asm.limbs)]); asm.adcxq(RAX, R[j]); } asm.mulxq(b[asm.limbs-1], RAX, RCX); - asm.movq("$2", RBX); + asm.movq(zero, RBX); asm.adcxq(RAX, R[asm.limbs-1]); asm.adcxq(RBX, RCX); } -pub fn mul_add_1 (asm: &mut Assembler, a: &Vec<&str>, b: &Vec<&str>, i: usize) { +pub fn mul_add_1 (asm: &mut Assembler, a: &Vec<&str>, b: &Vec<&str>, zero: &str, i: usize) { asm.movq(a[i], RDX); for j in 0..asm.limbs-1 { asm.mulxq(b[j], RAX, RBX); @@ -25,14 +24,14 @@ pub fn mul_add_1 (asm: &mut Assembler, a: &Vec<&str>, b: &Vec<&str>, i: usize) { asm.adoxq(RBX, R[(j+i+1) % asm.limbs]); } asm.mulxq(b[asm.limbs-1], RAX, RCX); - asm.movq("$2", RBX); + asm.movq(zero, RBX); asm.adcxq(RAX, R[(i+asm.limbs-1) % asm.limbs]); asm.adoxq(RBX, RCX); asm.adcxq(RBX, RCX); } -pub fn mul_add_shift_1 (asm: &mut Assembler, a: &Vec<&str>, i: usize) { - asm.movq("$3", RDX); +pub fn mul_add_shift_1 (asm: &mut Assembler, a: &Vec<&str>, inverse: &str, zero: &str, i: usize) { + asm.movq(inverse, RDX); asm.mulxq(R[i], RDX, RAX); asm.mulxq(a[0], RAX, RBX); asm.adcxq(R[i % asm.limbs], RAX); @@ -43,8 +42,22 @@ pub fn mul_add_shift_1 (asm: &mut Assembler, a: &Vec<&str>, i: usize) { asm.adoxq(RBX, R[(j+i+1) % asm.limbs]); } asm.mulxq(a[asm.limbs-1], RAX, R[i % asm.limbs]); - asm.movq("$2", RBX); + asm.movq(zero, RBX); asm.adcxq(RAX, R[(i+asm.limbs-1) % asm.limbs]); asm.adoxq(RCX, R[i % asm.limbs]); asm.adcxq(RBX, R[i % asm.limbs]); } + +// Computes [rcx, r(8+limbs), ..., r8] = 0($a) * [r(8+limbs), ..., r8] +pub fn mul_1_mov (asm: &mut Assembler, a: &str, b: &Vec<&str>) { + asm.movq(a, RDX); + asm.mulxq(b[0], R[0], R[1]); + for j in 1..asm.limbs-1 { + asm.mulxq(b[j], RAX, R[((j + 1) % asm.limbs)]); + asm.adcxq(RAX, R[j]); + } + asm.mulxq(b[asm.limbs-1], RAX, RCX); + asm.movq("$2", RBX); + asm.adcxq(RAX, R[asm.limbs-1]); + asm.adcxq(RBX, RCX); +} diff --git a/algebra-core/field-assembly/src/assembler.rs b/algebra-core/field-assembly/src/assembler.rs index 6c419ee2c..bc2bf90d3 100644 --- a/algebra-core/field-assembly/src/assembler.rs +++ b/algebra-core/field-assembly/src/assembler.rs @@ -6,8 +6,7 
@@ pub const RCX: &'static str = "%rcx"; pub const RDX: &'static str = "%rdx"; pub const RDI: &'static str = "%rdi"; pub const RSI: &'static str = "%rsi"; -pub const R: [&'static str; 18] = ["%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15", - "%r16", "%r17", "%r18", "%r19", "%r20", "%r21", "%r22", "%r23", "%r24", "%r25"]; +pub const R: [&'static str; 8] = ["%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"]; pub struct Assembler { pub limbs: usize, @@ -15,6 +14,11 @@ pub struct Assembler { } +// TODO: assembler using closures +// macro_rules! create_assembler { +// +// } + impl<'a> Assembler { pub fn new (limbs: usize) -> Assembler { Assembler { @@ -41,27 +45,27 @@ impl<'a> Assembler { pub fn mulxq (&mut self, a: &str, b: &str, c: &str) { self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - mulxq {}, {}, {}", a, b, c))); + mulxq {}, {}, {}", a, b, c))); } pub fn adcxq (&mut self, a: &str, b: &str) { self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - adcxq {}, {}", a, b))); + adcxq {}, {}", a, b))); } pub fn adoxq (&mut self, a: &str, b: &str) { self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - adoxq {}, {}", a, b))); + adoxq {}, {}", a, b))); } pub fn movq (&mut self, a: &str, b: &str) { self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - movq {}, {}", a, b))); + movq {}, {}", a, b))); } pub fn xorq (&mut self, a: &str, b: &str) { self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - xorq {}, {}", a, b))); + xorq {}, {}", a, b))); } } diff --git a/algebra-core/field-assembly/src/context.rs b/algebra-core/field-assembly/src/context.rs new file mode 100644 index 000000000..921d5d4de --- /dev/null +++ b/algebra-core/field-assembly/src/context.rs @@ -0,0 +1,121 @@ +use std::rc::Rc; +use std::collections::HashMap; + + +pub const REG_CLOBBER: [&'static str; 8] = ["r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"]; + +#[derive(Clone)] +pub struct Context { + ctx_string: Rc, + declarations: HashMap, + declaration_vec: Vec, + clobbers: Vec +} + +#[derive(Clone)] +struct Declare { + ty: String, + var: String, + pos: usize, + token: String, +} + +impl Context { + pub fn new () -> Self { + Context { + ctx_string: Rc::new(String::new()), + declarations: HashMap::new(), + declaration_vec: Vec::new(), + clobbers: Vec::new(), + } + } + + pub fn get_string (&mut self) -> String { + Rc::make_mut(&mut self.ctx_string).to_string() + } + + pub fn reset (&mut self) { + self.declarations.clear(); + self.declaration_vec.clear(); + self.clobbers.clear(); + } + + pub fn get(self, id: &str) -> String { + self.declarations.get(&id.to_string()).unwrap().token.clone() + } + + pub fn try_get(self, id: &str, fallback_id: &str) -> String { + match self.declarations.get(&id.to_string()) { + Some(dec) => dec.token.clone(), + None => self.declarations.get(&fallback_id.to_string()).unwrap().token.clone(), + } + } + + pub fn add_declaration (&mut self, id: &str, ty: &str, var: &str) { + self.declarations.insert(id.to_string(), Declare { + ty: ty.to_string(), + var: var.to_string(), + pos: self.declarations.len(), + token: format!("${}", self.declarations.len()), + }); + self.declaration_vec.push(Declare { + ty: ty.to_string(), + var: var.to_string(), + pos: self.declaration_vec.len(), + token: format!("${}", self.declaration_vec.len()), + }); + + } + + pub fn add_limb (&mut self, limb: usize) { + self.ctx_string = Rc::new(format!("{}{}", 
Rc::clone(&self.ctx_string), format!(" + {} => {{", limb))); + } + + pub fn add_buffer (&mut self, extra_reg: usize) { + self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" + let mut spill_buffer = [0u64; {}];", extra_reg))); + } + + pub fn add_asm (&mut self, ctx_string: String) { + self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" + unsafe {{ + asm!({} + : + :", ctx_string))); + } + + pub fn add_clobber_from_vec (&mut self, clobbers: Vec<&str>) { + for clobber in clobbers { + self.clobbers.push(format!(" \"{}\"", clobber)); + } + } + + pub fn add_clobber (&mut self, clobber: &str) { + self.clobbers.push(format!(" \"{}\"", clobber)); + } + + pub fn build (&mut self) { + for i in 0..self.declarations.len() { + let dec = &self.declaration_vec[i]; + let last = i == self.declarations.len()-1; + self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" + \"{}\"({}){} // {}", dec.ty, dec.var, if last {""} else {","}, dec.pos))); + } + self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" + : {} + ); + }} + }}", self.clobbers.join(",")))); + } + + pub fn end (&mut self, num_limbs: usize) { + self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" + x => panic!(\"asm_mul (no-carry): number of limbs supported is 2 up to {}. You had {{}}.\", x) + }}; + }} +}} +", + num_limbs))); + } +} diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index d1f3fa8bc..61afa07ce 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -1,266 +1,116 @@ extern crate std; -extern crate regex; -use regex::Regex; #[macro_use] pub mod assembler; pub mod arithmetic; +pub mod context; use assembler::*; -use arithmetic::*; +use arithmetic as ar; +use context::*; -use std::rc::Rc; - -const MAX_REGS: usize = 7; +const MAX_REGS: usize = 6; pub fn generate_macro_string (num_limbs:usize) -> std::string::String { - if (num_limbs > 3 * MAX_REGS) { + if num_limbs > 3 * MAX_REGS { panic!("Number of limbs must be <= {} and MAX_REGS >= 6", 3*MAX_REGS); } let mut macro_string = String::from( "macro_rules! asm_mul { ($limbs:expr, $a:expr, $b:expr, $modulus:expr, $inverse:expr) => { match $limbs {"); - macro_string = generate_matches(num_limbs, macro_string, true); + macro_string = format!("{}{}", macro_string, generate_matches(num_limbs, true)); macro_string = format!("{}{}", macro_string, "macro_rules! 
asm_square { ($limbs:expr, $a:expr, $modulus:expr, $inverse:expr) => { match $limbs {"); - macro_string = generate_matches(num_limbs, macro_string, false); + macro_string = format!("{}{}", macro_string, generate_matches(num_limbs, false)); macro_string } -fn generate_asm_mul_string (limbs: usize, a: &str, b: &str) -> String { - let modulus = "$1"; +fn generate_asm_mul_string (ctx: &Context, limbs: usize) -> String { + let a = ctx.clone().get("a"); + let b = ctx.clone().try_get("b", "a"); + let modulus = ctx.clone().get("modulus"); + let zero = ctx.clone().get("0"); + let inverse = ctx.clone().get("inverse"); + generate_array!(a0, a1, a, limbs); generate_array!(b0, b1, b, limbs); generate_array!(m, m1, modulus, limbs); + // if limbs > 8 { + // generate_array!(s, s1, spills, limbs * 2); + // } let mut asm = Assembler::new(limbs); asm.begin(); - for i in 0..limbs { - if i == 0 { - arithmetic::mul_1(&mut asm, a1[0], &b1); - } else { - arithmetic::mul_add_1(&mut asm, &a1, &b1, i); - } - arithmetic::mul_add_shift_1(&mut asm, &m1, i); - } - for i in 0..asm.limbs { - asm.movq(R[i], a1[i]); - } - asm.end(); - asm.get_asm_string() -} -fn generate_matches (num_limbs: usize, mut macro_string: String, is_mul: bool) -> String { - for i in 2..(num_limbs+1) { - let mut limb_specialisation = format!(" - {} => {{", i); - // logic to format macro based on how many limbs there are, whether it is a mul - let (mut b_declare, mut spills_declare, mut b, mut spills) = (" // $3", String::from(""), "$0", ""); - let mut rs_clobber = String::from(""); - for k in 0..std::cmp::min(i, MAX_REGS) { rs_clobber = format!("{}{}", rs_clobber, format!("\"r{}\", ", 8+k)); } - if is_mul { - b_declare = ", // $3 - \"r\"(&$b)"; - b = "$4"; - spills_declare = String::from(" // $4"); - } - // if i > MAX_REGS { - // let extra_reg = if i <= 2*MAX_REGS { 2*(i-MAX_REGS) } else { i }; - // limb_specialisation = format!("{}{}", limb_specialisation, format!(" - // let mut spills = [0u64; {}];", extra_reg)); - // if is_mul { spills = "$5"; } else { spills = "$4";} - // spills_declare = format!(", // ${} - // \"r\"(&mut spills) // {}", 3+(is_mul as usize), spills); - // } - - // Actual asm declaration - limb_specialisation = format!("{}{}", limb_specialisation, format!(" - unsafe {{ - asm!({asm_string} - : - : \"r\"(&mut $a), // $0 - \"r\"(&$modulus), // $1 - \"i\"(0u64), // $2 - \"i\"($inverse){b_declare}{spills_declare} - : \"rcx\", \"rbx\", \"rdx\", \"rax\", {rs_clobber}\"cc\", \"memory\" - ); - }} - }}", - asm_string = transform_asm_mul_string(i, generate_asm_mul_string(i, "$0", b), "%rsp", "$0"), - rs_clobber=rs_clobber, - b_declare=b_declare, - spills_declare=spills_declare)); - macro_string = format!("{}{}", macro_string, limb_specialisation); - } - macro_string = format!("{}{}", macro_string, format!(" - x => panic!(\"asm_mul (no-carry): number of limbs supported is 2 up to {}. 
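
(Side note on the operand fallback: asm_square! has no $b, so try_get("b", "a") hands back the token for a and the same multiplication body squares in place. With the declaration order used in generate_matches further down, the tokens work out roughly as: asm_mul!: a -> $0, b -> $1, modulus -> $2, zero -> $3, inverse -> $4; asm_square!: a -> $0, b falls back to $0, modulus -> $1, zero -> $2, inverse -> $3.)
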
You had {{}}.\", x) - }}; - }} -}} - -", num_limbs)); - macro_string -} - -// This hacky way of partitioning registers can be -// replaced by something more generic like graph colouring -fn get_registers (limbs: usize) -> (usize, Vec>) { - assert!(limbs <= 3*MAX_REGS); - - if limbs <= MAX_REGS { - (0, Vec::new()) - } else if limbs == MAX_REGS + 1 { - (1, vec![ - vec![MAX_REGS/2, MAX_REGS] - ]) - } else if limbs == MAX_REGS + 2 { - (2, vec![ - vec![MAX_REGS/2, MAX_REGS], - vec![MAX_REGS/2+1, MAX_REGS+1] - ]) - } else if limbs == MAX_REGS + 3 { - (3, vec![ - vec![MAX_REGS/2, MAX_REGS], - vec![MAX_REGS/2+1, MAX_REGS+1], - vec![MAX_REGS/2+2, MAX_REGS+2] - ]) - } else if limbs <= MAX_REGS * 2 { - let n_spills = limbs - MAX_REGS; - let mut values = Vec::new(); - for i in 0..n_spills { - values.push(vec![i, MAX_REGS+i]); - } - (n_spills, values) - } else { // if limbs <= MAX_REGS * 3 - let mut values = Vec::new(); - for i in 0..MAX_REGS { - if i < limbs - 2*MAX_REGS { - values.push(vec![i, MAX_REGS+i, 2*MAX_REGS+i]); + // if limbs <= 8 { + asm.xorq(RCX, RCX); + for i in 0..limbs { + if i == 0 { + ar::mul_1(&mut asm, a1[0], &b1, &zero); } else { - values.push(vec![i, MAX_REGS+i]); + ar::mul_add_1(&mut asm, &a1, &b1, &zero, i); } + ar::mul_add_shift_1(&mut asm, &m1, &inverse, &zero, i); + } + for i in 0..asm.limbs { + asm.movq(R[i], a1[i]); } - (MAX_REGS, values) - } -} - -// fn get_registers_ (limbs: usize) -> { -// if limbs > MAX_REGS { -// let mut values = Vec::new(); -// for i in 0..limbs-MAX_REGS { -// -// } -// (limbs-MAX_REGS, ) -// } -// } -// This is a compilation pass which converts abstract -// register numbers into x64 registers with spills. (Unfortunately, rather hacky) -fn transform_asm_mul_string (limbs: usize, asm_string: String, spills: &str, a: &str) -> String { - let (n_spills, spillable) = get_registers(limbs); - let mut lines = asm_string.split("\n"); + // } else { + // asm.xorq(RCX, RCX); + // for i in 0..8 { + // if i == 0 { + // ar::mul_1_mov(&mut asm, a1[0], &b1, 0); + // } else { + // ar::mul_add_1(&mut asm, &a1, &b1, i); + // } + // } + // for i in 0..8 { + // ar::mul_add_1(&mut asm, &m1, 0); + // } + // for i in 0..asm.limbs { + // asm.movq(R[i], a1[i]); + // } + // + // } - let re = Regex::new(r"%r\d+").unwrap(); - let number = Regex::new(r"\d+").unwrap(); + asm.end(); - let mut line_number = 0; - let mut reg_sequence: Vec> = std::iter::repeat(vec![]).take(n_spills).collect::>(); + asm.get_asm_string() +} - let mut edited_lines: Vec = Vec::new(); +fn generate_matches (num_limbs: usize, is_mul: bool) -> String { + let mut ctx = Context::new(); + for limbs in 2..(num_limbs+1) { + ctx.reset(); + + ctx.add_declaration("a", "r", "&mut $a"); + if is_mul { ctx.add_declaration("b", "r", "&$b"); } + ctx.add_declaration("modulus", "r", "&$modulus"); + ctx.add_declaration("0", "i", "0u64"); + ctx.add_declaration("inverse", "i", "$inverse"); + + ctx.add_limb(limbs); + if limbs > 8 { + ctx.add_buffer(2*limbs); + ctx.add_declaration("buf", "r", "&mut spill_buffer"); + } - // For every given register equivalence class, - // we collect a list of their occurance in sequential order - for line in lines { - edited_lines.push(line.to_string()); - line_number += 1; - if re.is_match(&line.to_string()) { - let words = line.split(" "); - for word in words { - if re.is_match(&word.to_string()) { - let num = number.captures(word).unwrap(); - let reg_num = &num[0].parse::().unwrap(); - for i in 0..n_spills { - if spillable[i].contains(&(*reg_num-8)) { - reg_sequence[i].push((line_number, 
*reg_num-8)); - } } } } } } + let asm_string = generate_asm_mul_string(&ctx, limbs); - // We then extract a sequence of swap points, - // where there are switches between the registers marked for the same colour - let mut swap_sequence: Vec> = std::iter::repeat(vec![]).take(n_spills).collect::>(); - for i in 0..n_spills { - let length = reg_sequence[i].len(); - if length > 0 { - for j in 0..reg_sequence[i].len()-1 { - if reg_sequence[i][j].1 != reg_sequence[i][j+1].1 { - swap_sequence[i].push((reg_sequence[i][j].0, // line number - reg_sequence[i][j].1, // current reg index - reg_sequence[i][j+1].1)); // next reg index - } - } - swap_sequence[i].push((reg_sequence[i][length-1].0, - reg_sequence[i][length-1].1, - reg_sequence[i][length-1].1)); - } - // Finally, we insert code to swap the registers to and from memory - let length = swap_sequence[i].len(); - if length > 1 && spillable[i].len() <= 2 { - for j in 0..length { - let swap = &swap_sequence[i][j]; - if j < length - 3 { - let index1 = if swap.1 >= MAX_REGS { n_spills + i } else { i }; - let index2 = if swap.2 >= MAX_REGS { n_spills + i } else { i }; - edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" - movq %r{reg}, {index1}({dest}) - movq {index2}({spills}), %r{reg}", - reg=8+spillable[i][0], index1=index1*8, index2=index2*8, - dest=if j!=length-4 {spills} else {a}, spills=spills)); - } - } - let swap = &swap_sequence[i][length-3]; - let index1 = if swap.1 >= MAX_REGS { n_spills + i } else { i }; - edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" - movq %r{reg}, {index1}({dest})", - reg=8+spillable[i][0], index1=index1*8, dest=a)); - edited_lines[&swap_sequence[i][length-2].0-1] = "".to_string(); - edited_lines[&swap_sequence[i][length-1].0-1] = "".to_string(); - } else { // If we have 3 virtual registers allocated to our given register - for j in 0..length { - let swap = &swap_sequence[i][j]; - if j < length - 4 { - edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" - movq %r{reg}, {index1}({dest}) - movq {index2}({spills}), %r{reg}", - reg=8+spillable[i][0], index1=swap.1*8, index2=swap.2*8, - dest=if j!=length-5 && j!=length-6 {spills} else {a}, spills=spills)); - } - } - let swap = &swap_sequence[i][length-4]; - edited_lines[swap.0-1] = format!("{}{}", edited_lines[swap.0-1], format!(" - movq %r{reg}, {index1}({dest})", - reg=8+spillable[i][0], index1=swap.1*8, dest=a)); - edited_lines[&swap_sequence[i][length-3].0-1] = "".to_string(); - edited_lines[&swap_sequence[i][length-2].0-1] = "".to_string(); - edited_lines[&swap_sequence[i][length-1].0-1] = "".to_string(); - } - } - // Remove lines marked for removal - let length = edited_lines.len(); - for i in 0..limbs+1 { - if edited_lines[length-1-i] == "" { - edited_lines.remove(length-1-i); - } - } - let mut interspersed = edited_lines[..].join("\n"); - for i in 0..n_spills { // Replace virtual register numbers with physical registers - interspersed = interspersed.replace(&format!("%r{}", 8+spillable[i][1]), &format!("%r{}", 8+spillable[i][0])); - if spillable[i].len() == 3 { - interspersed = interspersed.replace(&format!("%r{}", 8+spillable[i][2]), &format!("%r{}", 8+spillable[i][0])); + ctx.add_asm(asm_string); + ctx.add_clobber_from_vec(vec!["rcx", "rbx", "rdx", "rax"]); + for j in 0..std::cmp::min(limbs, 8) { + ctx.add_clobber(REG_CLOBBER[j]); } + ctx.add_clobber_from_vec(vec!["cc", "memory"]); + ctx.build(); } - interspersed + ctx.end(num_limbs); + ctx.get_string() } diff --git 
a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index c78e8469f..bd759392d 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -441,7 +441,7 @@ macro_rules! impl_Fp { macro_rules! impl_field_mul_assign { ($limbs:expr) => { - #[inline] + #[inline(never)] #[unroll_for_loops] fn mul_assign(&mut self, other: &Self) { // Checking the modulus at compile time @@ -454,7 +454,7 @@ macro_rules! impl_field_mul_assign { // No-carry optimisation applied to CIOS if no_carry { - if $limbs <= 18 { + if $limbs <= 6 { asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); self.reduce(); } else { @@ -536,7 +536,7 @@ macro_rules! impl_field_square_in_place { #[inline] #[unroll_for_loops] fn square_in_place(&mut self) -> &mut Self { - if $limbs <= 18 { + if $limbs <= 6 { asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); self.reduce(); self From 822fb30d9a0e2396717f6434bf13927e3505423c Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 16 Apr 2020 13:21:05 +0800 Subject: [PATCH 09/43] ASM DSL with procedural macros --- algebra-core/field-assembly/Cargo.toml | 2 +- algebra-core/field-assembly/src/arithmetic.rs | 63 ------------ algebra-core/field-assembly/src/assembler.rs | 83 ---------------- algebra-core/field-assembly/src/context.rs | 1 - algebra-core/field-assembly/src/lib.rs | 95 ++++++++++--------- algebra-core/field-assembly/src/utils.rs | 20 ++++ algebra-core/mince/Cargo.toml | 14 +++ algebra-core/mince/src/arithmetic.rs | 59 ++++++++++++ algebra-core/mince/src/intrinsics.rs | 46 +++++++++ algebra-core/mince/src/lib.rs | 63 ++++++++++++ 10 files changed, 252 insertions(+), 194 deletions(-) delete mode 100644 algebra-core/field-assembly/src/arithmetic.rs delete mode 100644 algebra-core/field-assembly/src/assembler.rs create mode 100644 algebra-core/field-assembly/src/utils.rs create mode 100644 algebra-core/mince/Cargo.toml create mode 100644 algebra-core/mince/src/arithmetic.rs create mode 100644 algebra-core/mince/src/intrinsics.rs create mode 100644 algebra-core/mince/src/lib.rs diff --git a/algebra-core/field-assembly/Cargo.toml b/algebra-core/field-assembly/Cargo.toml index c71b11b70..2d5c0efd2 100644 --- a/algebra-core/field-assembly/Cargo.toml +++ b/algebra-core/field-assembly/Cargo.toml @@ -7,4 +7,4 @@ edition = "2018" # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html [dependencies] -regex = "" +mince = { path = "../mince" } diff --git a/algebra-core/field-assembly/src/arithmetic.rs b/algebra-core/field-assembly/src/arithmetic.rs deleted file mode 100644 index d455538e1..000000000 --- a/algebra-core/field-assembly/src/arithmetic.rs +++ /dev/null @@ -1,63 +0,0 @@ -use crate::assembler::*; -// TODO: Replace assembler with macro to write movq instead of asm.movq - -// Computes [rcx, r(8+limbs), ..., r8] = 0($a) * [r(8+limbs), ..., r8] -pub fn mul_1 (asm: &mut Assembler, a: &str, b: &Vec<&str>, zero: &str) { - asm.movq(a, RDX); - asm.mulxq(b[0], R[0], R[1]); - for j in 1..asm.limbs-1 { - asm.mulxq(b[j], RAX, R[((j + 1) % asm.limbs)]); - asm.adcxq(RAX, R[j]); - } - asm.mulxq(b[asm.limbs-1], RAX, RCX); - asm.movq(zero, RBX); - asm.adcxq(RAX, R[asm.limbs-1]); - asm.adcxq(RBX, RCX); -} - - -pub fn mul_add_1 (asm: &mut Assembler, a: &Vec<&str>, b: &Vec<&str>, zero: &str, i: usize) { - asm.movq(a[i], RDX); - for j in 0..asm.limbs-1 { - asm.mulxq(b[j], RAX, RBX); - asm.adcxq(RAX, R[(j+i) % asm.limbs]); - asm.adoxq(RBX, R[(j+i+1) % 
asm.limbs]); - } - asm.mulxq(b[asm.limbs-1], RAX, RCX); - asm.movq(zero, RBX); - asm.adcxq(RAX, R[(i+asm.limbs-1) % asm.limbs]); - asm.adoxq(RBX, RCX); - asm.adcxq(RBX, RCX); -} - -pub fn mul_add_shift_1 (asm: &mut Assembler, a: &Vec<&str>, inverse: &str, zero: &str, i: usize) { - asm.movq(inverse, RDX); - asm.mulxq(R[i], RDX, RAX); - asm.mulxq(a[0], RAX, RBX); - asm.adcxq(R[i % asm.limbs], RAX); - asm.adoxq(RBX, R[(i+1) % asm.limbs]); - for j in 1..asm.limbs-1 { - asm.mulxq(a[j], RAX, RBX); - asm.adcxq(RAX, R[(j+i) % asm.limbs]); - asm.adoxq(RBX, R[(j+i+1) % asm.limbs]); - } - asm.mulxq(a[asm.limbs-1], RAX, R[i % asm.limbs]); - asm.movq(zero, RBX); - asm.adcxq(RAX, R[(i+asm.limbs-1) % asm.limbs]); - asm.adoxq(RCX, R[i % asm.limbs]); - asm.adcxq(RBX, R[i % asm.limbs]); -} - -// Computes [rcx, r(8+limbs), ..., r8] = 0($a) * [r(8+limbs), ..., r8] -pub fn mul_1_mov (asm: &mut Assembler, a: &str, b: &Vec<&str>) { - asm.movq(a, RDX); - asm.mulxq(b[0], R[0], R[1]); - for j in 1..asm.limbs-1 { - asm.mulxq(b[j], RAX, R[((j + 1) % asm.limbs)]); - asm.adcxq(RAX, R[j]); - } - asm.mulxq(b[asm.limbs-1], RAX, RCX); - asm.movq("$2", RBX); - asm.adcxq(RAX, R[asm.limbs-1]); - asm.adcxq(RBX, RCX); -} diff --git a/algebra-core/field-assembly/src/assembler.rs b/algebra-core/field-assembly/src/assembler.rs deleted file mode 100644 index bc2bf90d3..000000000 --- a/algebra-core/field-assembly/src/assembler.rs +++ /dev/null @@ -1,83 +0,0 @@ -use std::rc::Rc; - -pub const RAX: &'static str = "%rax"; -pub const RBX: &'static str = "%rbx"; -pub const RCX: &'static str = "%rcx"; -pub const RDX: &'static str = "%rdx"; -pub const RDI: &'static str = "%rdi"; -pub const RSI: &'static str = "%rsi"; -pub const R: [&'static str; 8] = ["%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"]; - -pub struct Assembler { - pub limbs: usize, - asm_string: Rc, - -} - -// TODO: assembler using closures -// macro_rules! create_assembler { -// -// } - -impl<'a> Assembler { - pub fn new (limbs: usize) -> Assembler { - Assembler { - limbs: limbs, - asm_string: Rc::new(String::new()), - } - } - - pub fn get_asm_string (&mut self) -> String { - Rc::make_mut(&mut self.asm_string).to_string() - } - - pub fn begin (&mut self) { - self.asm_string = Rc::new("\"".to_string()); - } - pub fn end (&mut self) { - self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), " - \"".to_string())); - } - - pub fn comment (&mut self, comment: &str) { - self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" // {}", comment))); - } - - pub fn mulxq (&mut self, a: &str, b: &str, c: &str) { - self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - mulxq {}, {}, {}", a, b, c))); - } - - pub fn adcxq (&mut self, a: &str, b: &str) { - self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - adcxq {}, {}", a, b))); - } - - pub fn adoxq (&mut self, a: &str, b: &str) { - self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - adoxq {}, {}", a, b))); - } - - pub fn movq (&mut self, a: &str, b: &str) { - self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - movq {}, {}", a, b))); - } - - pub fn xorq (&mut self, a: &str, b: &str) { - self.asm_string = Rc::new(format!("{}{}", Rc::clone(&self.asm_string), format!(" - xorq {}, {}", a, b))); - } -} - -macro_rules! 
generate_array { - ($a_0:ident, $a_1:ident, $a:ident, $range:expr) => { - let mut $a_0 = Vec::new(); - let mut $a_1 = Vec::new(); - for i in 0..$range { - $a_0.push(format!("{}({})", i*8, $a)); - } - for i in 0..$range { - $a_1.push(&*$a_0[i]); - } - } -} diff --git a/algebra-core/field-assembly/src/context.rs b/algebra-core/field-assembly/src/context.rs index 921d5d4de..0fa56f0cd 100644 --- a/algebra-core/field-assembly/src/context.rs +++ b/algebra-core/field-assembly/src/context.rs @@ -1,7 +1,6 @@ use std::rc::Rc; use std::collections::HashMap; - pub const REG_CLOBBER: [&'static str; 8] = ["r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"]; #[derive(Clone)] diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index 61afa07ce..7e5d37f2f 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -1,14 +1,16 @@ extern crate std; #[macro_use] -pub mod assembler; -pub mod arithmetic; -pub mod context; +pub mod utils; +use utils::*; -use assembler::*; -use arithmetic as ar; +pub mod context; use context::*; +use mince::assemble; + +use std::cell::RefCell; + const MAX_REGS: usize = 6; pub fn generate_macro_string (num_limbs:usize) -> std::string::String { @@ -29,59 +31,53 @@ pub fn generate_macro_string (num_limbs:usize) -> std::string::String { macro_string } -fn generate_asm_mul_string (ctx: &Context, limbs: usize) -> String { - let a = ctx.clone().get("a"); - let b = ctx.clone().try_get("b", "a"); - let modulus = ctx.clone().get("modulus"); - let zero = ctx.clone().get("0"); - let inverse = ctx.clone().get("inverse"); - - generate_array!(a0, a1, a, limbs); - generate_array!(b0, b1, b, limbs); - generate_array!(m, m1, modulus, limbs); +#[assemble] +fn generate_asm_mul_string ( + a: &str, + b: &str, + modulus: &str, + zero: &str, + inverse: &str, + limbs: usize +) -> String { + reg!(a0, a1, a, limbs); + reg!(b0, b1, b, limbs); + reg!(m, m1, modulus, limbs); // if limbs > 8 { - // generate_array!(s, s1, spills, limbs * 2); + // reg!(s, s1, spills, limbs * 2); // } - let mut asm = Assembler::new(limbs); - - asm.begin(); - // if limbs <= 8 { - asm.xorq(RCX, RCX); + xorq(RCX, RCX); for i in 0..limbs { if i == 0 { - ar::mul_1(&mut asm, a1[0], &b1, &zero); + mul_1!(a1[0], b1, zero, limbs); } else { - ar::mul_add_1(&mut asm, &a1, &b1, &zero, i); + mul_add_1!(a1, b1, zero, i, limbs); } - ar::mul_add_shift_1(&mut asm, &m1, &inverse, &zero, i); + mul_add_shift_1!(m1, inverse, zero, i, limbs); } - for i in 0..asm.limbs { - asm.movq(R[i], a1[i]); + for i in 0..limbs { + movq(R[i], a1[i]); } // } else { - // asm.xorq(RCX, RCX); - // for i in 0..8 { - // if i == 0 { - // ar::mul_1_mov(&mut asm, a1[0], &b1, 0); - // } else { - // ar::mul_add_1(&mut asm, &a1, &b1, i); - // } - // } - // for i in 0..8 { - // ar::mul_add_1(&mut asm, &m1, 0); - // } - // for i in 0..asm.limbs { - // asm.movq(R[i], a1[i]); - // } - // - // } + // asm.xorq(RCX, RCX); + // for i in 0..8 { + // if i == 0 { + // ar::mul_1_mov(&mut asm, a1[0], &b1, 0); + // } else { + // ar::mul_add_1(&mut asm, &a1, &b1, i); + // } + // } + // for i in 0..8 { + // ar::mul_add_1(&mut asm, &m1, 0); + // } + // for i in 0..asm.limbs { + // asm.movq(R[i], a1[i]); + // } - asm.end(); - - asm.get_asm_string() + // } } fn generate_matches (num_limbs: usize, is_mul: bool) -> String { @@ -101,7 +97,14 @@ fn generate_matches (num_limbs: usize, is_mul: bool) -> String { ctx.add_declaration("buf", "r", "&mut spill_buffer"); } - let asm_string = generate_asm_mul_string(&ctx, 
limbs); + let asm_string = generate_asm_mul_string( + &ctx.clone().get("a"), + &ctx.clone().try_get("b", "a"), + &ctx.clone().get("modulus"), + &ctx.clone().get("0"), + &ctx.clone().get("inverse"), + limbs + ); ctx.add_asm(asm_string); ctx.add_clobber_from_vec(vec!["rcx", "rbx", "rdx", "rax"]); diff --git a/algebra-core/field-assembly/src/utils.rs b/algebra-core/field-assembly/src/utils.rs new file mode 100644 index 000000000..1120c4cd4 --- /dev/null +++ b/algebra-core/field-assembly/src/utils.rs @@ -0,0 +1,20 @@ +pub const RAX: &'static str = "%rax"; +pub const RBX: &'static str = "%rbx"; +pub const RCX: &'static str = "%rcx"; +pub const RDX: &'static str = "%rdx"; +pub const RDI: &'static str = "%rdi"; +pub const RSI: &'static str = "%rsi"; +pub const R: [&'static str; 8] = ["%r8", "%r9", "%r10", "%r11", "%r12", "%r13", "%r14", "%r15"]; + +macro_rules! reg { + ($a_0:ident, $a_1:ident, $a:ident, $range:expr) => { + let mut $a_0 = Vec::new(); + let mut $a_1 = Vec::new(); + for i in 0..$range { + $a_0.push(format!("{}({})", i*8, $a)); + } + for i in 0..$range { + $a_1.push(&*$a_0[i]); + } + } +} diff --git a/algebra-core/mince/Cargo.toml b/algebra-core/mince/Cargo.toml new file mode 100644 index 000000000..7fe5e22ab --- /dev/null +++ b/algebra-core/mince/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "mince" +version = "0.1.0" +authors = ["jon-chuang <9093549+jon-chuang@users.noreply.github.com>"] +edition = "2018" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +quote = "1.0" +syn = {version = "1.0.17", features = ["full"]} + +[lib] +proc-macro = true diff --git a/algebra-core/mince/src/arithmetic.rs b/algebra-core/mince/src/arithmetic.rs new file mode 100644 index 000000000..cd2fcdb99 --- /dev/null +++ b/algebra-core/mince/src/arithmetic.rs @@ -0,0 +1,59 @@ +use proc_macro::TokenStream; +use quote::quote; + +pub fn define_arithmetic() -> TokenStream { + (quote! { + { + macro_rules! mul_1 { + ($a:expr, $b:ident, $zero:ident, $limbs:expr) => { + movq($a, RDX); + mulxq($b[0], R[0], R[1]); + for j in 1..$limbs-1 { + mulxq($b[j], RAX, R[((j + 1) % $limbs)]); + adcxq(RAX, R[j]); + } + mulxq($b[$limbs-1], RAX, RCX); + movq($zero, RBX); + adcxq(RAX, R[$limbs-1]); + adcxq(RBX, RCX); + } + } + + macro_rules! mul_add_1 { + ($a:ident, $b:ident, $zero:ident, $i:ident, $limbs:expr) => { + movq($a[$i], RDX); + for j in 0..$limbs-1 { + mulxq($b[j], RAX, RBX); + adcxq(RAX, R[(j+$i) % $limbs]); + adoxq(RBX, R[(j+$i+1) % $limbs]); + } + mulxq($b[$limbs-1], RAX, RCX); + movq($zero, RBX); + adcxq(RAX, R[($i+$limbs-1) % $limbs]); + adoxq(RBX, RCX); + adcxq(RBX, RCX); + } + } + + macro_rules! mul_add_shift_1 { + ($a:ident, $inverse:ident, $zero:ident, $i:ident, $limbs:expr) => { + movq($inverse, RDX); + mulxq(R[$i], RDX, RAX); + mulxq($a[0], RAX, RBX); + adcxq(R[$i % $limbs], RAX); + adoxq(RBX, R[($i+1) % $limbs]); + for j in 1..$limbs-1 { + mulxq($a[j], RAX, RBX); + adcxq(RAX, R[(j+$i) % $limbs]); + adoxq(RBX, R[(j+$i+1) % $limbs]); + } + mulxq($a[$limbs-1], RAX, R[$i % $limbs]); + movq($zero, RBX); + adcxq(RAX, R[($i+$limbs-1) % $limbs]); + adoxq(RCX, R[$i % $limbs]); + adcxq(RBX, R[$i % $limbs]); + } + } + } + }).into() +} diff --git a/algebra-core/mince/src/intrinsics.rs b/algebra-core/mince/src/intrinsics.rs new file mode 100644 index 000000000..a89056e9a --- /dev/null +++ b/algebra-core/mince/src/intrinsics.rs @@ -0,0 +1,46 @@ +use proc_macro::TokenStream; +use quote::quote; + +pub fn define_intrinsics() -> TokenStream { + (quote! 
{ + { + let mut begin = || { + asm_string.replace_with(|_| "\"".to_string()); + }; + + let mut end = || { + asm_string.replace_with(|x| format!("{}{}", x, " + \"".to_string()).clone()); + }; + + let mut comment = | comment: &str | { + asm_string.replace_with(|x| format!("{}{}", x, format!(" // {}", comment)).clone()); + }; + + let mut mulxq = | a: &str, b: &str, c: &str | { + asm_string.replace_with(|x| format!("{}{}", x, format!(" + mulxq {}, {}, {}", a, b, c)).clone()); + }; + + let mut adcxq = | a: &str, b: &str| { + asm_string.replace_with(|x| format!("{}{}", x, format!(" + adcxq {}, {}", a, b))); + }; + + let mut adoxq = | a: &str, b: &str | { + asm_string.replace_with(|x| format!("{}{}", x, format!(" + adoxq {}, {}", a, b)).clone()); + }; + + let mut movq = | a: &str, b: &str | { + asm_string.replace_with(|x| format!("{}{}", x, format!(" + movq {}, {}", a, b)).clone()); + }; + + let mut xorq = | a: &str, b: &str | { + asm_string.replace_with(|x| format!("{}{}", x, format!(" + xorq {}, {}", a, b)).clone()); + }; + } + }).into() +} diff --git a/algebra-core/mince/src/lib.rs b/algebra-core/mince/src/lib.rs new file mode 100644 index 000000000..a5de1f843 --- /dev/null +++ b/algebra-core/mince/src/lib.rs @@ -0,0 +1,63 @@ +#![recursion_limit="256"] + +extern crate proc_macro; +#[macro_use] +extern crate quote; + +mod intrinsics; +use intrinsics::*; + +mod arithmetic; +use arithmetic::*; + +use proc_macro::TokenStream; +use syn; +use quote::quote; + +#[proc_macro_attribute] +pub fn assemble (_meta: TokenStream, input: TokenStream) -> TokenStream { + let ast: syn::ItemFn = syn::parse(input).unwrap(); + let sig = ast.sig; + let block = ast.block; + let attrs = ast.attrs; + + let arithmetic: syn::Block = syn::parse(define_arithmetic()).unwrap(); + let intrinsics: syn::Block = syn::parse(define_intrinsics()).unwrap(); + + let begin: syn::Stmt = syn::parse((quote! { begin(); }).into()).unwrap(); + let end: syn::Stmt = syn::parse((quote! { end(); }).into()).unwrap(); + let ret: syn::Stmt = syn::parse((quote! { return asm_string.into_inner(); }).into()).unwrap(); + + let mut new_stmts = Vec::new(); + for stmt in &intrinsics.stmts { + new_stmts.push(stmt.clone()); + } + for stmt in &arithmetic.stmts { + new_stmts.push(stmt.clone()); + } + + new_stmts.push(begin); + + for stmt in block.stmts { + new_stmts.push(stmt); + } + + new_stmts.push(end); + new_stmts.push(ret); + + let new_block = syn::Block { + brace_token: block.brace_token, + stmts: new_stmts, + }; + + let gen = quote! 
{ + #(#attrs) + * + #sig { + let mut asm_string = RefCell::new(String::new()); + + #new_block + } + }; + gen.into() +} From 4a12185f8f28c6df528e4ff39ea95047eee4b33f Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 16 Apr 2020 14:41:16 +0800 Subject: [PATCH 10/43] conditional compilation --- algebra-benches/Cargo.toml | 3 +++ algebra-core/Cargo.toml | 1 + algebra-core/src/fields/macros.rs | 9 ++++++--- algebra-core/src/lib.rs | 9 ++++++--- algebra/Cargo.toml | 1 + 5 files changed, 17 insertions(+), 6 deletions(-) diff --git a/algebra-benches/Cargo.toml b/algebra-benches/Cargo.toml index b958e8eca..b46395564 100644 --- a/algebra-benches/Cargo.toml +++ b/algebra-benches/Cargo.toml @@ -27,3 +27,6 @@ algebra = { path = "../algebra", features = [ "full" ] } blake2 = "0.8.1" rand = "0.7" rand_xorshift = { version = "0.2" } + +[features] +asm = [ "algebra/asm"] diff --git a/algebra-core/Cargo.toml b/algebra-core/Cargo.toml index 8c303bd1e..9010cf8fd 100644 --- a/algebra-core/Cargo.toml +++ b/algebra-core/Cargo.toml @@ -41,3 +41,4 @@ default = [ "std" ] std = [] parallel = [ "std", "rayon" ] derive = [ "algebra-core-derive" ] +asm = [] diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index bd759392d..839db7edc 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -1,6 +1,8 @@ macro_rules! impl_Fp { ($Fp:ident, $FpParameters:ident, $limbs:expr) => { pub trait $FpParameters: FpParameters {} + + #[cfg(feature = "asm")] include!(concat!(env!("OUT_DIR"), "/field_assembly.rs")); #[derive(Derivative)] @@ -438,7 +440,6 @@ macro_rules! impl_Fp { /// [here](https://hackmd.io/@zkteam/modular_multiplication) if /// `P::MODULUS` has (a) a non-zero MSB, and (b) at least one /// zero bit in the rest of the modulus. - macro_rules! impl_field_mul_assign { ($limbs:expr) => { #[inline(never)] @@ -454,7 +455,8 @@ macro_rules! impl_field_mul_assign { // No-carry optimisation applied to CIOS if no_carry { - if $limbs <= 6 { + if $limbs <= 6 && cfg!(feature = "asm") { + #[cfg(feature = "asm")] asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); self.reduce(); } else { @@ -536,7 +538,8 @@ macro_rules! 
impl_field_square_in_place { #[inline] #[unroll_for_loops] fn square_in_place(&mut self) -> &mut Self { - if $limbs <= 6 { + if $limbs <= 6 && cfg!(feature = "asm") { + #[cfg(feature = "asm")] asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); self.reduce(); self diff --git a/algebra-core/src/lib.rs b/algebra-core/src/lib.rs index 5971dbb58..98976e1de 100644 --- a/algebra-core/src/lib.rs +++ b/algebra-core/src/lib.rs @@ -4,9 +4,12 @@ #![deny(non_shorthand_field_patterns, unused_attributes, unused_imports)] #![deny(unused_extern_crates, renamed_and_removed_lints, unused_allocation)] #![deny(unused_comparisons, bare_trait_objects, const_err, unused_must_use)] -#![deny(unused_mut, unused_unsafe, private_in_public)]//, unsafe_code)] -// #![forbid(unsafe_code)] -#![feature(asm)] +#![deny(unused_mut, unused_unsafe, private_in_public)] + +#![cfg_attr(not(feature = "asm"), deny(unsafe_code))] +#![cfg_attr(not(feature = "asm"), forbid(unsafe_code))] + +#![cfg_attr(feature = "asm", feature(asm))] #[cfg(all(test, not(feature = "std")))] #[macro_use] diff --git a/algebra/Cargo.toml b/algebra/Cargo.toml index c88220a80..042cc4ceb 100644 --- a/algebra/Cargo.toml +++ b/algebra/Cargo.toml @@ -47,3 +47,4 @@ mnt6_753 = [] std = [ "algebra-core/std" ] parallel = [ "std", "algebra-core/parallel" ] derive = [ "algebra-core/derive" ] +asm = [ "algebra-core/asm"] From d6f3ba5a3653387e344de509e0870556fcb41dcd Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 16 Apr 2020 16:11:51 +0800 Subject: [PATCH 11/43] target_arch and target_feature. Compile with: RUSTFLAGS="--emit=asm -C target-cpu=native -C target-feature=+bmi2,+adx" cargo +nightly bench --features asm --- algebra-core/build.rs | 12 +++ algebra-core/mince/src/lib.rs | 2 - algebra-core/src/fields/macros.rs | 135 +++++++++++++++++------------- 3 files changed, 89 insertions(+), 60 deletions(-) diff --git a/algebra-core/build.rs b/algebra-core/build.rs index f828e910d..25acd792a 100644 --- a/algebra-core/build.rs +++ b/algebra-core/build.rs @@ -2,18 +2,30 @@ use std::env; use std::fs; use std::path::Path; +#[cfg(feature = "asm")] use field_assembly::generate_macro_string; +#[cfg(feature = "asm")] const NUM_LIMBS: usize = 8; + fn main() { let out_dir = env::var_os("OUT_DIR").unwrap(); let dest_path = Path::new(&out_dir).join("field_assembly.rs"); + + #[cfg(feature = "asm")] fs::write( &dest_path, generate_macro_string(NUM_LIMBS) ).unwrap(); + + #[cfg(not(feature = "asm"))] + fs::write( + &dest_path, + "" + ).unwrap(); + println!("cargo:rerun-if-changed=build.rs"); } diff --git a/algebra-core/mince/src/lib.rs b/algebra-core/mince/src/lib.rs index a5de1f843..c070b84c6 100644 --- a/algebra-core/mince/src/lib.rs +++ b/algebra-core/mince/src/lib.rs @@ -1,8 +1,6 @@ #![recursion_limit="256"] extern crate proc_macro; -#[macro_use] -extern crate quote; mod intrinsics; use intrinsics::*; diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index 839db7edc..034fe21bb 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -2,7 +2,11 @@ macro_rules! impl_Fp { ($Fp:ident, $FpParameters:ident, $limbs:expr) => { pub trait $FpParameters: FpParameters {} - #[cfg(feature = "asm")] + #[cfg(all(feature = "asm", + // target_arch = "x86_64", + // target_feature="bmi2", + // target_feature="adx" + ))] include!(concat!(env!("OUT_DIR"), "/field_assembly.rs")); #[derive(Derivative)] @@ -455,28 +459,32 @@ macro_rules! 
impl_field_mul_assign { // No-carry optimisation applied to CIOS if no_carry { - if $limbs <= 6 && cfg!(feature = "asm") { - #[cfg(feature = "asm")] - asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); - self.reduce(); - } else { - let mut r = [0u64; $limbs]; - let mut carry1 = 0u64; - let mut carry2 = 0u64; - - for i in 0..$limbs { - r[0] = fa::mac(r[0], (self.0).0[0], (other.0).0[i], &mut carry1); - let k = r[0].wrapping_mul(P::INV); - fa::mac_discard(r[0], k, P::MODULUS.0[0], &mut carry2); - for j in 1..$limbs { - r[j] = fa::mac_with_carry(r[j], (self.0).0[j], (other.0).0[i], &mut carry1); - r[j - 1] = fa::mac_with_carry(r[j], k, P::MODULUS.0[j], &mut carry2); - } - r[$limbs - 1] = carry1 + carry2; + #[cfg(all(feature = "asm", target_feature="bmi2", + target_feature="adx", target_arch = "x86_64"))] + { + if $limbs <= 6 + { + asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); + self.reduce(); + return; } - (self.0).0 = r; - self.reduce(); } + let mut r = [0u64; $limbs]; + let mut carry1 = 0u64; + let mut carry2 = 0u64; + + for i in 0..$limbs { + r[0] = fa::mac(r[0], (self.0).0[0], (other.0).0[i], &mut carry1); + let k = r[0].wrapping_mul(P::INV); + fa::mac_discard(r[0], k, P::MODULUS.0[0], &mut carry2); + for j in 1..$limbs { + r[j] = fa::mac_with_carry(r[j], (self.0).0[j], (other.0).0[i], &mut carry1); + r[j - 1] = fa::mac_with_carry(r[j], k, P::MODULUS.0[j], &mut carry2); + } + r[$limbs - 1] = carry1 + carry2; + } + (self.0).0 = r; + self.reduce(); // Alternative implementation } else { let mut r = [0u64; $limbs * 2]; @@ -538,49 +546,60 @@ macro_rules! impl_field_square_in_place { #[inline] #[unroll_for_loops] fn square_in_place(&mut self) -> &mut Self { - if $limbs <= 6 && cfg!(feature = "asm") { - #[cfg(feature = "asm")] - asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); - self.reduce(); - self - } else { - let mut r = [0u64; $limbs*2]; + // Checking the modulus at compile time + let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0; + let mut all_bits_set = P::MODULUS.0[$limbs - 1] == !0 - (1 << 63); + for i in 1..$limbs { + all_bits_set &= P::MODULUS.0[$limbs - i - 1] == !0u64; + } + let no_carry: bool = !(first_bit_set || all_bits_set); - let mut carry = 0; - for i in 0..$limbs { - if i < $limbs-1 { - for j in 0..$limbs { - if j >= (i+1) { r[i+j] = fa::mac_with_carry(r[i+j], (self.0).0[i], (self.0).0[j], &mut carry); } - } - r[$limbs+i] = carry; - carry = 0; - } + #[cfg(all(feature = "asm", target_feature="bmi2", + target_feature="adx", target_arch = "x86_64"))] + { + if $limbs <= 6 && no_carry + { + asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); + self.reduce(); + return self; } - r[$limbs*2-1] = r[$limbs*2-2] >> 63; - for i in 0..$limbs { r[$limbs*2-2-i] = (r[$limbs*2-2-i] << 1) | (r[$limbs*2-3-i] >> 63); } - for i in 3..$limbs { r[$limbs+1-i] = (r[$limbs+1-i] << 1) | (r[$limbs-i] >> 63); } - r[1] = r[1] << 1; + } + let mut r = [0u64; $limbs*2]; - for i in 0..$limbs { - r[2*i] = fa::mac_with_carry(r[2*i], (self.0).0[i], (self.0).0[i], &mut carry); - r[2*i+1] = fa::adc(r[2*i+1], 0, &mut carry); - } - // Montgomery reduction - let mut _carry2 = 0; - for i in 0..$limbs { - let k = r[i].wrapping_mul(P::INV); - let mut carry = 0; - fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); - for j in 1..$limbs { - r[j+i] = fa::mac_with_carry(r[j+i], k, P::MODULUS.0[j], &mut carry); + let mut carry = 0; + for i in 0..$limbs { + if i < $limbs-1 { + for j in 0..$limbs { + if j >= (i+1) { r[i+j] = fa::mac_with_carry(r[i+j], (self.0).0[i], 
(self.0).0[j], &mut carry); } } - r[$limbs+i] = fa::adc(r[$limbs+i], _carry2, &mut carry); - _carry2 = carry; + r[$limbs+i] = carry; + carry = 0; } - (self.0).0.copy_from_slice(&r[$limbs..]); - self.reduce(); - self } + r[$limbs*2-1] = r[$limbs*2-2] >> 63; + for i in 0..$limbs { r[$limbs*2-2-i] = (r[$limbs*2-2-i] << 1) | (r[$limbs*2-3-i] >> 63); } + for i in 3..$limbs { r[$limbs+1-i] = (r[$limbs+1-i] << 1) | (r[$limbs-i] >> 63); } + r[1] = r[1] << 1; + + for i in 0..$limbs { + r[2*i] = fa::mac_with_carry(r[2*i], (self.0).0[i], (self.0).0[i], &mut carry); + r[2*i+1] = fa::adc(r[2*i+1], 0, &mut carry); + } + // Montgomery reduction + let mut _carry2 = 0; + for i in 0..$limbs { + let k = r[i].wrapping_mul(P::INV); + let mut carry = 0; + fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); + for j in 1..$limbs { + r[j+i] = fa::mac_with_carry(r[j+i], k, P::MODULUS.0[j], &mut carry); + } + r[$limbs+i] = fa::adc(r[$limbs+i], _carry2, &mut carry); + _carry2 = carry; + } + (self.0).0.copy_from_slice(&r[$limbs..]); + self.reduce(); + self } }; } From feff30db8e830082f69f9733d933bfab76324817 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Sat, 18 Apr 2020 16:18:45 +0800 Subject: [PATCH 12/43] minor changes in config and naming --- algebra-core/field-assembly/src/context.rs | 2 +- algebra-core/field-assembly/src/lib.rs | 14 ++++---- algebra-core/mince/src/arithmetic.rs | 4 +-- algebra-core/src/fields/macros.rs | 37 ++++++++++------------ algebra/Cargo.toml | 8 ++++- 5 files changed, 33 insertions(+), 32 deletions(-) diff --git a/algebra-core/field-assembly/src/context.rs b/algebra-core/field-assembly/src/context.rs index 0fa56f0cd..5bfb09c41 100644 --- a/algebra-core/field-assembly/src/context.rs +++ b/algebra-core/field-assembly/src/context.rs @@ -73,7 +73,7 @@ impl Context { pub fn add_buffer (&mut self, extra_reg: usize) { self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" - let mut spill_buffer = [0u64; {}];", extra_reg))); + let mut spill_buffer = MaybeUninit::<[u64; {}]>::uninit();", extra_reg))); } pub fn add_asm (&mut self, ctx_string: String) { diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index 7e5d37f2f..785dfc567 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -19,13 +19,13 @@ pub fn generate_macro_string (num_limbs:usize) -> std::string::String { } let mut macro_string = String::from( "macro_rules! asm_mul { - ($limbs:expr, $a:expr, $b:expr, $modulus:expr, $inverse:expr) => { + ($limbs:expr, $a:expr, $b:expr, $modulus:expr, $mod_prime:expr) => { match $limbs {"); macro_string = format!("{}{}", macro_string, generate_matches(num_limbs, true)); macro_string = format!("{}{}", macro_string, "macro_rules! 
asm_square { - ($limbs:expr, $a:expr, $modulus:expr, $inverse:expr) => { + ($limbs:expr, $a:expr, $modulus:expr, $mod_prime:expr) => { match $limbs {"); macro_string = format!("{}{}", macro_string, generate_matches(num_limbs, false)); macro_string @@ -37,7 +37,7 @@ fn generate_asm_mul_string ( b: &str, modulus: &str, zero: &str, - inverse: &str, + mod_prime: &str, limbs: usize ) -> String { reg!(a0, a1, a, limbs); @@ -55,7 +55,7 @@ fn generate_asm_mul_string ( } else { mul_add_1!(a1, b1, zero, i, limbs); } - mul_add_shift_1!(m1, inverse, zero, i, limbs); + mul_add_shift_1!(m1, mod_prime, zero, i, limbs); } for i in 0..limbs { movq(R[i], a1[i]); @@ -89,10 +89,10 @@ fn generate_matches (num_limbs: usize, is_mul: bool) -> String { if is_mul { ctx.add_declaration("b", "r", "&$b"); } ctx.add_declaration("modulus", "r", "&$modulus"); ctx.add_declaration("0", "i", "0u64"); - ctx.add_declaration("inverse", "i", "$inverse"); + ctx.add_declaration("mod_prime", "i", "$mod_prime"); ctx.add_limb(limbs); - if limbs > 8 { + if limbs > MAX_REGS { ctx.add_buffer(2*limbs); ctx.add_declaration("buf", "r", "&mut spill_buffer"); } @@ -102,7 +102,7 @@ fn generate_matches (num_limbs: usize, is_mul: bool) -> String { &ctx.clone().try_get("b", "a"), &ctx.clone().get("modulus"), &ctx.clone().get("0"), - &ctx.clone().get("inverse"), + &ctx.clone().get("mod_prime"), limbs ); diff --git a/algebra-core/mince/src/arithmetic.rs b/algebra-core/mince/src/arithmetic.rs index cd2fcdb99..d820b2139 100644 --- a/algebra-core/mince/src/arithmetic.rs +++ b/algebra-core/mince/src/arithmetic.rs @@ -36,8 +36,8 @@ pub fn define_arithmetic() -> TokenStream { } macro_rules! mul_add_shift_1 { - ($a:ident, $inverse:ident, $zero:ident, $i:ident, $limbs:expr) => { - movq($inverse, RDX); + ($a:ident, $mod_prime:ident, $zero:ident, $i:ident, $limbs:expr) => { + movq($mod_prime, RDX); mulxq(R[$i], RDX, RAX); mulxq($a[0], RAX, RBX); adcxq(R[$i % $limbs], RAX); diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index 034fe21bb..5c670db26 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -2,11 +2,10 @@ macro_rules! impl_Fp { ($Fp:ident, $FpParameters:ident, $limbs:expr) => { pub trait $FpParameters: FpParameters {} - #[cfg(all(feature = "asm", - // target_arch = "x86_64", - // target_feature="bmi2", - // target_feature="adx" - ))] + #[cfg(all(feature = "asm", target_arch = "x86_64", target_feature="bmi2", target_feature="adx"))] + use std::mem::MaybeUninit; + + #[cfg(all(feature = "asm", target_arch = "x86_64", target_feature="bmi2", target_feature="adx"))] include!(concat!(env!("OUT_DIR"), "/field_assembly.rs")); #[derive(Derivative)] @@ -446,7 +445,7 @@ macro_rules! impl_Fp { /// zero bit in the rest of the modulus. macro_rules! impl_field_mul_assign { ($limbs:expr) => { - #[inline(never)] + #[inline] #[unroll_for_loops] fn mul_assign(&mut self, other: &Self) { // Checking the modulus at compile time @@ -455,19 +454,17 @@ macro_rules! 
impl_field_mul_assign { for i in 1..$limbs { all_bits_set &= P::MODULUS.0[$limbs - i - 1] == !0u64; } - let no_carry: bool = !(first_bit_set || all_bits_set); + let _no_carry: bool = !(first_bit_set || all_bits_set); // No-carry optimisation applied to CIOS - if no_carry { + if _no_carry { #[cfg(all(feature = "asm", target_feature="bmi2", target_feature="adx", target_arch = "x86_64"))] + if $limbs <= 6 { - if $limbs <= 6 - { - asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); - self.reduce(); - return; - } + asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); + self.reduce(); + return; } let mut r = [0u64; $limbs]; let mut carry1 = 0u64; @@ -552,17 +549,15 @@ macro_rules! impl_field_square_in_place { for i in 1..$limbs { all_bits_set &= P::MODULUS.0[$limbs - i - 1] == !0u64; } - let no_carry: bool = !(first_bit_set || all_bits_set); + let _no_carry: bool = !(first_bit_set || all_bits_set); #[cfg(all(feature = "asm", target_feature="bmi2", target_feature="adx", target_arch = "x86_64"))] + if $limbs <= 6 && _no_carry { - if $limbs <= 6 && no_carry - { - asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); - self.reduce(); - return self; - } + asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); + self.reduce(); + return self; } let mut r = [0u64; $limbs*2]; diff --git a/algebra/Cargo.toml b/algebra/Cargo.toml index 042cc4ceb..a4f48e0f7 100644 --- a/algebra/Cargo.toml +++ b/algebra/Cargo.toml @@ -47,4 +47,10 @@ mnt6_753 = [] std = [ "algebra-core/std" ] parallel = [ "std", "algebra-core/parallel" ] derive = [ "algebra-core/derive" ] -asm = [ "algebra-core/asm"] +asm = [ "algebra-core/asm" ] + +full_asm = [ "algebra-core/asm", "bls12_377", "bls12_381", "sw6", "mnt4_298", "mnt4_753", "mnt6_298", "mnt6_753", "edwards_bls12", "edwards_sw6", "jubjub" ] +small_asm = ["algebra-core/asm", "mnt4_298", "mnt6_298" ] +mid_asm = [ "algebra-core/asm", "bls12_377", "bls12_381", "edwards_bls12"] +big_asm = [ "algebra-core/asm", "sw6", "mnt4_753", "mnt6_753", "edwards_sw6" ] +mix_asm = [ "algebra-core/asm", "sw6", "mnt4_753", "bls12_381", "mnt6_298" ] From aab30ce95272647c1c3e9e5e5e1fa2fd2e282f36 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 17:05:48 +0800 Subject: [PATCH 13/43] readme asm instructions --- README.md | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 6e6032d1c..142ca767a 100644 --- a/README.md +++ b/README.md @@ -64,9 +64,9 @@ cargo build --release This library comes with unit tests for each of the provided crates. Run the tests with: ```bash cargo test -``` +``` -Lastly, this library comes with benchmarks for the following crates: +This library comes with benchmarks for the following crates: - [`algebra`](algebra) - [`dpc`](dpc) @@ -76,6 +76,16 @@ These benchmarks require the nightly Rust toolchain; to install this, run `rustu cargo +nightly bench ``` +To make use of `adcxq`, `adoxq` and `mulxq` available on most modern `x86_64` platforms (at least starting from Haswell): +```bash +RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test/build/bench --features asm +``` + +To run with multiple features, make sure to double quote the features. E.g. +```bash +RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test --features "asm bls12_381" +``` + ## License ZEXE is licensed under either of the following licenses, at your discretion. 
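To make the effect of the `asm` feature concrete, here is a minimal illustrative sketch (not part of the patch series) of the field arithmetic that the gated `asm_mul!`/`asm_square!` paths accelerate. It assumes the crate is built with the `RUSTFLAGS`/`--features asm` invocation documented in the README hunk above, the `bls12_381` curve feature, and an x86_64 CPU with BMI2/ADX; it uses only types and traits already imported by the existing benches.

```rust
// Minimal sketch, assuming the `asm` + `bls12_381` features and BMI2/ADX support.
// For BLS12-381's 6-limb Fq, the calls below are the ones routed through the
// generated asm_mul!/asm_square! macros by the gating added in the patches above.
use algebra::{bls12_381::fq::Fq, Field, UniformRand};
use rand::SeedableRng;
use rand_xorshift::XorShiftRng;
use std::ops::MulAssign;

fn main() {
    let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
    let a = Fq::rand(&mut rng);
    let b = Fq::rand(&mut rng);

    // Montgomery multiplication: dispatches to the generated x86_64 assembly
    // when the feature/target-feature/limb conditions hold.
    let mut prod = a;
    prod.mul_assign(&b);

    // Montgomery squaring: likewise dispatches to asm_square! under the same conditions.
    let mut sq = a;
    sq.square_in_place();

    // Squaring must agree with multiplying a value by itself, whichever path is taken.
    let mut check = a;
    check.mul_assign(&a);
    assert_eq!(sq, check);
}
```

If any gating condition fails (no `asm` feature, missing BMI2/ADX target features, or more than six limbs), the same calls take the portable CIOS and schoolbook-squaring paths shown in the `macros.rs` hunks of the preceding patches, so results are identical either way.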
From d71eac62f3b5324e5b8bdd551feae67bc6b3f52f Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 17:10:08 +0800 Subject: [PATCH 14/43] More detailed readme instructions --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 142ca767a..eaf08e4e3 100644 --- a/README.md +++ b/README.md @@ -76,12 +76,12 @@ These benchmarks require the nightly Rust toolchain; to install this, run `rustu cargo +nightly bench ``` -To make use of `adcxq`, `adoxq` and `mulxq` available on most modern `x86_64` platforms (at least starting from Haswell): +To make use of `adcxq`, `adoxq` and `mulxq` available on most modern `x86_64` platforms (Broadwell onwards for Intel and Ryzen onwards for AMD), leading to a 30-70% speedup, run the following: ```bash RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test/build/bench --features asm ``` -To run with multiple features, make sure to double quote the features. E.g. +To run with multiple features, make sure to double quote the features. e.g. ```bash RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test --features "asm bls12_381" ``` From dce1003415a08586a383d1281dcf7de86c877afa Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 17:14:40 +0800 Subject: [PATCH 15/43] more readme edits --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index eaf08e4e3..644573059 100644 --- a/README.md +++ b/README.md @@ -76,7 +76,7 @@ These benchmarks require the nightly Rust toolchain; to install this, run `rustu cargo +nightly bench ``` -To make use of `adcxq`, `adoxq` and `mulxq` available on most modern `x86_64` platforms (Broadwell onwards for Intel and Ryzen onwards for AMD), leading to a 30-70% speedup, run the following: +Compiling with `adcxq`, `adoxq` and `mulxq` instructions can lead to a 30-70% speedup. These are available on most `x86_64` platforms (Broadwell onwards for Intel and Ryzen onwards for AMD). 
Run the following command: ```bash RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test/build/bench --features asm ``` From 44516e220006f7d9d01aa08711e5a12b3243b495 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 21:49:51 +0800 Subject: [PATCH 16/43] cleaned up benches with macros, increasing code reuse --- algebra-benches/Cargo.toml | 2 + algebra-benches/benches/bls12_377/ec.rs | 183 ---------- algebra-benches/benches/bls12_377/fq.rs | 290 ---------------- algebra-benches/benches/bls12_377/fq12.rs | 113 ------- algebra-benches/benches/bls12_377/fq2.rs | 129 ------- algebra-benches/benches/bls12_377/fr.rs | 290 ---------------- algebra-benches/benches/bls12_377/mod.rs | 6 - algebra-benches/benches/bls12_377/pairing.rs | 76 ----- algebra-benches/benches/bls12_381/ec.rs | 187 ---------- algebra-benches/benches/bls12_381/fq.rs | 290 ---------------- algebra-benches/benches/bls12_381/fq12.rs | 112 ------ algebra-benches/benches/bls12_381/fq2.rs | 129 ------- algebra-benches/benches/bls12_381/fr.rs | 290 ---------------- algebra-benches/benches/bls12_381/mod.rs | 6 - algebra-benches/benches/bls12_381/pairing.rs | 78 ----- .../benches/curve_and_field_benches.rs | 7 - algebra-benches/benches/sw6/ec.rs | 183 ---------- algebra-benches/benches/sw6/fq.rs | 290 ---------------- algebra-benches/benches/sw6/fq3.rs | 129 ------- algebra-benches/benches/sw6/fq6.rs | 113 ------- algebra-benches/benches/sw6/fr.rs | 291 ---------------- algebra-benches/benches/sw6/mod.rs | 6 - algebra-benches/benches/sw6/pairing.rs | 73 ---- algebra-benches/src/bls12_377.rs | 17 + algebra-benches/src/bls12_381.rs | 17 + algebra-benches/src/lib.rs | 9 + algebra-benches/src/macros/ec.rs | 167 +++++++++ algebra-benches/src/macros/field.rs | 318 ++++++++++++++++++ algebra-benches/src/macros/mod.rs | 15 + algebra-benches/src/macros/pairing.rs | 61 ++++ algebra-benches/src/macros/utils.rs | 36 ++ algebra-benches/src/sw6.rs | 17 + 32 files changed, 659 insertions(+), 3271 deletions(-) delete mode 100644 algebra-benches/benches/bls12_377/ec.rs delete mode 100644 algebra-benches/benches/bls12_377/fq.rs delete mode 100644 algebra-benches/benches/bls12_377/fq12.rs delete mode 100644 algebra-benches/benches/bls12_377/fq2.rs delete mode 100644 algebra-benches/benches/bls12_377/fr.rs delete mode 100644 algebra-benches/benches/bls12_377/mod.rs delete mode 100644 algebra-benches/benches/bls12_377/pairing.rs delete mode 100644 algebra-benches/benches/bls12_381/ec.rs delete mode 100644 algebra-benches/benches/bls12_381/fq.rs delete mode 100644 algebra-benches/benches/bls12_381/fq12.rs delete mode 100644 algebra-benches/benches/bls12_381/fq2.rs delete mode 100644 algebra-benches/benches/bls12_381/fr.rs delete mode 100644 algebra-benches/benches/bls12_381/mod.rs delete mode 100644 algebra-benches/benches/bls12_381/pairing.rs delete mode 100644 algebra-benches/benches/curve_and_field_benches.rs delete mode 100644 algebra-benches/benches/sw6/ec.rs delete mode 100644 algebra-benches/benches/sw6/fq.rs delete mode 100644 algebra-benches/benches/sw6/fq3.rs delete mode 100644 algebra-benches/benches/sw6/fq6.rs delete mode 100644 algebra-benches/benches/sw6/fr.rs delete mode 100644 algebra-benches/benches/sw6/mod.rs delete mode 100644 algebra-benches/benches/sw6/pairing.rs create mode 100644 algebra-benches/src/bls12_377.rs create mode 100644 algebra-benches/src/bls12_381.rs create mode 100644 algebra-benches/src/macros/ec.rs create mode 100644 
algebra-benches/src/macros/field.rs create mode 100644 algebra-benches/src/macros/mod.rs create mode 100644 algebra-benches/src/macros/pairing.rs create mode 100644 algebra-benches/src/macros/utils.rs create mode 100644 algebra-benches/src/sw6.rs diff --git a/algebra-benches/Cargo.toml b/algebra-benches/Cargo.toml index b46395564..49ff81dfb 100644 --- a/algebra-benches/Cargo.toml +++ b/algebra-benches/Cargo.toml @@ -27,6 +27,8 @@ algebra = { path = "../algebra", features = [ "full" ] } blake2 = "0.8.1" rand = "0.7" rand_xorshift = { version = "0.2" } +paste = "0.1" [features] asm = [ "algebra/asm"] +n_fold = [] diff --git a/algebra-benches/benches/bls12_377/ec.rs b/algebra-benches/benches/bls12_377/ec.rs deleted file mode 100644 index bf332e32a..000000000 --- a/algebra-benches/benches/bls12_377/ec.rs +++ /dev/null @@ -1,183 +0,0 @@ -mod g1 { - use algebra::{ - bls12_377::{Fr, G1Affine, G1Projective as G1}, - ProjectiveCurve, UniformRand, - }; - use core::ops::AddAssign; - use rand::SeedableRng; - use rand_xorshift::XorShiftRng; - - #[bench] - fn bench_g1_rand(b: &mut ::test::Bencher) { - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - b.iter(|| G1::rand(&mut rng)); - } - - #[bench] - fn bench_g1_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, Fr)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp *= v[count].1; - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g1_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G1)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g1_add_assign_mixed(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G1Affine)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng).into())) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign_mixed(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g1_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G1)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); - } -} - -mod g2 { - use algebra::{ - bls12_377::{Fr, G2Affine, G2Projective as G2}, - ProjectiveCurve, UniformRand, - }; - use core::ops::AddAssign; - use rand::SeedableRng; - use rand_xorshift::XorShiftRng; - - #[bench] - fn bench_g2_rand(b: &mut ::test::Bencher) { - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - b.iter(|| G2::rand(&mut rng)); - } - - #[bench] - fn bench_g2_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, Fr)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp *= v[count].1; - 
count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g2_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, G2)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g2_add_assign_mixed(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, G2Affine)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng).into())) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign_mixed(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g2_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, G2)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); - } -} diff --git a/algebra-benches/benches/bls12_377/fq.rs b/algebra-benches/benches/bls12_377/fq.rs deleted file mode 100644 index 86a430e65..000000000 --- a/algebra-benches/benches/bls12_377/fq.rs +++ /dev/null @@ -1,290 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -use algebra::{ - biginteger::BigInteger384 as FqRepr, bls12_377::fq::Fq, BigInteger, Field, PrimeField, - SquareRootField, -}; - -#[bench] -fn bench_fq_repr_add_nocarry(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FqRepr, FqRepr)> = (0..SAMPLES) - .map(|_| { - let mut tmp1 = FqRepr::rand(&mut rng); - let mut tmp2 = FqRepr::rand(&mut rng); - // Shave a few bits off to avoid overflow. - for _ in 0..3 { - tmp1.div2(); - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_nocarry(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_sub_noborrow(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FqRepr, FqRepr)> = (0..SAMPLES) - .map(|_| { - let tmp1 = FqRepr::rand(&mut rng); - let mut tmp2 = tmp1; - // Ensure tmp2 is smaller than tmp1. 
- for _ in 0..10 { - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_noborrow(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_num_bits(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FqRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].num_bits(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_mul2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FqRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.mul2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_div2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FqRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.div2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq, Fq)> = (0..SAMPLES) - .map(|_| (Fq::rand(&mut rng), Fq::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq, Fq)> = (0..SAMPLES) - .map(|_| (Fq::rand(&mut rng), Fq::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq, Fq)> = (0..SAMPLES) - .map(|_| (Fq::rand(&mut rng), Fq::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].inverse() - }); -} - -#[bench] -fn 
bench_fq_negate(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp = -tmp; - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_sqrt(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| { - let mut tmp = Fq::rand(&mut rng); - tmp.square_in_place(); - tmp - }) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].sqrt() - }); -} - -#[bench] -fn bench_fq_into_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].into_repr() - }); -} - -#[bench] -fn bench_fq_from_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| Fq::rand(&mut rng).into_repr()) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - Fq::from_repr(v[count]) - }); -} diff --git a/algebra-benches/benches/bls12_377/fq12.rs b/algebra-benches/benches/bls12_377/fq12.rs deleted file mode 100644 index 3ee1f0c06..000000000 --- a/algebra-benches/benches/bls12_377/fq12.rs +++ /dev/null @@ -1,113 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -use algebra::{bls12_377::Fq12, Field}; - -#[bench] -fn bench_fq12_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq12, Fq12)> = (0..SAMPLES) - .map(|_| (Fq12::rand(&mut rng), Fq12::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq12_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq12, Fq12)> = (0..SAMPLES) - .map(|_| (Fq12::rand(&mut rng), Fq12::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq12_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq12, Fq12)> = (0..SAMPLES) - .map(|_| (Fq12::rand(&mut rng), Fq12::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq12_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq12::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq12_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: 
Vec = (0..SAMPLES).map(|_| Fq12::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq12_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq12::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].inverse(); - count = (count + 1) % SAMPLES; - tmp - }); -} diff --git a/algebra-benches/benches/bls12_377/fq2.rs b/algebra-benches/benches/bls12_377/fq2.rs deleted file mode 100644 index ac38bde8a..000000000 --- a/algebra-benches/benches/bls12_377/fq2.rs +++ /dev/null @@ -1,129 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -use algebra::{bls12_377::fq2::Fq2, Field, SquareRootField}; - -#[bench] -fn bench_fq2_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq2, Fq2)> = (0..SAMPLES) - .map(|_| (Fq2::rand(&mut rng), Fq2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq2, Fq2)> = (0..SAMPLES) - .map(|_| (Fq2::rand(&mut rng), Fq2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq2, Fq2)> = (0..SAMPLES) - .map(|_| (Fq2::rand(&mut rng), Fq2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq2::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq2::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq2::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].inverse(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_sqrt(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq2::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].sqrt(); - count = (count 
+ 1) % SAMPLES; - tmp - }); -} diff --git a/algebra-benches/benches/bls12_377/fr.rs b/algebra-benches/benches/bls12_377/fr.rs deleted file mode 100644 index 786605d65..000000000 --- a/algebra-benches/benches/bls12_377/fr.rs +++ /dev/null @@ -1,290 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -use algebra::{ - biginteger::BigInteger256 as FrRepr, bls12_377::fr::Fr, BigInteger, Field, PrimeField, - SquareRootField, -}; - -#[bench] -fn bench_fr_repr_add_nocarry(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FrRepr, FrRepr)> = (0..SAMPLES) - .map(|_| { - let mut tmp1 = FrRepr::rand(&mut rng); - let mut tmp2 = FrRepr::rand(&mut rng); - // Shave a few bits off to avoid overflow. - for _ in 0..3 { - tmp1.div2(); - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_nocarry(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_sub_noborrow(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FrRepr, FrRepr)> = (0..SAMPLES) - .map(|_| { - let tmp1 = FrRepr::rand(&mut rng); - let mut tmp2 = tmp1; - // Ensure tmp2 is smaller than tmp1. - for _ in 0..10 { - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_noborrow(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_num_bits(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FrRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].num_bits(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_mul2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FrRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.mul2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_div2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FrRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.div2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fr, Fr)> = (0..SAMPLES) - .map(|_| (Fr::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fr, Fr)> = (0..SAMPLES) - .map(|_| (Fr::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_mul_assign(b: &mut 
::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fr, Fr)> = (0..SAMPLES) - .map(|_| (Fr::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].inverse() - }); -} - -#[bench] -fn bench_fr_negate(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp = -tmp; - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_sqrt(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| { - let mut tmp = Fr::rand(&mut rng); - tmp.square_in_place(); - tmp - }) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].sqrt() - }); -} - -#[bench] -fn bench_fr_into_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].into_repr() - }); -} - -#[bench] -fn bench_fr_from_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| Fr::rand(&mut rng).into_repr()) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - Fr::from_repr(v[count]) - }); -} diff --git a/algebra-benches/benches/bls12_377/mod.rs b/algebra-benches/benches/bls12_377/mod.rs deleted file mode 100644 index ea18d0268..000000000 --- a/algebra-benches/benches/bls12_377/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -mod ec; -mod fq; -mod fq12; -mod fq2; -mod fr; -mod pairing; diff --git a/algebra-benches/benches/bls12_377/pairing.rs b/algebra-benches/benches/bls12_377/pairing.rs deleted file mode 100644 index 8ca4cbd32..000000000 --- a/algebra-benches/benches/bls12_377/pairing.rs +++ /dev/null @@ -1,76 +0,0 @@ -mod pairing { - use algebra::UniformRand; - use rand::SeedableRng; - use rand_xorshift::XorShiftRng; - - use algebra::{ - bls12::{G1Prepared, G2Prepared}, - bls12_377::{ - Bls12_377, Fq12, G1Affine, G1Projective as G1, G2Affine, 
G2Projective as G2, Parameters, - }, - PairingEngine, - }; - - #[bench] - fn bench_pairing_miller_loop(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1Prepared, G2Prepared)> = (0..SAMPLES) - .map(|_| { - ( - G1Affine::from(G1::rand(&mut rng)).into(), - G2Affine::from(G2::rand(&mut rng)).into(), - ) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let tmp = Bls12_377::miller_loop(&[(v[count].0.clone(), v[count].1.clone())]); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_pairing_final_exponentiation(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| { - let p = G1Affine::from(G1::rand(&mut rng)).into(); - let q = G2Affine::from(G2::rand(&mut rng)).into(); - Bls12_377::miller_loop(&[(p, q)]) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let tmp = Bls12_377::final_exponentiation(&v[count]); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_pairing_full(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G2)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let tmp = Bls12_377::pairing(v[count].0, v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } -} diff --git a/algebra-benches/benches/bls12_381/ec.rs b/algebra-benches/benches/bls12_381/ec.rs deleted file mode 100644 index 3ec0098e8..000000000 --- a/algebra-benches/benches/bls12_381/ec.rs +++ /dev/null @@ -1,187 +0,0 @@ -mod g1 { - use algebra::UniformRand; - use rand::SeedableRng; - use rand_xorshift::XorShiftRng; - use std::ops::AddAssign; - - use algebra::{ - bls12_381::{Fr, G1Affine, G1Projective as G1}, - ProjectiveCurve, - }; - - #[bench] - fn bench_g1_rand(b: &mut ::test::Bencher) { - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - b.iter(|| G1::rand(&mut rng)); - } - - #[bench] - fn bench_g1_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, Fr)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp *= v[count].1; - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g1_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G1)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g1_add_assign_mixed(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G1Affine)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng).into())) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign_mixed(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g1_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G1)> = (0..SAMPLES) - .map(|_| 
(G1::rand(&mut rng), G1::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); - } -} - -mod g2 { - use algebra::UniformRand; - use rand::SeedableRng; - use rand_xorshift::XorShiftRng; - use std::ops::AddAssign; - - use algebra::{ - bls12_381::{Fr, G2Affine, G2Projective as G2}, - ProjectiveCurve, - }; - - #[bench] - fn bench_g2_rand(b: &mut ::test::Bencher) { - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - b.iter(|| G2::rand(&mut rng)); - } - - #[bench] - fn bench_g2_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, Fr)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp *= v[count].1; - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g2_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, G2)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g2_add_assign_mixed(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, G2Affine)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng).into())) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign_mixed(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g2_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, G2)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); - } -} diff --git a/algebra-benches/benches/bls12_381/fq.rs b/algebra-benches/benches/bls12_381/fq.rs deleted file mode 100644 index 16987d8a1..000000000 --- a/algebra-benches/benches/bls12_381/fq.rs +++ /dev/null @@ -1,290 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -use algebra::{ - biginteger::BigInteger384 as FqRepr, bls12_381::fq::Fq, BigInteger, Field, PrimeField, - SquareRootField, -}; - -#[bench] -fn bench_fq_repr_add_nocarry(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FqRepr, FqRepr)> = (0..SAMPLES) - .map(|_| { - let mut tmp1 = FqRepr::rand(&mut rng); - let mut tmp2 = FqRepr::rand(&mut rng); - // Shave a few bits off to avoid overflow. 
- for _ in 0..3 { - tmp1.div2(); - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_nocarry(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_sub_noborrow(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FqRepr, FqRepr)> = (0..SAMPLES) - .map(|_| { - let tmp1 = FqRepr::rand(&mut rng); - let mut tmp2 = tmp1; - // Ensure tmp2 is smaller than tmp1. - for _ in 0..10 { - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_noborrow(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_num_bits(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FqRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].num_bits(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_mul2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FqRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.mul2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_div2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FqRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.div2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq, Fq)> = (0..SAMPLES) - .map(|_| (Fq::rand(&mut rng), Fq::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq, Fq)> = (0..SAMPLES) - .map(|_| (Fq::rand(&mut rng), Fq::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq, Fq)> = (0..SAMPLES) - .map(|_| (Fq::rand(&mut rng), Fq::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - 
let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].inverse() - }); -} - -#[bench] -fn bench_fq_negate(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp = -tmp; - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_sqrt(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| { - let mut tmp = Fq::rand(&mut rng); - tmp.square_in_place(); - tmp - }) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].sqrt() - }); -} - -#[bench] -fn bench_fq_into_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].into_repr() - }); -} - -#[bench] -fn bench_fq_from_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| Fq::rand(&mut rng).into_repr()) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - Fq::from_repr(v[count]) - }); -} diff --git a/algebra-benches/benches/bls12_381/fq12.rs b/algebra-benches/benches/bls12_381/fq12.rs deleted file mode 100644 index c92acf8b3..000000000 --- a/algebra-benches/benches/bls12_381/fq12.rs +++ /dev/null @@ -1,112 +0,0 @@ -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -use algebra::{bls12_381::Fq12, Field, UniformRand}; - -#[bench] -fn bench_fq12_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq12, Fq12)> = (0..SAMPLES) - .map(|_| (Fq12::rand(&mut rng), Fq12::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq12_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq12, Fq12)> = (0..SAMPLES) - .map(|_| (Fq12::rand(&mut rng), Fq12::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq12_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq12, Fq12)> = (0..SAMPLES) - .map(|_| (Fq12::rand(&mut rng), Fq12::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} 
- -#[bench] -fn bench_fq12_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq12::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq12_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq12::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq12_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq12::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].inverse(); - count = (count + 1) % SAMPLES; - tmp - }); -} diff --git a/algebra-benches/benches/bls12_381/fq2.rs b/algebra-benches/benches/bls12_381/fq2.rs deleted file mode 100644 index 883a41714..000000000 --- a/algebra-benches/benches/bls12_381/fq2.rs +++ /dev/null @@ -1,129 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -use algebra::{bls12_381::fq2::Fq2, Field, SquareRootField}; - -#[bench] -fn bench_fq2_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq2, Fq2)> = (0..SAMPLES) - .map(|_| (Fq2::rand(&mut rng), Fq2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq2, Fq2)> = (0..SAMPLES) - .map(|_| (Fq2::rand(&mut rng), Fq2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq2, Fq2)> = (0..SAMPLES) - .map(|_| (Fq2::rand(&mut rng), Fq2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq2::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq2::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = 
XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq2::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].inverse(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq2_sqrt(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq2::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].sqrt(); - count = (count + 1) % SAMPLES; - tmp - }); -} diff --git a/algebra-benches/benches/bls12_381/fr.rs b/algebra-benches/benches/bls12_381/fr.rs deleted file mode 100644 index a5cc3f4f9..000000000 --- a/algebra-benches/benches/bls12_381/fr.rs +++ /dev/null @@ -1,290 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -use algebra::{ - biginteger::BigInteger256 as FrRepr, bls12_381::fr::Fr, BigInteger, Field, PrimeField, - SquareRootField, -}; - -#[bench] -fn bench_fr_repr_add_nocarry(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FrRepr, FrRepr)> = (0..SAMPLES) - .map(|_| { - let mut tmp1 = FrRepr::rand(&mut rng); - let mut tmp2 = FrRepr::rand(&mut rng); - // Shave a few bits off to avoid overflow. - for _ in 0..3 { - tmp1.div2(); - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_nocarry(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_sub_noborrow(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FrRepr, FrRepr)> = (0..SAMPLES) - .map(|_| { - let tmp1 = FrRepr::rand(&mut rng); - let mut tmp2 = tmp1; - // Ensure tmp2 is smaller than tmp1. 
- for _ in 0..10 { - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_noborrow(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_num_bits(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FrRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].num_bits(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_mul2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FrRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.mul2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_div2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FrRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.div2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fr, Fr)> = (0..SAMPLES) - .map(|_| (Fr::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fr, Fr)> = (0..SAMPLES) - .map(|_| (Fr::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fr, Fr)> = (0..SAMPLES) - .map(|_| (Fr::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].inverse() - }); -} - -#[bench] -fn 
bench_fr_negate(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp = -tmp; - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_sqrt(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| { - let mut tmp = Fr::rand(&mut rng); - tmp.square_in_place(); - tmp - }) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].sqrt() - }); -} - -#[bench] -fn bench_fr_into_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].into_repr() - }); -} - -#[bench] -fn bench_fr_from_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| Fr::rand(&mut rng).into_repr()) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - Fr::from_repr(v[count]) - }); -} diff --git a/algebra-benches/benches/bls12_381/mod.rs b/algebra-benches/benches/bls12_381/mod.rs deleted file mode 100644 index ea18d0268..000000000 --- a/algebra-benches/benches/bls12_381/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -mod ec; -mod fq; -mod fq12; -mod fq2; -mod fr; -mod pairing; diff --git a/algebra-benches/benches/bls12_381/pairing.rs b/algebra-benches/benches/bls12_381/pairing.rs deleted file mode 100644 index 78aa7db27..000000000 --- a/algebra-benches/benches/bls12_381/pairing.rs +++ /dev/null @@ -1,78 +0,0 @@ -mod pairing { - use algebra::UniformRand; - use rand::SeedableRng; - use rand_xorshift::XorShiftRng; - - use algebra::{ - bls12::{G1Prepared, G2Prepared}, - bls12_381::{ - Bls12_381, Fq12, G1Affine, G1Projective as G1, G2Affine, G2Projective as G2, Parameters, - }, - PairingEngine, - }; - - #[bench] - fn bench_pairing_miller_loop(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1Prepared, G2Prepared)> = (0..SAMPLES) - .map(|_| { - ( - G1Affine::from(G1::rand(&mut rng)).into(), - G2Affine::from(G2::rand(&mut rng)).into(), - ) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let tmp = Bls12_381::miller_loop(&[(v[count].0.clone(), v[count].1.clone())]); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_pairing_final_exponentiation(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| { - ( - G1Affine::from(G1::rand(&mut rng)).into(), - G2Affine::from(G2::rand(&mut rng)).into(), - ) - }) - .map(|(p, q)| Bls12_381::miller_loop(&[(p, q)])) - .collect(); - - let mut count = 0; - b.iter(|| { - let tmp = Bls12_381::final_exponentiation(&v[count]); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_pairing_full(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G2)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let tmp = 
Bls12_381::pairing(v[count].0, v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } -} diff --git a/algebra-benches/benches/curve_and_field_benches.rs b/algebra-benches/benches/curve_and_field_benches.rs deleted file mode 100644 index 044e3cec6..000000000 --- a/algebra-benches/benches/curve_and_field_benches.rs +++ /dev/null @@ -1,7 +0,0 @@ -#![feature(test)] - -extern crate test; - -mod bls12_377; -mod bls12_381; -mod sw6; diff --git a/algebra-benches/benches/sw6/ec.rs b/algebra-benches/benches/sw6/ec.rs deleted file mode 100644 index e9f747920..000000000 --- a/algebra-benches/benches/sw6/ec.rs +++ /dev/null @@ -1,183 +0,0 @@ -mod g1 { - use algebra::{ - sw6::{Fr, G1Affine, G1Projective as G1}, - ProjectiveCurve, UniformRand, - }; - use rand::SeedableRng; - use rand_xorshift::XorShiftRng; - use std::ops::AddAssign; - - #[bench] - fn bench_g1_rand(b: &mut ::test::Bencher) { - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - b.iter(|| G1::rand(&mut rng)); - } - - #[bench] - fn bench_g1_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, Fr)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp *= v[count].1; - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g1_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G1)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g1_add_assign_mixed(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G1Affine)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng).into())) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign_mixed(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g1_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G1)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); - } -} - -mod g2 { - use algebra::{ - sw6::{Fr, G2Affine, G2Projective as G2}, - ProjectiveCurve, UniformRand, - }; - use rand::SeedableRng; - use rand_xorshift::XorShiftRng; - use std::ops::AddAssign; - - #[bench] - fn bench_g2_rand(b: &mut ::test::Bencher) { - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - b.iter(|| G2::rand(&mut rng)); - } - - #[bench] - fn bench_g2_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, Fr)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp *= v[count].1; - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g2_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: 
Vec<(G2, G2)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g2_add_assign_mixed(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, G2Affine)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng).into())) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign_mixed(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_g2_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G2, G2)> = (0..SAMPLES) - .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); - } -} diff --git a/algebra-benches/benches/sw6/fq.rs b/algebra-benches/benches/sw6/fq.rs deleted file mode 100644 index 093d62c72..000000000 --- a/algebra-benches/benches/sw6/fq.rs +++ /dev/null @@ -1,290 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; - -use algebra::{ - biginteger::BigInteger832 as FqRepr, sw6::fq::Fq, BigInteger, Field, PrimeField, - SquareRootField, -}; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -#[bench] -fn bench_fq_repr_add_nocarry(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FqRepr, FqRepr)> = (0..SAMPLES) - .map(|_| { - let mut tmp1 = FqRepr::rand(&mut rng); - let mut tmp2 = FqRepr::rand(&mut rng); - // Shave a few bits off to avoid overflow. - for _ in 0..3 { - tmp1.div2(); - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_nocarry(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_sub_noborrow(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FqRepr, FqRepr)> = (0..SAMPLES) - .map(|_| { - let tmp1 = FqRepr::rand(&mut rng); - let mut tmp2 = tmp1; - // Ensure tmp2 is smaller than tmp1. 
- for _ in 0..10 { - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_noborrow(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_num_bits(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FqRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].num_bits(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_mul2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FqRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.mul2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_repr_div2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FqRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.div2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq, Fq)> = (0..SAMPLES) - .map(|_| (Fq::rand(&mut rng), Fq::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq, Fq)> = (0..SAMPLES) - .map(|_| (Fq::rand(&mut rng), Fq::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq, Fq)> = (0..SAMPLES) - .map(|_| (Fq::rand(&mut rng), Fq::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].inverse() - }); -} - -#[bench] -fn 
bench_fq_negate(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp = -tmp; - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq_sqrt(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| { - let mut tmp = Fq::rand(&mut rng); - tmp.square_in_place(); - tmp - }) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].sqrt() - }); -} - -#[bench] -fn bench_fq_into_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].into_repr() - }); -} - -#[bench] -fn bench_fq_from_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| Fq::rand(&mut rng).into_repr()) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - Fq::from_repr(v[count]) - }); -} diff --git a/algebra-benches/benches/sw6/fq3.rs b/algebra-benches/benches/sw6/fq3.rs deleted file mode 100644 index 624a3f485..000000000 --- a/algebra-benches/benches/sw6/fq3.rs +++ /dev/null @@ -1,129 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; - -use algebra::{sw6::fq3::Fq3, Field, SquareRootField}; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -#[bench] -fn bench_fq3_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq3, Fq3)> = (0..SAMPLES) - .map(|_| (Fq3::rand(&mut rng), Fq3::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq3_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq3, Fq3)> = (0..SAMPLES) - .map(|_| (Fq3::rand(&mut rng), Fq3::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq3_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq3, Fq3)> = (0..SAMPLES) - .map(|_| (Fq3::rand(&mut rng), Fq3::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq3_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq3::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq3_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = 
(0..SAMPLES).map(|_| Fq3::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq3_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq3::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].inverse(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq3_sqrt(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq3::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].sqrt(); - count = (count + 1) % SAMPLES; - tmp - }); -} diff --git a/algebra-benches/benches/sw6/fq6.rs b/algebra-benches/benches/sw6/fq6.rs deleted file mode 100644 index 46376069a..000000000 --- a/algebra-benches/benches/sw6/fq6.rs +++ /dev/null @@ -1,113 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; - -use algebra::{sw6::Fq6, Field}; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -#[bench] -fn bench_fq6_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq6, Fq6)> = (0..SAMPLES) - .map(|_| (Fq6::rand(&mut rng), Fq6::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq6_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq6, Fq6)> = (0..SAMPLES) - .map(|_| (Fq6::rand(&mut rng), Fq6::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq6_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fq6, Fq6)> = (0..SAMPLES) - .map(|_| (Fq6::rand(&mut rng), Fq6::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq6_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq6::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq6_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq6::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fq6_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fq6::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].inverse(); - count = (count + 1) % SAMPLES; - tmp - }); -} diff --git 
a/algebra-benches/benches/sw6/fr.rs b/algebra-benches/benches/sw6/fr.rs deleted file mode 100644 index caaff5d81..000000000 --- a/algebra-benches/benches/sw6/fr.rs +++ /dev/null @@ -1,291 +0,0 @@ -use algebra::UniformRand; -use rand::SeedableRng; -use rand_xorshift::XorShiftRng; - -use algebra::{ - biginteger::{BigInteger, BigInteger384 as FrRepr}, - sw6::Fr, - Field, PrimeField, SquareRootField, -}; -use std::ops::{AddAssign, MulAssign, SubAssign}; - -#[bench] -fn bench_fr_repr_add_nocarry(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FrRepr, FrRepr)> = (0..SAMPLES) - .map(|_| { - let mut tmp1 = FrRepr::rand(&mut rng); - let mut tmp2 = FrRepr::rand(&mut rng); - // Shave a few bits off to avoid overflow. - for _ in 0..3 { - tmp1.div2(); - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_nocarry(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_sub_noborrow(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(FrRepr, FrRepr)> = (0..SAMPLES) - .map(|_| { - let tmp1 = FrRepr::rand(&mut rng); - let mut tmp2 = tmp1; - // Ensure tmp2 is smaller than tmp1. - for _ in 0..10 { - tmp2.div2(); - } - (tmp1, tmp2) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_noborrow(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_num_bits(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FrRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let tmp = v[count].num_bits(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_mul2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FrRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.mul2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_repr_div2(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| FrRepr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.div2(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_add_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fr, Fr)> = (0..SAMPLES) - .map(|_| (Fr::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_sub_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fr, Fr)> = (0..SAMPLES) - .map(|_| (Fr::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_mul_assign(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = 
XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(Fr, Fr)> = (0..SAMPLES) - .map(|_| (Fr::rand(&mut rng), Fr::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_double(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.double_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_square(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp.square_in_place(); - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_inverse(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].inverse() - }); -} - -#[bench] -fn bench_fr_negate(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - let mut tmp = v[count]; - tmp = -tmp; - count = (count + 1) % SAMPLES; - tmp - }); -} - -#[bench] -fn bench_fr_sqrt(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| { - let mut tmp = Fr::rand(&mut rng); - tmp.square_in_place(); - tmp - }) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].sqrt() - }); -} - -#[bench] -fn bench_fr_into_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES).map(|_| Fr::rand(&mut rng)).collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - v[count].into_repr() - }); -} - -#[bench] -fn bench_fr_from_repr(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| Fr::rand(&mut rng).into_repr()) - .collect(); - - let mut count = 0; - b.iter(|| { - count = (count + 1) % SAMPLES; - Fr::from_repr(v[count]) - }); -} diff --git a/algebra-benches/benches/sw6/mod.rs b/algebra-benches/benches/sw6/mod.rs deleted file mode 100644 index 66eed9a18..000000000 --- a/algebra-benches/benches/sw6/mod.rs +++ /dev/null @@ -1,6 +0,0 @@ -mod ec; -mod fq; -mod fq3; -mod fq6; -mod fr; -mod pairing; diff --git a/algebra-benches/benches/sw6/pairing.rs b/algebra-benches/benches/sw6/pairing.rs deleted file mode 100644 index 76bedb218..000000000 --- a/algebra-benches/benches/sw6/pairing.rs +++ /dev/null @@ -1,73 +0,0 @@ -mod pairing { - use algebra::{ - sw6::{Fq6, G1Affine, G1Projective as G1, G2Affine, G2Projective as G2, SW6}, - PairingEngine, UniformRand, - }; - use rand::SeedableRng; - use rand_xorshift::XorShiftRng; - - #[bench] - fn bench_pairing_miller_loop(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = 
XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1Affine, G2Affine)> = (0..SAMPLES) - .map(|_| { - ( - G1Affine::from(G1::rand(&mut rng)).into(), - G2Affine::from(G2::rand(&mut rng)).into(), - ) - }) - .collect(); - - let mut count = 0; - b.iter(|| { - let tmp = SW6::miller_loop(&[(v[count].0.clone(), v[count].1.clone())]); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_pairing_final_exponentiation(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec = (0..SAMPLES) - .map(|_| { - ( - G1Affine::from(G1::rand(&mut rng)).into(), - G2Affine::from(G2::rand(&mut rng)).into(), - ) - }) - .map(|(p, q)| SW6::miller_loop(&[(p, q)])) - .collect(); - - let mut count = 0; - b.iter(|| { - let tmp = SW6::final_exponentiation(&v[count]); - count = (count + 1) % SAMPLES; - tmp - }); - } - - #[bench] - fn bench_pairing_full(b: &mut ::test::Bencher) { - const SAMPLES: usize = 1000; - - let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - - let v: Vec<(G1, G2)> = (0..SAMPLES) - .map(|_| (G1::rand(&mut rng), G2::rand(&mut rng))) - .collect(); - - let mut count = 0; - b.iter(|| { - let tmp = SW6::pairing(v[count].0, v[count].1); - count = (count + 1) % SAMPLES; - tmp - }); - } -} diff --git a/algebra-benches/src/bls12_377.rs b/algebra-benches/src/bls12_377.rs new file mode 100644 index 000000000..ab8e006d0 --- /dev/null +++ b/algebra-benches/src/bls12_377.rs @@ -0,0 +1,17 @@ +use rand::SeedableRng; +use rand_xorshift::XorShiftRng; +use std::ops::{AddAssign, MulAssign, SubAssign}; + +use algebra::{bls12_377::{Fq12, fq2::Fq2, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, + Bls12_377, G2Affine, G2Projective as G2, Parameters}, + PairingEngine, + BigInteger, Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, + biginteger::{BigInteger384 as FqRepr, BigInteger256 as FrRepr}, + bls12::{G1Prepared, G2Prepared},}; + +ec_bench!(); +f_bench!(1, Fq2, Fq2, fq2); +f_bench!(2, Fq12, Fq12, fq12); +f_bench!(Fq, Fq, FqRepr, FqRepr, fq); +f_bench!(Fr, Fr, FrRepr, FrRepr, fr); +pairing_bench!(Bls12_377, Fq12, prepared_v); diff --git a/algebra-benches/src/bls12_381.rs b/algebra-benches/src/bls12_381.rs new file mode 100644 index 000000000..431e3678a --- /dev/null +++ b/algebra-benches/src/bls12_381.rs @@ -0,0 +1,17 @@ +use rand::SeedableRng; +use rand_xorshift::XorShiftRng; +use std::ops::{AddAssign, MulAssign, SubAssign}; + +use algebra::{bls12_381::{Fq12, fq2::Fq2, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, + Bls12_381, G2Affine, G2Projective as G2, Parameters}, + PairingEngine, + BigInteger, Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, + biginteger::{BigInteger384 as FqRepr, BigInteger256 as FrRepr}, + bls12::{G1Prepared, G2Prepared},}; + +ec_bench!(); +f_bench!(1, Fq2, Fq2, fq2); +f_bench!(2, Fq12, Fq12, fq12); +f_bench!(Fq, Fq, FqRepr, FqRepr, fq); +f_bench!(Fr, Fr, FrRepr, FrRepr, fr); +pairing_bench!(Bls12_381, Fq12, prepared_v); diff --git a/algebra-benches/src/lib.rs b/algebra-benches/src/lib.rs index 8b1378917..90579f2f7 100644 --- a/algebra-benches/src/lib.rs +++ b/algebra-benches/src/lib.rs @@ -1 +1,10 @@ +#![feature(test)] +extern crate test; + +#[macro_use] +pub mod macros; + +mod bls12_377; +mod bls12_381; +mod sw6; diff --git a/algebra-benches/src/macros/ec.rs b/algebra-benches/src/macros/ec.rs new file mode 100644 index 000000000..f9a2188a4 --- /dev/null +++ b/algebra-benches/src/macros/ec.rs @@ -0,0 +1,167 @@ +macro_rules! 
ec_bench {
+    () => {
+        #[bench]
+        fn bench_g1_rand(b: &mut ::test::Bencher) {
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+            b.iter(|| G1::rand(&mut rng));
+        }
+
+        #[bench]
+        fn bench_g1_mul_assign(b: &mut ::test::Bencher) {
+            const SAMPLES: usize = 1000;
+
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+
+            let v: Vec<(G1, Fr)> = (0..SAMPLES)
+                .map(|_| (G1::rand(&mut rng), Fr::rand(&mut rng)))
+                .collect();
+
+            let mut count = 0;
+            b.iter(|| {
+                let mut tmp = v[count].0;
+                tmp *= v[count].1;
+                count = (count + 1) % SAMPLES;
+                tmp
+            });
+        }
+
+        #[bench]
+        fn bench_g1_add_assign(b: &mut ::test::Bencher) {
+            const SAMPLES: usize = 1000;
+
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+
+            let v: Vec<(G1, G1)> = (0..SAMPLES)
+                .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng)))
+                .collect();
+
+            let mut count = 0;
+            b.iter(|| {
+                let mut tmp = v[count].0;
+                tmp.add_assign(&v[count].1);
+                count = (count + 1) % SAMPLES;
+                tmp
+            });
+        }
+
+        #[bench]
+        fn bench_g1_add_assign_mixed(b: &mut ::test::Bencher) {
+            const SAMPLES: usize = 1000;
+
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+
+            let v: Vec<(G1, G1Affine)> = (0..SAMPLES)
+                .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng).into()))
+                .collect();
+
+            let mut count = 0;
+            b.iter(|| {
+                let mut tmp = v[count].0;
+                tmp.add_assign_mixed(&v[count].1);
+                count = (count + 1) % SAMPLES;
+                tmp
+            });
+        }
+
+        #[bench]
+        fn bench_g1_double(b: &mut ::test::Bencher) {
+            const SAMPLES: usize = 1000;
+
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+
+            let v: Vec<(G1, G1)> = (0..SAMPLES)
+                .map(|_| (G1::rand(&mut rng), G1::rand(&mut rng)))
+                .collect();
+
+            let mut count = 0;
+            b.iter(|| {
+                let mut tmp = v[count].0;
+                tmp.double_in_place();
+                count = (count + 1) % SAMPLES;
+                tmp
+            });
+        }
+
+        #[bench]
+        fn bench_g2_rand(b: &mut ::test::Bencher) {
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+            b.iter(|| G2::rand(&mut rng));
+        }
+
+        #[bench]
+        fn bench_g2_mul_assign(b: &mut ::test::Bencher) {
+            const SAMPLES: usize = 1000;
+
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+
+            let v: Vec<(G2, Fr)> = (0..SAMPLES)
+                .map(|_| (G2::rand(&mut rng), Fr::rand(&mut rng)))
+                .collect();
+
+            let mut count = 0;
+            b.iter(|| {
+                let mut tmp = v[count].0;
+                tmp *= v[count].1;
+                count = (count + 1) % SAMPLES;
+                tmp
+            });
+        }
+
+        #[bench]
+        fn bench_g2_add_assign(b: &mut ::test::Bencher) {
+            const SAMPLES: usize = 1000;
+
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+
+            let v: Vec<(G2, G2)> = (0..SAMPLES)
+                .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng)))
+                .collect();
+
+            let mut count = 0;
+            b.iter(|| {
+                let mut tmp = v[count].0;
+                tmp.add_assign(&v[count].1);
+                count = (count + 1) % SAMPLES;
+                tmp
+            });
+        }
+
+        #[bench]
+        fn bench_g2_add_assign_mixed(b: &mut ::test::Bencher) {
+            const SAMPLES: usize = 1000;
+
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+
+            let v: Vec<(G2, G2Affine)> = (0..SAMPLES)
+                .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng).into()))
+                .collect();
+
+            let mut count = 0;
+            b.iter(|| {
+                let mut tmp = v[count].0;
+                tmp.add_assign_mixed(&v[count].1);
+                count = (count + 1) % SAMPLES;
+                tmp
+            });
+        }
+
+        #[bench]
+        fn bench_g2_double(b: &mut ::test::Bencher) {
+            const SAMPLES: usize = 1000;
+
+            let mut rng = XorShiftRng::seed_from_u64(1231275789u64);
+
+            let v: Vec<(G2, G2)> = (0..SAMPLES)
+                .map(|_| (G2::rand(&mut rng), G2::rand(&mut rng)))
+                .collect();
+
+            let mut count = 0;
+            b.iter(|| {
+                let mut tmp = v[count].0;
tmp.double_in_place(); + count = (count + 1) % SAMPLES; + tmp + }); + } + } +} diff --git a/algebra-benches/src/macros/field.rs b/algebra-benches/src/macros/field.rs new file mode 100644 index 000000000..196a928ac --- /dev/null +++ b/algebra-benches/src/macros/field.rs @@ -0,0 +1,318 @@ +macro_rules! f_bench { + // Use this for base fields + ($f:ident, $f_type:ty, $f_repr:ident, $f_repr_type:ty, $field_ident:ident) => { + field_common!($f, $f_type, $field_ident); + sqrt!($f, $f_type, $field_ident); + field_base!($f, $f_type, $f_repr, $f_repr_type, $field_ident); + }; + // use this for intermediate fields + (1, $f:ident, $f_type:ty, $field_ident:ident) => { + field_common!($f, $f_type, $field_ident); + sqrt!($f, $f_type, $field_ident); + }; + // Use this for the full extension field Fqk + (2, $f:ident, $f_type:ty, $field_ident:ident) => { + field_common!($f, $f_type, $field_ident); + }; +} + +macro_rules! field_common { + ($f:ident, $f_type:ty, $field_ident:ident) => { + paste::item! { + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<($f_type, $f_type)> = (0..SAMPLES) + .map(|_| ($f::rand(&mut rng), $f::rand(&mut rng))) + .collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count].0; + tmp.add_assign(&v[count].1); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<($f_type, $f_type)> = (0..SAMPLES) + .map(|_| ($f::rand(&mut rng), $f::rand(&mut rng))) + .collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count].0; + tmp.sub_assign(&v[count].1); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<($f_type, $f_type)> = (0..SAMPLES) + .map(|_| ($f::rand(&mut rng), $f::rand(&mut rng))) + .collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count].0; + tmp.mul_assign(&v[count].1); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_type> = (0..SAMPLES).map(|_| $f::rand(&mut rng)).collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count]; + tmp.double_in_place(); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_type> = (0..SAMPLES).map(|_| $f::rand(&mut rng)).collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count]; + tmp.square_in_place(); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_type> = (0..SAMPLES).map(|_| $f::rand(&mut rng)).collect(); + + let mut count = 0; + b.iter(|| { + let tmp = v[count].inverse(); + count = (count + 1) % SAMPLES; + tmp + }); + } + } + } +} + + +macro_rules! sqrt { + ($f:ident, $f_type:ty, $field_ident:ident) => { + paste::item! 
{ + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_type> = (0..SAMPLES) + .map(|_| { + let mut tmp = $f::rand(&mut rng); + tmp.square_in_place(); + tmp + }) + .collect(); + + let mut count = 0; + b.iter(|| { + count = (count + 1) % SAMPLES; + v[count].sqrt() + }); + } + } + } +} + +macro_rules! field_base { + ($f:ident, $f_type:ty, $f_repr:ident, $f_repr_type:ty, $field_ident:ident) => { + paste::item! { + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<($f_repr_type, $f_repr_type)> = (0..SAMPLES) + .map(|_| { + let mut tmp1 = $f_repr::rand(&mut rng); + let mut tmp2 = $f_repr::rand(&mut rng); + // Shave a few bits off to avoid overflow. + for _ in 0..3 { + tmp1.div2(); + tmp2.div2(); + } + (tmp1, tmp2) + }) + .collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count].0; + tmp.add_nocarry(&v[count].1); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<($f_repr_type, $f_repr_type)> = (0..SAMPLES) + .map(|_| { + let tmp1 = $f_repr::rand(&mut rng); + let mut tmp2 = tmp1; + // Ensure tmp2 is smaller than tmp1. + for _ in 0..10 { + tmp2.div2(); + } + (tmp1, tmp2) + }) + .collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count].0; + tmp.sub_noborrow(&v[count].1); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_repr_type> = (0..SAMPLES).map(|_| $f_repr::rand(&mut rng)).collect(); + + let mut count = 0; + b.iter(|| { + let tmp = v[count].num_bits(); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_repr_type> = (0..SAMPLES).map(|_| $f_repr::rand(&mut rng)).collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count]; + tmp.mul2(); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_repr_type> = (0..SAMPLES).map(|_| $f_repr::rand(&mut rng)).collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count]; + tmp.div2(); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_type> = (0..SAMPLES).map(|_| $f::rand(&mut rng)).collect(); + + let mut count = 0; + b.iter(|| { + let mut tmp = v[count]; + tmp = -tmp; + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_type> = (0..SAMPLES).map(|_| $f::rand(&mut rng)).collect(); + + let mut count = 0; + b.iter(|| { + count = (count + 1) % SAMPLES; + v[count].into_repr() + }); + } + + #[bench] + fn [](b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$f_repr_type> = (0..SAMPLES) + .map(|_| $f::rand(&mut 
rng).into_repr()) + .collect(); + + let mut count = 0; + b.iter(|| { + count = (count + 1) % SAMPLES; + $f::from_repr(v[count]) + }); + } + } + } +} diff --git a/algebra-benches/src/macros/mod.rs b/algebra-benches/src/macros/mod.rs new file mode 100644 index 000000000..60f85138b --- /dev/null +++ b/algebra-benches/src/macros/mod.rs @@ -0,0 +1,15 @@ +#[macro_use] +mod ec; +use ec::*; + +#[macro_use] +mod field; +use field::*; + +#[macro_use] +mod pairing; +use pairing::*; + +#[macro_use] +mod utils; +use utils::*; diff --git a/algebra-benches/src/macros/pairing.rs b/algebra-benches/src/macros/pairing.rs new file mode 100644 index 000000000..0709cdca2 --- /dev/null +++ b/algebra-benches/src/macros/pairing.rs @@ -0,0 +1,61 @@ +macro_rules! pairing_bench { + ($curve:ident, $pairing_field:ident, $pairing_type:ident) => { + #[bench] + fn bench_pairing_miller_loop(b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + $pairing_type!(v, rng); + + let mut count = 0; + b.iter(|| { + let tmp = $curve::miller_loop(&[(v[count].0.clone(), v[count].1.clone())]); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn bench_pairing_final_exponentiation(b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<$pairing_field> = (0..SAMPLES) + .map(|_| { + ( + G1Affine::from(G1::rand(&mut rng)).into(), + G2Affine::from(G2::rand(&mut rng)).into(), + ) + }) + .map(|(p, q)| $curve::miller_loop(&[(p, q)])) + .collect(); + + let mut count = 0; + b.iter(|| { + let tmp = $curve::final_exponentiation(&v[count]); + count = (count + 1) % SAMPLES; + tmp + }); + } + + #[bench] + fn bench_pairing_full(b: &mut ::test::Bencher) { + const SAMPLES: usize = 1000; + + let mut rng = XorShiftRng::seed_from_u64(1231275789u64); + + let v: Vec<(G1, G2)> = (0..SAMPLES) + .map(|_| (G1::rand(&mut rng), G2::rand(&mut rng))) + .collect(); + + let mut count = 0; + b.iter(|| { + let tmp = $curve::pairing(v[count].0, v[count].1); + count = (count + 1) % SAMPLES; + tmp + }); + } + } +} diff --git a/algebra-benches/src/macros/utils.rs b/algebra-benches/src/macros/utils.rs new file mode 100644 index 000000000..80ba1ac4f --- /dev/null +++ b/algebra-benches/src/macros/utils.rs @@ -0,0 +1,36 @@ +const N: usize = 1000; + +macro_rules! tmp_dot_func { + ($tmp:ident, $v:ident, $func:ident, $count:ident) => { + #[cfg(not(feature = "n_fold"))] + $tmp.$func(&$v[$count].1); + #[cfg(feature = "n_fold")] + for _ in 0..N { $tmp.$func(&$v[$count].1); } + } +} + +macro_rules! prepared_v { + ($v:ident, $rng:ident) => { + let $v: Vec<(G1Prepared, G2Prepared)> = (0..SAMPLES) + .map(|_| { + ( + G1Affine::from(G1::rand(&mut $rng)).into(), + G2Affine::from(G2::rand(&mut $rng)).into(), + ) + }) + .collect(); + } +} + +macro_rules! 
affine_v { + ($v:ident, $rng:ident) => { + let $v: Vec<(G1Affine, G2Affine)> = (0..SAMPLES) + .map(|_| { + ( + G1Affine::from(G1::rand(&mut $rng)).into(), + G2Affine::from(G2::rand(&mut $rng)).into(), + ) + }) + .collect(); + } +} diff --git a/algebra-benches/src/sw6.rs b/algebra-benches/src/sw6.rs new file mode 100644 index 000000000..10b4d3c71 --- /dev/null +++ b/algebra-benches/src/sw6.rs @@ -0,0 +1,17 @@ +use rand::SeedableRng; +use rand_xorshift::XorShiftRng; +use std::ops::{AddAssign, MulAssign, SubAssign}; + +use algebra::{sw6::{Fq6, fq3::Fq3, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, + SW6, G2Affine, G2Projective as G2}, + PairingEngine, + Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, + BigInteger, biginteger::{BigInteger832 as FqRepr, BigInteger384 as FrRepr} + }; + +ec_bench!(); +f_bench!(1, Fq3, Fq3, fq3); +f_bench!(2, Fq6, Fq6, fq6); +f_bench!(Fq, Fq, FqRepr, FqRepr, fq); +f_bench!(Fr, Fr, FrRepr, FrRepr, fr); +pairing_bench!(SW6, Fq6, affine_v); From c6265d59054bf4872e426f12ec0100ee44d24e75 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:05:47 +0800 Subject: [PATCH 17/43] feature = n_fold --- algebra-benches/src/macros/ec.rs | 12 ++++++------ algebra-benches/src/macros/field.rs | 18 +++++++++--------- algebra-benches/src/macros/mod.rs | 4 ---- algebra-benches/src/macros/utils.rs | 20 +++++++++++++++----- 4 files changed, 30 insertions(+), 24 deletions(-) diff --git a/algebra-benches/src/macros/ec.rs b/algebra-benches/src/macros/ec.rs index f9a2188a4..a6a78c2e3 100644 --- a/algebra-benches/src/macros/ec.rs +++ b/algebra-benches/src/macros/ec.rs @@ -38,7 +38,7 @@ macro_rules! ec_bench { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp_dot_func!(tmp, v, add_assign, count); + n_fold!(tmp, v, add_assign, count); count = (count + 1) % SAMPLES; tmp }); @@ -57,7 +57,7 @@ macro_rules! ec_bench { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.add_assign_mixed(&v[count].1); + n_fold!(tmp, v, add_assign_mixed, count); count = (count + 1) % SAMPLES; tmp }); @@ -76,7 +76,7 @@ macro_rules! ec_bench { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.double_in_place(); + n_fold!(tmp, double_in_place); count = (count + 1) % SAMPLES; tmp }); @@ -120,7 +120,7 @@ macro_rules! ec_bench { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); + n_fold!(tmp, v, add_assign, count); count = (count + 1) % SAMPLES; tmp }); @@ -139,7 +139,7 @@ macro_rules! ec_bench { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.add_assign_mixed(&v[count].1); + n_fold!(tmp, v, add_assign_mixed, count); count = (count + 1) % SAMPLES; tmp }); @@ -158,7 +158,7 @@ macro_rules! ec_bench { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.double_in_place(); + n_fold!(tmp, double_in_place); count = (count + 1) % SAMPLES; tmp }); diff --git a/algebra-benches/src/macros/field.rs b/algebra-benches/src/macros/field.rs index 196a928ac..7d484d155 100644 --- a/algebra-benches/src/macros/field.rs +++ b/algebra-benches/src/macros/field.rs @@ -32,7 +32,7 @@ macro_rules! field_common { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.add_assign(&v[count].1); + n_fold!(tmp, v, add_assign, count); count = (count + 1) % SAMPLES; tmp }); @@ -51,7 +51,7 @@ macro_rules! 
field_common { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.sub_assign(&v[count].1); + n_fold!(tmp, v, sub_assign, count); count = (count + 1) % SAMPLES; tmp }); @@ -70,7 +70,7 @@ macro_rules! field_common { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.mul_assign(&v[count].1); + n_fold!(tmp, v, mul_assign, count); count = (count + 1) % SAMPLES; tmp }); @@ -87,7 +87,7 @@ macro_rules! field_common { let mut count = 0; b.iter(|| { let mut tmp = v[count]; - tmp.double_in_place(); + n_fold!(tmp, double_in_place); count = (count + 1) % SAMPLES; tmp }); @@ -104,7 +104,7 @@ macro_rules! field_common { let mut count = 0; b.iter(|| { let mut tmp = v[count]; - tmp.square_in_place(); + n_fold!(tmp, square_in_place); count = (count + 1) % SAMPLES; tmp }); @@ -182,7 +182,7 @@ macro_rules! field_base { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.add_nocarry(&v[count].1); + n_fold!(tmp, v, add_nocarry, count); count = (count + 1) % SAMPLES; tmp }); @@ -209,7 +209,7 @@ macro_rules! field_base { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - tmp.sub_noborrow(&v[count].1); + n_fold!(tmp, v, sub_noborrow, count); count = (count + 1) % SAMPLES; tmp }); @@ -242,7 +242,7 @@ macro_rules! field_base { let mut count = 0; b.iter(|| { let mut tmp = v[count]; - tmp.mul2(); + n_fold!(tmp, mul2); count = (count + 1) % SAMPLES; tmp }); @@ -259,7 +259,7 @@ macro_rules! field_base { let mut count = 0; b.iter(|| { let mut tmp = v[count]; - tmp.div2(); + n_fold!(tmp, div2); count = (count + 1) % SAMPLES; tmp }); diff --git a/algebra-benches/src/macros/mod.rs b/algebra-benches/src/macros/mod.rs index 60f85138b..5c936a240 100644 --- a/algebra-benches/src/macros/mod.rs +++ b/algebra-benches/src/macros/mod.rs @@ -1,15 +1,11 @@ #[macro_use] mod ec; -use ec::*; #[macro_use] mod field; -use field::*; #[macro_use] mod pairing; -use pairing::*; #[macro_use] mod utils; -use utils::*; diff --git a/algebra-benches/src/macros/utils.rs b/algebra-benches/src/macros/utils.rs index 80ba1ac4f..e86e48c23 100644 --- a/algebra-benches/src/macros/utils.rs +++ b/algebra-benches/src/macros/utils.rs @@ -1,14 +1,24 @@ -const N: usize = 1000; - -macro_rules! tmp_dot_func { +macro_rules! n_fold { ($tmp:ident, $v:ident, $func:ident, $count:ident) => { + const ITERS: usize = 1000; + #[cfg(not(feature = "n_fold"))] $tmp.$func(&$v[$count].1); #[cfg(feature = "n_fold")] - for _ in 0..N { $tmp.$func(&$v[$count].1); } - } + for _ in 0..ITERS { $tmp.$func(&$v[$count].1); } + }; + + ($tmp:ident, $func:ident) => { + const ITERS: usize = 1000; + + #[cfg(not(feature = "n_fold"))] + $tmp.$func(); + #[cfg(feature = "n_fold")] + for _ in 0..ITERS { $tmp.$func(); } + }; } + macro_rules! 
prepared_v { ($v:ident, $rng:ident) => { let $v: Vec<(G1Prepared, G2Prepared)> = (0..SAMPLES) From cfc3975e76023240570781708b173fd5dd3533d7 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:27:03 +0800 Subject: [PATCH 18/43] cleanup/refactor fields --- algebra-core/src/fields/arithmetic.rs | 473 +++++++++++++++++++++ algebra-core/src/fields/macros.rs | 501 +---------------------- algebra-core/src/fields/mod.rs | 5 +- algebra-core/src/fields/models/fp_256.rs | 18 - algebra-core/src/fields/models/fp_320.rs | 18 - algebra-core/src/fields/models/fp_384.rs | 18 - algebra-core/src/fields/models/fp_768.rs | 18 - algebra-core/src/fields/models/fp_832.rs | 17 - algebra-core/src/fields/models/mod.rs | 43 +- 9 files changed, 517 insertions(+), 594 deletions(-) create mode 100644 algebra-core/src/fields/arithmetic.rs delete mode 100644 algebra-core/src/fields/models/fp_256.rs delete mode 100644 algebra-core/src/fields/models/fp_320.rs delete mode 100644 algebra-core/src/fields/models/fp_384.rs delete mode 100644 algebra-core/src/fields/models/fp_768.rs delete mode 100644 algebra-core/src/fields/models/fp_832.rs diff --git a/algebra-core/src/fields/arithmetic.rs b/algebra-core/src/fields/arithmetic.rs new file mode 100644 index 000000000..b7a98bbb6 --- /dev/null +++ b/algebra-core/src/fields/arithmetic.rs @@ -0,0 +1,473 @@ + +/// This modular multiplication algorithm uses Montgomery +/// reduction for efficient implementation. It also additionally +/// uses the "no-carry optimization" outlined +/// [here](https://hackmd.io/@zkteam/modular_multiplication) if +/// `P::MODULUS` has (a) a non-zero MSB, and (b) at least one +/// zero bit in the rest of the modulus. +macro_rules! impl_field_mul_assign { + ($limbs:expr) => { + #[inline] + #[unroll_for_loops] + fn mul_assign(&mut self, other: &Self) { + // Checking the modulus at compile time + let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0; + let mut all_bits_set = P::MODULUS.0[$limbs - 1] == !0 - (1 << 63); + for i in 1..$limbs { + all_bits_set &= P::MODULUS.0[$limbs - i - 1] == !0u64; + } + let _no_carry: bool = !(first_bit_set || all_bits_set); + + // No-carry optimisation applied to CIOS + if _no_carry { + #[cfg(all(feature = "asm", target_feature="bmi2", + target_feature="adx", target_arch = "x86_64"))] + if $limbs <= 6 + { + asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); + self.reduce(); + return; + } + let mut r = [0u64; $limbs]; + let mut carry1 = 0u64; + let mut carry2 = 0u64; + + for i in 0..$limbs { + r[0] = fa::mac(r[0], (self.0).0[0], (other.0).0[i], &mut carry1); + let k = r[0].wrapping_mul(P::INV); + fa::mac_discard(r[0], k, P::MODULUS.0[0], &mut carry2); + for j in 1..$limbs { + r[j] = fa::mac_with_carry(r[j], (self.0).0[j], (other.0).0[i], &mut carry1); + r[j - 1] = fa::mac_with_carry(r[j], k, P::MODULUS.0[j], &mut carry2); + } + r[$limbs - 1] = carry1 + carry2; + } + (self.0).0 = r; + self.reduce(); + // Alternative implementation + } else { + let mut r = [0u64; $limbs * 2]; + + for i in 0..$limbs { + let mut carry = 0; + for j in 0..$limbs { + r[j + i] = + fa::mac_with_carry(r[j + i], (self.0).0[i], (other.0).0[j], &mut carry); + } + r[$limbs + i] = carry; + } + // Montgomery reduction + let mut _carry2 = 0; + for i in 0..$limbs { + let k = r[i].wrapping_mul(P::INV); + let mut carry = 0; + fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); + for j in 1..$limbs { + r[j + i] = fa::mac_with_carry(r[j + i], k, P::MODULUS.0[j], &mut carry); + } + 
r[$limbs + i] = fa::adc(r[$limbs + i], _carry2, &mut carry); + _carry2 = carry; + } + (self.0).0.copy_from_slice(&r[$limbs..]); + self.reduce(); + } + } + }; +} + +macro_rules! impl_field_into_repr { + ($limbs:expr, $BigIntegerType:ty) => { + #[inline] + #[unroll_for_loops] + fn into_repr(&self) -> $BigIntegerType { + let mut tmp = self.0; + let mut r = tmp.0; + // Montgomery Reduction + for i in 0..$limbs { + let k = r[i].wrapping_mul(P::INV); + let mut carry = 0; + + fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); + for j in 1..$limbs { + r[(j + i) % $limbs] = + fa::mac_with_carry(r[(j + i) % $limbs], k, P::MODULUS.0[j], &mut carry); + } + r[i % $limbs] = carry; + } + tmp.0 = r; + tmp + } + }; +} + +macro_rules! impl_field_square_in_place { + ($limbs: expr) => { + #[inline] + #[unroll_for_loops] + fn square_in_place(&mut self) -> &mut Self { + // Checking the modulus at compile time + let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0; + let mut all_bits_set = P::MODULUS.0[$limbs - 1] == !0 - (1 << 63); + for i in 1..$limbs { + all_bits_set &= P::MODULUS.0[$limbs - i - 1] == !0u64; + } + let _no_carry: bool = !(first_bit_set || all_bits_set); + + #[cfg(all(feature = "asm", target_feature="bmi2", + target_feature="adx", target_arch = "x86_64"))] + if $limbs <= 6 && _no_carry + { + asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); + self.reduce(); + return self; + } + let mut r = [0u64; $limbs*2]; + + let mut carry = 0; + for i in 0..$limbs { + if i < $limbs-1 { + for j in 0..$limbs { + if j >= (i+1) { r[i+j] = fa::mac_with_carry(r[i+j], (self.0).0[i], (self.0).0[j], &mut carry); } + } + r[$limbs+i] = carry; + carry = 0; + } + } + r[$limbs*2-1] = r[$limbs*2-2] >> 63; + for i in 0..$limbs { r[$limbs*2-2-i] = (r[$limbs*2-2-i] << 1) | (r[$limbs*2-3-i] >> 63); } + for i in 3..$limbs { r[$limbs+1-i] = (r[$limbs+1-i] << 1) | (r[$limbs-i] >> 63); } + r[1] = r[1] << 1; + + for i in 0..$limbs { + r[2*i] = fa::mac_with_carry(r[2*i], (self.0).0[i], (self.0).0[i], &mut carry); + r[2*i+1] = fa::adc(r[2*i+1], 0, &mut carry); + } + // Montgomery reduction + let mut _carry2 = 0; + for i in 0..$limbs { + let k = r[i].wrapping_mul(P::INV); + let mut carry = 0; + fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); + for j in 1..$limbs { + r[j+i] = fa::mac_with_carry(r[j+i], k, P::MODULUS.0[j], &mut carry); + } + r[$limbs+i] = fa::adc(r[$limbs+i], _carry2, &mut carry); + _carry2 = carry; + } + (self.0).0.copy_from_slice(&r[$limbs..]); + self.reduce(); + self + } + }; +} + +macro_rules! impl_field_bigint_conv { + ($field: ident, $bigint: ident, $params: ident) => { + impl Into<$bigint> for $field
<P>
{ + fn into(self) -> $bigint { + self.into_repr() + } + } + + impl From<$bigint> for $field
<P>
{ + fn from(int: $bigint) -> Self { + Self::from_repr(int) + } + } + }; +} + +macro_rules! impl_prime_field_standard_sample { + ($field: ident, $params: ident) => { + impl rand::distributions::Distribution<$field
<P>
> + for rand::distributions::Standard + { + #[inline] + fn sample(&self, rng: &mut R) -> $field
<P>
{ + loop { + let mut tmp = $field(rng.sample(rand::distributions::Standard), PhantomData); + // Mask away the unused bits at the beginning. + tmp.0 + .as_mut() + .last_mut() + .map(|val| *val &= core::u64::MAX >> P::REPR_SHAVE_BITS); + + if tmp.is_valid() { + return tmp; + } + } + } + } + }; +} + +macro_rules! impl_prime_field_from_int { + ($field: ident, u128, $params: ident) => { + impl From for $field
<P>
{ + fn from(other: u128) -> Self { + let upper = (other >> 64) as u64; + let lower = ((other << 64) >> 64) as u64; + let mut default_int = P::BigInt::default(); + default_int.0[0] = lower; + default_int.0[1] = upper; + Self::from_repr(default_int) + } + } + }; + ($field: ident, $int: ident, $params: ident) => { + impl From<$int> for $field
<P>
{ + fn from(other: $int) -> Self { + Self::from_repr(P::BigInt::from(u64::from(other))) + } + } + }; +} + +macro_rules! sqrt_impl { + ($Self:ident, $P:tt, $self:expr) => {{ + use crate::fields::LegendreSymbol::*; + // https://eprint.iacr.org/2012/685.pdf (page 12, algorithm 5) + // Actually this is just normal Tonelli-Shanks; since `P::Generator` + // is a quadratic non-residue, `P::ROOT_OF_UNITY = P::GENERATOR ^ t` + // is also a quadratic non-residue (since `t` is odd). + match $self.legendre() { + Zero => Some(*$self), + QuadraticNonResidue => None, + QuadraticResidue => { + let mut z = $Self::qnr_to_t(); + let mut w = $self.pow($P::T_MINUS_ONE_DIV_TWO); + let mut x = w * $self; + let mut b = x * &w; + + let mut v = $P::TWO_ADICITY as usize; + // t = self^t + #[cfg(debug_assertions)] + { + let mut check = b; + for _ in 0..(v - 1) { + check.square_in_place(); + } + if !check.is_one() { + panic!("Input is not a square root, but it passed the QR test") + } + } + + while !b.is_one() { + let mut k = 0usize; + + let mut b2k = b; + while !b2k.is_one() { + // invariant: b2k = b^(2^k) after entering this loop + b2k.square_in_place(); + k += 1; + } + + let j = v - k - 1; + w = z; + for _ in 0..j { + w.square_in_place(); + } + + z = w.square(); + b *= &z; + x *= &w; + v = k; + } + + Some(x) + } + } + }}; +} + +// Implements AddAssign on Self by deferring to an implementation on &Self +#[macro_export] +macro_rules! impl_additive_ops_from_ref { + ($type: ident, $params: ident) => { + #[allow(unused_qualifications)] + impl core::ops::Add for $type
<P>
{ + type Output = Self; + + #[inline] + fn add(self, other: Self) -> Self { + let mut result = self; + result.add_assign(&other); + result + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::ops::Add<&'a mut Self> for $type
<P>
{ + type Output = Self; + + #[inline] + fn add(self, other: &'a mut Self) -> Self { + let mut result = self; + result.add_assign(&*other); + result + } + } + + #[allow(unused_qualifications)] + impl core::ops::Sub for $type
<P>
{ + type Output = Self; + + #[inline] + fn sub(self, other: Self) -> Self { + let mut result = self; + result.sub_assign(&other); + result + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::ops::Sub<&'a mut Self> for $type
<P>
{ + type Output = Self; + + #[inline] + fn sub(self, other: &'a mut Self) -> Self { + let mut result = self; + result.sub_assign(&*other); + result + } + } + + #[allow(unused_qualifications)] + impl core::iter::Sum for $type
<P>
{ + fn sum>(iter: I) -> Self { + iter.fold(Self::zero(), core::ops::Add::add) + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::iter::Sum<&'a Self> for $type
<P>
{ + fn sum>(iter: I) -> Self { + iter.fold(Self::zero(), core::ops::Add::add) + } + } + + #[allow(unused_qualifications)] + impl core::ops::AddAssign for $type
<P>
{ + fn add_assign(&mut self, other: Self) { + self.add_assign(&other) + } + } + + #[allow(unused_qualifications)] + impl core::ops::SubAssign for $type
<P>
{ + fn sub_assign(&mut self, other: Self) { + self.sub_assign(&other) + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::ops::AddAssign<&'a mut Self> for $type
<P>
{ + fn add_assign(&mut self, other: &'a mut Self) { + self.add_assign(&*other) + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::ops::SubAssign<&'a mut Self> for $type
<P>
{ + fn sub_assign(&mut self, other: &'a mut Self) { + self.sub_assign(&*other) + } + } + }; +} + +// Implements AddAssign on Self by deferring to an implementation on &Self +#[macro_export] +macro_rules! impl_multiplicative_ops_from_ref { + ($type: ident, $params: ident) => { + #[allow(unused_qualifications)] + impl core::ops::Mul for $type
<P>
{ + type Output = Self; + + #[inline] + fn mul(self, other: Self) -> Self { + let mut result = self; + result.mul_assign(&other); + result + } + } + + #[allow(unused_qualifications)] + impl core::ops::Div for $type
<P>
{ + type Output = Self; + + #[inline] + fn div(self, other: Self) -> Self { + let mut result = self; + result.div_assign(&other); + result + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::ops::Mul<&'a mut Self> for $type
<P>
{ + type Output = Self; + + #[inline] + fn mul(self, other: &'a mut Self) -> Self { + let mut result = self; + result.mul_assign(&*other); + result + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::ops::Div<&'a mut Self> for $type
<P>
{ + type Output = Self; + + #[inline] + fn div(self, other: &'a mut Self) -> Self { + let mut result = self; + result.div_assign(&*other); + result + } + } + + #[allow(unused_qualifications)] + impl core::iter::Product for $type
<P>
{ + fn product>(iter: I) -> Self { + iter.fold(Self::one(), core::ops::Mul::mul) + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::iter::Product<&'a Self> for $type
<P>
{ + fn product>(iter: I) -> Self { + iter.fold(Self::one(), Mul::mul) + } + } + + #[allow(unused_qualifications)] + impl core::ops::MulAssign for $type
<P>
{ + fn mul_assign(&mut self, other: Self) { + self.mul_assign(&other) + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::ops::DivAssign<&'a mut Self> for $type
<P>
{ + fn div_assign(&mut self, other: &'a mut Self) { + self.div_assign(&*other) + } + } + + #[allow(unused_qualifications)] + impl<'a, P: $params> core::ops::MulAssign<&'a mut Self> for $type
<P>
{ + fn mul_assign(&mut self, other: &'a mut Self) { + self.mul_assign(&*other) + } + } + + #[allow(unused_qualifications)] + impl core::ops::DivAssign for $type
<P>
{ + fn div_assign(&mut self, other: Self) { + self.div_assign(&other) + } + } + }; +} diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index 5c670db26..2b98a7cb5 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -1,12 +1,6 @@ macro_rules! impl_Fp { - ($Fp:ident, $FpParameters:ident, $limbs:expr) => { - pub trait $FpParameters: FpParameters {} - - #[cfg(all(feature = "asm", target_arch = "x86_64", target_feature="bmi2", target_feature="adx"))] - use std::mem::MaybeUninit; - - #[cfg(all(feature = "asm", target_arch = "x86_64", target_feature="bmi2", target_feature="adx"))] - include!(concat!(env!("OUT_DIR"), "/field_assembly.rs")); + ($Fp:ident, $FpParameters:ident, $BigInteger:ident, $BigIntegerType:ty, $limbs:expr) => { + pub trait $FpParameters: FpParameters {} #[derive(Derivative)] #[derivative( @@ -19,7 +13,7 @@ macro_rules! impl_Fp { Eq(bound = "") )] pub struct $Fp
<P>
( - pub BigInteger, + pub $BigIntegerType, #[derivative(Debug = "ignore")] #[doc(hidden)] pub PhantomData
<P>
, @@ -27,7 +21,7 @@ macro_rules! impl_Fp { impl
<P>
$Fp
<P>
{ #[inline] - pub const fn new(element: BigInteger) -> Self { + pub const fn new(element: $BigIntegerType) -> Self { Self(element, PhantomData) } } @@ -49,7 +43,7 @@ macro_rules! impl_Fp { impl Zero for $Fp
<P>
{ #[inline] fn zero() -> Self { - $Fp::
<P>
(BigInteger::from(0), PhantomData) + $Fp::
<P>
($BigInteger::from(0), PhantomData) } #[inline] @@ -111,7 +105,7 @@ macro_rules! impl_Fp { // Cryptography // Algorithm 16 (BEA for Inversion in Fp) - let one = BigInteger::from(1); + let one = $BigInteger::from(1); let mut u = self.0; let mut v = P::MODULUS; @@ -175,10 +169,10 @@ macro_rules! impl_Fp { impl PrimeField for $Fp
<P>
{ type Params = P; - type BigInt = BigInteger; + type BigInt = $BigIntegerType; #[inline] - fn from_repr(r: BigInteger) -> Self { + fn from_repr(r: $BigIntegerType) -> Self { let mut r = $Fp(r, PhantomData); if r.is_valid() { r.mul_assign(&$Fp(P::R2, PhantomData)); @@ -188,7 +182,7 @@ macro_rules! impl_Fp { } } - impl_field_into_repr!($limbs); + impl_field_into_repr!($limbs, $BigIntegerType); #[inline] fn from_random_bytes(bytes: &[u8]) -> Option { @@ -196,7 +190,7 @@ macro_rules! impl_Fp { for (result_byte, in_byte) in result_bytes.iter_mut().zip(bytes.iter()) { *result_byte = *in_byte; } - BigInteger::read(result_bytes.as_ref()) + $BigInteger::read(result_bytes.as_ref()) .ok() .and_then(|mut res| { res.as_mut()[$limbs-1] &= 0xffffffffffffffff >> P::REPR_SHAVE_BITS; @@ -281,7 +275,7 @@ macro_rules! impl_Fp { impl FromBytes for $Fp
<P>
{ #[inline] fn read(reader: R) -> IoResult { - BigInteger::read(reader).map($Fp::from_repr) + $BigInteger::read(reader).map($Fp::from_repr) } } @@ -436,476 +430,3 @@ macro_rules! impl_Fp { } } } - -/// This modular multiplication algorithm uses Montgomery -/// reduction for efficient implementation. It also additionally -/// uses the "no-carry optimization" outlined -/// [here](https://hackmd.io/@zkteam/modular_multiplication) if -/// `P::MODULUS` has (a) a non-zero MSB, and (b) at least one -/// zero bit in the rest of the modulus. -macro_rules! impl_field_mul_assign { - ($limbs:expr) => { - #[inline] - #[unroll_for_loops] - fn mul_assign(&mut self, other: &Self) { - // Checking the modulus at compile time - let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0; - let mut all_bits_set = P::MODULUS.0[$limbs - 1] == !0 - (1 << 63); - for i in 1..$limbs { - all_bits_set &= P::MODULUS.0[$limbs - i - 1] == !0u64; - } - let _no_carry: bool = !(first_bit_set || all_bits_set); - - // No-carry optimisation applied to CIOS - if _no_carry { - #[cfg(all(feature = "asm", target_feature="bmi2", - target_feature="adx", target_arch = "x86_64"))] - if $limbs <= 6 - { - asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); - self.reduce(); - return; - } - let mut r = [0u64; $limbs]; - let mut carry1 = 0u64; - let mut carry2 = 0u64; - - for i in 0..$limbs { - r[0] = fa::mac(r[0], (self.0).0[0], (other.0).0[i], &mut carry1); - let k = r[0].wrapping_mul(P::INV); - fa::mac_discard(r[0], k, P::MODULUS.0[0], &mut carry2); - for j in 1..$limbs { - r[j] = fa::mac_with_carry(r[j], (self.0).0[j], (other.0).0[i], &mut carry1); - r[j - 1] = fa::mac_with_carry(r[j], k, P::MODULUS.0[j], &mut carry2); - } - r[$limbs - 1] = carry1 + carry2; - } - (self.0).0 = r; - self.reduce(); - // Alternative implementation - } else { - let mut r = [0u64; $limbs * 2]; - - for i in 0..$limbs { - let mut carry = 0; - for j in 0..$limbs { - r[j + i] = - fa::mac_with_carry(r[j + i], (self.0).0[i], (other.0).0[j], &mut carry); - } - r[$limbs + i] = carry; - } - // Montgomery reduction - let mut _carry2 = 0; - for i in 0..$limbs { - let k = r[i].wrapping_mul(P::INV); - let mut carry = 0; - fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); - for j in 1..$limbs { - r[j + i] = fa::mac_with_carry(r[j + i], k, P::MODULUS.0[j], &mut carry); - } - r[$limbs + i] = fa::adc(r[$limbs + i], _carry2, &mut carry); - _carry2 = carry; - } - (self.0).0.copy_from_slice(&r[$limbs..]); - self.reduce(); - } - } - }; -} - -macro_rules! impl_field_into_repr { - ($limbs:expr) => { - #[inline] - #[unroll_for_loops] - fn into_repr(&self) -> BigInteger { - let mut tmp = self.0; - let mut r = tmp.0; - // Montgomery Reduction - for i in 0..$limbs { - let k = r[i].wrapping_mul(P::INV); - let mut carry = 0; - - fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); - for j in 1..$limbs { - r[(j + i) % $limbs] = - fa::mac_with_carry(r[(j + i) % $limbs], k, P::MODULUS.0[j], &mut carry); - } - r[i % $limbs] = carry; - } - tmp.0 = r; - tmp - } - }; -} - -macro_rules! 
impl_field_square_in_place { - ($limbs: expr) => { - #[inline] - #[unroll_for_loops] - fn square_in_place(&mut self) -> &mut Self { - // Checking the modulus at compile time - let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0; - let mut all_bits_set = P::MODULUS.0[$limbs - 1] == !0 - (1 << 63); - for i in 1..$limbs { - all_bits_set &= P::MODULUS.0[$limbs - i - 1] == !0u64; - } - let _no_carry: bool = !(first_bit_set || all_bits_set); - - #[cfg(all(feature = "asm", target_feature="bmi2", - target_feature="adx", target_arch = "x86_64"))] - if $limbs <= 6 && _no_carry - { - asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); - self.reduce(); - return self; - } - let mut r = [0u64; $limbs*2]; - - let mut carry = 0; - for i in 0..$limbs { - if i < $limbs-1 { - for j in 0..$limbs { - if j >= (i+1) { r[i+j] = fa::mac_with_carry(r[i+j], (self.0).0[i], (self.0).0[j], &mut carry); } - } - r[$limbs+i] = carry; - carry = 0; - } - } - r[$limbs*2-1] = r[$limbs*2-2] >> 63; - for i in 0..$limbs { r[$limbs*2-2-i] = (r[$limbs*2-2-i] << 1) | (r[$limbs*2-3-i] >> 63); } - for i in 3..$limbs { r[$limbs+1-i] = (r[$limbs+1-i] << 1) | (r[$limbs-i] >> 63); } - r[1] = r[1] << 1; - - for i in 0..$limbs { - r[2*i] = fa::mac_with_carry(r[2*i], (self.0).0[i], (self.0).0[i], &mut carry); - r[2*i+1] = fa::adc(r[2*i+1], 0, &mut carry); - } - // Montgomery reduction - let mut _carry2 = 0; - for i in 0..$limbs { - let k = r[i].wrapping_mul(P::INV); - let mut carry = 0; - fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); - for j in 1..$limbs { - r[j+i] = fa::mac_with_carry(r[j+i], k, P::MODULUS.0[j], &mut carry); - } - r[$limbs+i] = fa::adc(r[$limbs+i], _carry2, &mut carry); - _carry2 = carry; - } - (self.0).0.copy_from_slice(&r[$limbs..]); - self.reduce(); - self - } - }; -} - -macro_rules! impl_field_bigint_conv { - ($field: ident, $bigint: ident, $params: ident) => { - impl Into<$bigint> for $field
<P>
{ - fn into(self) -> $bigint { - self.into_repr() - } - } - - impl From<$bigint> for $field
<P>
{ - fn from(int: $bigint) -> Self { - Self::from_repr(int) - } - } - }; -} - -macro_rules! impl_prime_field_standard_sample { - ($field: ident, $params: ident) => { - impl rand::distributions::Distribution<$field
<P>
> - for rand::distributions::Standard - { - #[inline] - fn sample(&self, rng: &mut R) -> $field
<P>
{ - loop { - let mut tmp = $field(rng.sample(rand::distributions::Standard), PhantomData); - // Mask away the unused bits at the beginning. - tmp.0 - .as_mut() - .last_mut() - .map(|val| *val &= core::u64::MAX >> P::REPR_SHAVE_BITS); - - if tmp.is_valid() { - return tmp; - } - } - } - } - }; -} - -macro_rules! impl_prime_field_from_int { - ($field: ident, u128, $params: ident) => { - impl From for $field
<P>
{ - fn from(other: u128) -> Self { - let upper = (other >> 64) as u64; - let lower = ((other << 64) >> 64) as u64; - let mut default_int = P::BigInt::default(); - default_int.0[0] = lower; - default_int.0[1] = upper; - Self::from_repr(default_int) - } - } - }; - ($field: ident, $int: ident, $params: ident) => { - impl From<$int> for $field
<P>
{ - fn from(other: $int) -> Self { - Self::from_repr(P::BigInt::from(u64::from(other))) - } - } - }; -} - -macro_rules! sqrt_impl { - ($Self:ident, $P:tt, $self:expr) => {{ - use crate::fields::LegendreSymbol::*; - // https://eprint.iacr.org/2012/685.pdf (page 12, algorithm 5) - // Actually this is just normal Tonelli-Shanks; since `P::Generator` - // is a quadratic non-residue, `P::ROOT_OF_UNITY = P::GENERATOR ^ t` - // is also a quadratic non-residue (since `t` is odd). - match $self.legendre() { - Zero => Some(*$self), - QuadraticNonResidue => None, - QuadraticResidue => { - let mut z = $Self::qnr_to_t(); - let mut w = $self.pow($P::T_MINUS_ONE_DIV_TWO); - let mut x = w * $self; - let mut b = x * &w; - - let mut v = $P::TWO_ADICITY as usize; - // t = self^t - #[cfg(debug_assertions)] - { - let mut check = b; - for _ in 0..(v - 1) { - check.square_in_place(); - } - if !check.is_one() { - panic!("Input is not a square root, but it passed the QR test") - } - } - - while !b.is_one() { - let mut k = 0usize; - - let mut b2k = b; - while !b2k.is_one() { - // invariant: b2k = b^(2^k) after entering this loop - b2k.square_in_place(); - k += 1; - } - - let j = v - k - 1; - w = z; - for _ in 0..j { - w.square_in_place(); - } - - z = w.square(); - b *= &z; - x *= &w; - v = k; - } - - Some(x) - } - } - }}; -} - -// Implements AddAssign on Self by deferring to an implementation on &Self -#[macro_export] -macro_rules! impl_additive_ops_from_ref { - ($type: ident, $params: ident) => { - #[allow(unused_qualifications)] - impl core::ops::Add for $type
<P>
{ - type Output = Self; - - #[inline] - fn add(self, other: Self) -> Self { - let mut result = self; - result.add_assign(&other); - result - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::ops::Add<&'a mut Self> for $type
<P>
{ - type Output = Self; - - #[inline] - fn add(self, other: &'a mut Self) -> Self { - let mut result = self; - result.add_assign(&*other); - result - } - } - - #[allow(unused_qualifications)] - impl core::ops::Sub for $type
<P>
{ - type Output = Self; - - #[inline] - fn sub(self, other: Self) -> Self { - let mut result = self; - result.sub_assign(&other); - result - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::ops::Sub<&'a mut Self> for $type
<P>
{ - type Output = Self; - - #[inline] - fn sub(self, other: &'a mut Self) -> Self { - let mut result = self; - result.sub_assign(&*other); - result - } - } - - #[allow(unused_qualifications)] - impl core::iter::Sum for $type
<P>
{ - fn sum>(iter: I) -> Self { - iter.fold(Self::zero(), core::ops::Add::add) - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::iter::Sum<&'a Self> for $type
<P>
{ - fn sum>(iter: I) -> Self { - iter.fold(Self::zero(), core::ops::Add::add) - } - } - - #[allow(unused_qualifications)] - impl core::ops::AddAssign for $type
<P>
{ - fn add_assign(&mut self, other: Self) { - self.add_assign(&other) - } - } - - #[allow(unused_qualifications)] - impl core::ops::SubAssign for $type
<P>
{ - fn sub_assign(&mut self, other: Self) { - self.sub_assign(&other) - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::ops::AddAssign<&'a mut Self> for $type
<P>
{ - fn add_assign(&mut self, other: &'a mut Self) { - self.add_assign(&*other) - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::ops::SubAssign<&'a mut Self> for $type
<P>
{ - fn sub_assign(&mut self, other: &'a mut Self) { - self.sub_assign(&*other) - } - } - }; -} - -// Implements AddAssign on Self by deferring to an implementation on &Self -#[macro_export] -macro_rules! impl_multiplicative_ops_from_ref { - ($type: ident, $params: ident) => { - #[allow(unused_qualifications)] - impl core::ops::Mul for $type
<P>
{ - type Output = Self; - - #[inline] - fn mul(self, other: Self) -> Self { - let mut result = self; - result.mul_assign(&other); - result - } - } - - #[allow(unused_qualifications)] - impl core::ops::Div for $type
<P>
{ - type Output = Self; - - #[inline] - fn div(self, other: Self) -> Self { - let mut result = self; - result.div_assign(&other); - result - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::ops::Mul<&'a mut Self> for $type
<P>
{ - type Output = Self; - - #[inline] - fn mul(self, other: &'a mut Self) -> Self { - let mut result = self; - result.mul_assign(&*other); - result - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::ops::Div<&'a mut Self> for $type
<P>
{ - type Output = Self; - - #[inline] - fn div(self, other: &'a mut Self) -> Self { - let mut result = self; - result.div_assign(&*other); - result - } - } - - #[allow(unused_qualifications)] - impl core::iter::Product for $type
<P>
{ - fn product>(iter: I) -> Self { - iter.fold(Self::one(), core::ops::Mul::mul) - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::iter::Product<&'a Self> for $type
<P>
{ - fn product>(iter: I) -> Self { - iter.fold(Self::one(), Mul::mul) - } - } - - #[allow(unused_qualifications)] - impl core::ops::MulAssign for $type
<P>
{ - fn mul_assign(&mut self, other: Self) { - self.mul_assign(&other) - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::ops::DivAssign<&'a mut Self> for $type
<P>
{ - fn div_assign(&mut self, other: &'a mut Self) { - self.div_assign(&*other) - } - } - - #[allow(unused_qualifications)] - impl<'a, P: $params> core::ops::MulAssign<&'a mut Self> for $type
<P>
{ - fn mul_assign(&mut self, other: &'a mut Self) { - self.mul_assign(&*other) - } - } - - #[allow(unused_qualifications)] - impl core::ops::DivAssign for $type
<P>
{ - fn div_assign(&mut self, other: Self) { - self.div_assign(&other) - } - } - }; -} diff --git a/algebra-core/src/fields/mod.rs b/algebra-core/src/fields/mod.rs index 33255df7e..dff28b00c 100644 --- a/algebra-core/src/fields/mod.rs +++ b/algebra-core/src/fields/mod.rs @@ -15,8 +15,11 @@ use num_traits::{One, Zero}; #[macro_use] pub mod macros; -pub mod models; +#[macro_use] +pub mod arithmetic; + +pub mod models; pub use self::models::*; #[macro_export] diff --git a/algebra-core/src/fields/models/fp_256.rs b/algebra-core/src/fields/models/fp_256.rs deleted file mode 100644 index ac0208be2..000000000 --- a/algebra-core/src/fields/models/fp_256.rs +++ /dev/null @@ -1,18 +0,0 @@ -use core::{ - cmp::{Ord, Ordering, PartialOrd}, - fmt::{Display, Formatter, Result as FmtResult}, - marker::PhantomData, - ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}, - str::FromStr, -}; -use num_traits::{One, Zero}; -use unroll::unroll_for_loops; - -use crate::{ - biginteger::{arithmetic as fa, BigInteger as _BigInteger, BigInteger256 as BigInteger}, - bytes::{FromBytes, ToBytes}, - fields::{Field, FpParameters, LegendreSymbol, PrimeField, SquareRootField}, - io::{Read, Result as IoResult, Write}, -}; - -impl_Fp!(Fp256, Fp256Parameters, 4); diff --git a/algebra-core/src/fields/models/fp_320.rs b/algebra-core/src/fields/models/fp_320.rs deleted file mode 100644 index 8936336f6..000000000 --- a/algebra-core/src/fields/models/fp_320.rs +++ /dev/null @@ -1,18 +0,0 @@ -use core::{ - cmp::{Ord, Ordering, PartialOrd}, - fmt::{Display, Formatter, Result as FmtResult}, - marker::PhantomData, - ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}, - str::FromStr, -}; -use num_traits::{One, Zero}; -use unroll::unroll_for_loops; - -use crate::{ - biginteger::{arithmetic as fa, BigInteger as _BigInteger, BigInteger320 as BigInteger}, - bytes::{FromBytes, ToBytes}, - fields::{Field, FpParameters, LegendreSymbol, PrimeField, SquareRootField}, - io::{Read, Result as IoResult, Write}, -}; - -impl_Fp!(Fp320, Fp320Parameters, 5); diff --git a/algebra-core/src/fields/models/fp_384.rs b/algebra-core/src/fields/models/fp_384.rs deleted file mode 100644 index bf0271b9f..000000000 --- a/algebra-core/src/fields/models/fp_384.rs +++ /dev/null @@ -1,18 +0,0 @@ -use core::{ - cmp::{Ord, Ordering, PartialOrd}, - fmt::{Display, Formatter, Result as FmtResult}, - marker::PhantomData, - ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}, - str::FromStr, -}; -use num_traits::{One, Zero}; -use unroll::unroll_for_loops; - -use crate::{ - biginteger::{arithmetic as fa, BigInteger as _BigInteger, BigInteger384 as BigInteger}, - bytes::{FromBytes, ToBytes}, - fields::{Field, FpParameters, LegendreSymbol, PrimeField, SquareRootField}, - io::{Read, Result as IoResult, Write}, -}; - -impl_Fp!(Fp384, Fp384Parameters, 6); diff --git a/algebra-core/src/fields/models/fp_768.rs b/algebra-core/src/fields/models/fp_768.rs deleted file mode 100644 index ab84e0aba..000000000 --- a/algebra-core/src/fields/models/fp_768.rs +++ /dev/null @@ -1,18 +0,0 @@ -use core::{ - cmp::{Ord, Ordering, PartialOrd}, - fmt::{Display, Formatter, Result as FmtResult}, - marker::PhantomData, - ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}, - str::FromStr, -}; -use num_traits::{One, Zero}; -use unroll::unroll_for_loops; - -use crate::{ - biginteger::{arithmetic as fa, BigInteger as _BigInteger, BigInteger768 as BigInteger}, - bytes::{FromBytes, ToBytes}, - fields::{Field, 
FpParameters, LegendreSymbol, PrimeField, SquareRootField}, - io::{Read, Result as IoResult, Write}, -}; - -impl_Fp!(Fp768, Fp768Parameters, 12); diff --git a/algebra-core/src/fields/models/fp_832.rs b/algebra-core/src/fields/models/fp_832.rs deleted file mode 100644 index 77c7628be..000000000 --- a/algebra-core/src/fields/models/fp_832.rs +++ /dev/null @@ -1,17 +0,0 @@ -use crate::{ - biginteger::{arithmetic as fa, BigInteger as _BigInteger, BigInteger832 as BigInteger}, - bytes::{FromBytes, ToBytes}, - fields::{Field, FpParameters, LegendreSymbol, PrimeField, SquareRootField}, - io::{Read, Result as IoResult, Write}, -}; -use core::{ - cmp::{Ord, Ordering, PartialOrd}, - fmt::{Display, Formatter, Result as FmtResult}, - marker::PhantomData, - ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}, - str::FromStr, -}; -use num_traits::{One, Zero}; -use unroll::unroll_for_loops; - -impl_Fp!(Fp832, Fp832Parameters, 13); diff --git a/algebra-core/src/fields/models/mod.rs b/algebra-core/src/fields/models/mod.rs index 2c591b00a..3a97225d3 100644 --- a/algebra-core/src/fields/models/mod.rs +++ b/algebra-core/src/fields/models/mod.rs @@ -1,17 +1,32 @@ -pub mod fp_256; -pub use self::fp_256::*; - -pub mod fp_320; -pub use self::fp_320::*; - -pub mod fp_384; -pub use self::fp_384::*; - -pub mod fp_768; -pub use self::fp_768::*; - -pub mod fp_832; -pub use self::fp_832::*; +use core::{ + cmp::{Ord, Ordering, PartialOrd}, + fmt::{Display, Formatter, Result as FmtResult}, + marker::PhantomData, + ops::{Add, AddAssign, Div, DivAssign, Mul, MulAssign, Neg, Sub, SubAssign}, + str::FromStr, +}; +use num_traits::{One, Zero}; +use unroll::unroll_for_loops; + +use crate::{ + biginteger::{arithmetic as fa, BigInteger as _BigInteger, + BigInteger256, BigInteger320, BigInteger384, BigInteger768, BigInteger832}, + bytes::{FromBytes, ToBytes}, + fields::{Field, FpParameters, LegendreSymbol, PrimeField, SquareRootField}, + io::{Read, Result as IoResult, Write}, +}; + +#[cfg(all(feature = "asm", target_arch = "x86_64", target_feature="bmi2", target_feature="adx"))] +use std::mem::MaybeUninit; + +#[cfg(all(feature = "asm", target_arch = "x86_64", target_feature="bmi2", target_feature="adx"))] +include!(concat!(env!("OUT_DIR"), "/field_assembly.rs")); + +impl_Fp!(Fp256, Fp256Parameters, BigInteger256, BigInteger256, 4); +impl_Fp!(Fp320, Fp320Parameters, BigInteger320, BigInteger320, 5); +impl_Fp!(Fp384, Fp384Parameters, BigInteger384, BigInteger384, 6); +impl_Fp!(Fp768, Fp768Parameters, BigInteger768, BigInteger768, 12); +impl_Fp!(Fp832, Fp832Parameters, BigInteger832, BigInteger832, 13); pub mod fp2; pub use self::fp2::*; From f3c8ee967d240db74aff58859f2c91acef625f0c Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:50:09 +0800 Subject: [PATCH 19/43] added benchmarking support for all MNT curves --- algebra-benches/Cargo.toml | 7 +++++++ algebra-benches/src/lib.rs | 11 +++++++++++ algebra-benches/src/macros/utils.rs | 3 ++- algebra-benches/src/mnt4_298.rs | 16 ++++++++++++++++ algebra-benches/src/mnt4_753.rs | 16 ++++++++++++++++ algebra-benches/src/mnt6_298.rs | 16 ++++++++++++++++ algebra-benches/src/mnt6_753.rs | 16 ++++++++++++++++ 7 files changed, 84 insertions(+), 1 deletion(-) create mode 100644 algebra-benches/src/mnt4_298.rs create mode 100644 algebra-benches/src/mnt4_753.rs create mode 100644 algebra-benches/src/mnt6_298.rs create mode 100644 algebra-benches/src/mnt6_753.rs diff --git a/algebra-benches/Cargo.toml 
b/algebra-benches/Cargo.toml index 49ff81dfb..df3de8f18 100644 --- a/algebra-benches/Cargo.toml +++ b/algebra-benches/Cargo.toml @@ -32,3 +32,10 @@ paste = "0.1" [features] asm = [ "algebra/asm"] n_fold = [] +mnt4_298 = [] +mnt6_298 = [] +mnt4_753 = [] +mnt6_753 = [] +bls12_381 = [] +bls12_377 = [] +sw6 = [] diff --git a/algebra-benches/src/lib.rs b/algebra-benches/src/lib.rs index 90579f2f7..8a0c246ad 100644 --- a/algebra-benches/src/lib.rs +++ b/algebra-benches/src/lib.rs @@ -5,6 +5,17 @@ extern crate test; #[macro_use] pub mod macros; +#[cfg(feature = "bls12_377")] mod bls12_377; +#[cfg(feature = "bls12_381")] mod bls12_381; +#[cfg(feature = "sw6")] mod sw6; +#[cfg(feature = "mnt4_298")] +mod mnt4_298; +#[cfg(feature = "mnt6_298")] +mod mnt6_298; +#[cfg(feature = "mnt4_753")] +mod mnt4_753; +#[cfg(feature = "mnt6_753")] +mod mnt6_753; diff --git a/algebra-benches/src/macros/utils.rs b/algebra-benches/src/macros/utils.rs index e86e48c23..d3b227a57 100644 --- a/algebra-benches/src/macros/utils.rs +++ b/algebra-benches/src/macros/utils.rs @@ -18,7 +18,7 @@ macro_rules! n_fold { }; } - +#[allow(unused_macros)] macro_rules! prepared_v { ($v:ident, $rng:ident) => { let $v: Vec<(G1Prepared, G2Prepared)> = (0..SAMPLES) @@ -32,6 +32,7 @@ macro_rules! prepared_v { } } +#[allow(unused_macros)] macro_rules! affine_v { ($v:ident, $rng:ident) => { let $v: Vec<(G1Affine, G2Affine)> = (0..SAMPLES) diff --git a/algebra-benches/src/mnt4_298.rs b/algebra-benches/src/mnt4_298.rs new file mode 100644 index 000000000..23b01fdcd --- /dev/null +++ b/algebra-benches/src/mnt4_298.rs @@ -0,0 +1,16 @@ +use rand::SeedableRng; +use rand_xorshift::XorShiftRng; +use std::ops::{AddAssign, MulAssign, SubAssign}; + +use algebra::{mnt4_298::{Fq4, fq2::Fq2, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, + MNT4_298, G2Affine, G2Projective as G2, Parameters}, + PairingEngine, + Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, + BigInteger, biginteger::{BigInteger320 as FqRepr}, + mnt4::{G1Prepared, G2Prepared}}; + +ec_bench!(); +f_bench!(1, Fq2, Fq2, fq2); +f_bench!(2, Fq4, Fq4, fq4); +f_bench!(Fq, Fq, FqRepr, FqRepr, fq); +pairing_bench!(MNT4_298, Fq4, prepared_v); diff --git a/algebra-benches/src/mnt4_753.rs b/algebra-benches/src/mnt4_753.rs new file mode 100644 index 000000000..7eb090cd5 --- /dev/null +++ b/algebra-benches/src/mnt4_753.rs @@ -0,0 +1,16 @@ +use rand::SeedableRng; +use rand_xorshift::XorShiftRng; +use std::ops::{AddAssign, MulAssign, SubAssign}; + +use algebra::{mnt4_753::{Fq4, fq2::Fq2, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, + MNT4_753, G2Affine, G2Projective as G2, Parameters}, + PairingEngine, + Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, + BigInteger, biginteger::{BigInteger768 as FqRepr}, + mnt4::{G1Prepared, G2Prepared}}; + +ec_bench!(); +f_bench!(1, Fq2, Fq2, fq2); +f_bench!(2, Fq4, Fq4, fq4); +f_bench!(Fq, Fq, FqRepr, FqRepr, fq); +pairing_bench!(MNT4_753, Fq4, prepared_v); diff --git a/algebra-benches/src/mnt6_298.rs b/algebra-benches/src/mnt6_298.rs new file mode 100644 index 000000000..59d7ffca3 --- /dev/null +++ b/algebra-benches/src/mnt6_298.rs @@ -0,0 +1,16 @@ +use rand::SeedableRng; +use rand_xorshift::XorShiftRng; +use std::ops::{AddAssign, MulAssign, SubAssign}; + +use algebra::{mnt6_298::{Fq6, fq3::Fq3, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, + MNT6_298, G2Affine, G2Projective as G2, Parameters}, + PairingEngine, + Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, + BigInteger, biginteger::{BigInteger320 as FqRepr}, + 
mnt4::{G1Prepared, G2Prepared}}; + +ec_bench!(); +f_bench!(1, Fq3, Fq3, fq3); +f_bench!(2, Fq6, Fq6, fq6); +f_bench!(Fq, Fq, FqRepr, FqRepr, fq); +pairing_bench!(MNT6_298, Fq6, prepared_v); diff --git a/algebra-benches/src/mnt6_753.rs b/algebra-benches/src/mnt6_753.rs new file mode 100644 index 000000000..288ca8be4 --- /dev/null +++ b/algebra-benches/src/mnt6_753.rs @@ -0,0 +1,16 @@ +use rand::SeedableRng; +use rand_xorshift::XorShiftRng; +use std::ops::{AddAssign, MulAssign, SubAssign}; + +use algebra::{mnt6_753::{Fq6, fq3::Fq3, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, + MNT6_753, G2Affine, G2Projective as G2, Parameters}, + PairingEngine, + Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, + BigInteger, biginteger::{BigInteger768 as FqRepr}, + mnt6::{G1Prepared, G2Prepared}}; + +ec_bench!(); +f_bench!(1, Fq3, Fq3, fq3); +f_bench!(2, Fq6, Fq6, fq6); +f_bench!(Fq, Fq, FqRepr, FqRepr, fq); +pairing_bench!(MNT6_753, Fq6, prepared_v); From 51631776194e3422d68854f7499fc75e2daad901 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 22:55:42 +0800 Subject: [PATCH 20/43] minor error --- algebra-benches/src/mnt6_298.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algebra-benches/src/mnt6_298.rs b/algebra-benches/src/mnt6_298.rs index 59d7ffca3..87e9ae1a8 100644 --- a/algebra-benches/src/mnt6_298.rs +++ b/algebra-benches/src/mnt6_298.rs @@ -7,7 +7,7 @@ use algebra::{mnt6_298::{Fq6, fq3::Fq3, fq::Fq, fr::Fr, G1Affine, G1Projective a PairingEngine, Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, BigInteger, biginteger::{BigInteger320 as FqRepr}, - mnt4::{G1Prepared, G2Prepared}}; + mnt6::{G1Prepared, G2Prepared}}; ec_bench!(); f_bench!(1, Fq3, Fq3, fq3); From 1366581708396487a6520ff7b6313495ab8898e6 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 23:00:50 +0800 Subject: [PATCH 21/43] further cleanup --- algebra-benches/src/{ => curves}/bls12_377.rs | 0 algebra-benches/src/{ => curves}/bls12_381.rs | 0 algebra-benches/src/{ => curves}/mnt4_298.rs | 0 algebra-benches/src/{ => curves}/mnt4_753.rs | 0 algebra-benches/src/{ => curves}/mnt6_298.rs | 0 algebra-benches/src/{ => curves}/mnt6_753.rs | 0 algebra-benches/src/curves/mod.rs | 14 ++++++++++++++ algebra-benches/src/{ => curves}/sw6.rs | 0 algebra-benches/src/lib.rs | 15 +-------------- 9 files changed, 15 insertions(+), 14 deletions(-) rename algebra-benches/src/{ => curves}/bls12_377.rs (100%) rename algebra-benches/src/{ => curves}/bls12_381.rs (100%) rename algebra-benches/src/{ => curves}/mnt4_298.rs (100%) rename algebra-benches/src/{ => curves}/mnt4_753.rs (100%) rename algebra-benches/src/{ => curves}/mnt6_298.rs (100%) rename algebra-benches/src/{ => curves}/mnt6_753.rs (100%) create mode 100644 algebra-benches/src/curves/mod.rs rename algebra-benches/src/{ => curves}/sw6.rs (100%) diff --git a/algebra-benches/src/bls12_377.rs b/algebra-benches/src/curves/bls12_377.rs similarity index 100% rename from algebra-benches/src/bls12_377.rs rename to algebra-benches/src/curves/bls12_377.rs diff --git a/algebra-benches/src/bls12_381.rs b/algebra-benches/src/curves/bls12_381.rs similarity index 100% rename from algebra-benches/src/bls12_381.rs rename to algebra-benches/src/curves/bls12_381.rs diff --git a/algebra-benches/src/mnt4_298.rs b/algebra-benches/src/curves/mnt4_298.rs similarity index 100% rename from algebra-benches/src/mnt4_298.rs rename to 
algebra-benches/src/curves/mnt4_298.rs diff --git a/algebra-benches/src/mnt4_753.rs b/algebra-benches/src/curves/mnt4_753.rs similarity index 100% rename from algebra-benches/src/mnt4_753.rs rename to algebra-benches/src/curves/mnt4_753.rs diff --git a/algebra-benches/src/mnt6_298.rs b/algebra-benches/src/curves/mnt6_298.rs similarity index 100% rename from algebra-benches/src/mnt6_298.rs rename to algebra-benches/src/curves/mnt6_298.rs diff --git a/algebra-benches/src/mnt6_753.rs b/algebra-benches/src/curves/mnt6_753.rs similarity index 100% rename from algebra-benches/src/mnt6_753.rs rename to algebra-benches/src/curves/mnt6_753.rs diff --git a/algebra-benches/src/curves/mod.rs b/algebra-benches/src/curves/mod.rs new file mode 100644 index 000000000..1401cd6cc --- /dev/null +++ b/algebra-benches/src/curves/mod.rs @@ -0,0 +1,14 @@ +#[cfg(feature = "bls12_377")] +mod bls12_377; +#[cfg(feature = "bls12_381")] +mod bls12_381; +#[cfg(feature = "sw6")] +mod sw6; +#[cfg(feature = "mnt4_298")] +mod mnt4_298; +#[cfg(feature = "mnt6_298")] +mod mnt6_298; +#[cfg(feature = "mnt4_753")] +mod mnt4_753; +#[cfg(feature = "mnt6_753")] +mod mnt6_753; diff --git a/algebra-benches/src/sw6.rs b/algebra-benches/src/curves/sw6.rs similarity index 100% rename from algebra-benches/src/sw6.rs rename to algebra-benches/src/curves/sw6.rs diff --git a/algebra-benches/src/lib.rs b/algebra-benches/src/lib.rs index 8a0c246ad..cd3802423 100644 --- a/algebra-benches/src/lib.rs +++ b/algebra-benches/src/lib.rs @@ -5,17 +5,4 @@ extern crate test; #[macro_use] pub mod macros; -#[cfg(feature = "bls12_377")] -mod bls12_377; -#[cfg(feature = "bls12_381")] -mod bls12_381; -#[cfg(feature = "sw6")] -mod sw6; -#[cfg(feature = "mnt4_298")] -mod mnt4_298; -#[cfg(feature = "mnt6_298")] -mod mnt6_298; -#[cfg(feature = "mnt4_753")] -mod mnt4_753; -#[cfg(feature = "mnt6_753")] -mod mnt6_753; +mod curves; From 4901bc29d2b9b5e38e0e68a129fa08e2abfc6426 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 23:10:46 +0800 Subject: [PATCH 22/43] removed unneccesarily clumped features --- algebra/Cargo.toml | 6 ------ 1 file changed, 6 deletions(-) diff --git a/algebra/Cargo.toml b/algebra/Cargo.toml index a4f48e0f7..59531412c 100644 --- a/algebra/Cargo.toml +++ b/algebra/Cargo.toml @@ -48,9 +48,3 @@ std = [ "algebra-core/std" ] parallel = [ "std", "algebra-core/parallel" ] derive = [ "algebra-core/derive" ] asm = [ "algebra-core/asm" ] - -full_asm = [ "algebra-core/asm", "bls12_377", "bls12_381", "sw6", "mnt4_298", "mnt4_753", "mnt6_298", "mnt6_753", "edwards_bls12", "edwards_sw6", "jubjub" ] -small_asm = ["algebra-core/asm", "mnt4_298", "mnt6_298" ] -mid_asm = [ "algebra-core/asm", "bls12_377", "bls12_381", "edwards_bls12"] -big_asm = [ "algebra-core/asm", "sw6", "mnt4_753", "mnt6_753", "edwards_sw6" ] -mix_asm = [ "algebra-core/asm", "sw6", "mnt4_753", "bls12_381", "mnt6_298" ] From 2a3c8877daec6669e6fdc1f89c0afffcc8316418 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 23:33:40 +0800 Subject: [PATCH 23/43] update readme --- README.md | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/README.md b/README.md index 644573059..77d37ca10 100644 --- a/README.md +++ b/README.md @@ -86,6 +86,12 @@ To run with multiple features, make sure to double quote the features. e.g. 
RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test --features "asm bls12_381" ``` +To bench `algebra-benches` with greater accuracy, especially for functions with execution times on the order of nanoseconds, use the `n_fold` feature +```bash +cargo +nightly bench --features "n_fold bls12_381" +``` + + ## License ZEXE is licensed under either of the following licenses, at your discretion. From 6410d836f950b0a470c7231190b9cc288eb2325a Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 23:34:17 +0800 Subject: [PATCH 24/43] update readme --- README.md | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) diff --git a/README.md b/README.md index 77d37ca10..c9a729566 100644 --- a/README.md +++ b/README.md @@ -81,12 +81,7 @@ Compiling with `adcxq`, `adoxq` and `mulxq` instructions can lead to a 30-70% sp RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test/build/bench --features asm ``` -To run with multiple features, make sure to double quote the features. e.g. -```bash -RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test --features "asm bls12_381" -``` - -To bench `algebra-benches` with greater accuracy, especially for functions with execution times on the order of nanoseconds, use the `n_fold` feature +To bench `algebra-benches` with greater accuracy, especially for functions with execution times on the order of nanoseconds, use the `n_fold` feature. To run with multiple features, make sure to double quote the features. ```bash cargo +nightly bench --features "n_fold bls12_381" ``` From d840a2bae3ec09f9016ab48addc0a3cb74b7a307 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Wed, 22 Apr 2020 23:35:34 +0800 Subject: [PATCH 25/43] update readme --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index c9a729566..5ebbdf2e8 100644 --- a/README.md +++ b/README.md @@ -81,7 +81,7 @@ Compiling with `adcxq`, `adoxq` and `mulxq` instructions can lead to a 30-70% sp RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test/build/bench --features asm ``` -To bench `algebra-benches` with greater accuracy, especially for functions with execution times on the order of nanoseconds, use the `n_fold` feature. To run with multiple features, make sure to double quote the features. +To bench `algebra-benches` with greater accuracy, especially for functions with execution times on the order of nanoseconds, use the `n_fold` feature to run selected functions 1000x per iteration. To run with multiple features, make sure to double quote the features. ```bash cargo +nightly bench --features "n_fold bls12_381" ``` From 7faa72bfb8c05bcb5ac0f33a5b3ab47bae6b2d94 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 01:11:28 +0800 Subject: [PATCH 26/43] --emit=asm seemed to be hurting performance significantly... So lets not recommend it for now. --- README.md | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 5ebbdf2e8..e46b5c964 100644 --- a/README.md +++ b/README.md @@ -78,8 +78,9 @@ cargo +nightly bench Compiling with `adcxq`, `adoxq` and `mulxq` instructions can lead to a 30-70% speedup. These are available on most `x86_64` platforms (Broadwell onwards for Intel and Ryzen onwards for AMD). 
Run the following command: ```bash -RUSTFLAGS="--emit=asm -C target-feature=+bmi2,+adx" cargo +nightly test/build/bench --features asm +RUSTFLAGS="-C target-feature=+bmi2,+adx" cargo +nightly test/build/bench --features asm ``` +Tip: If optimising for performance, your mileage may vary with passing `--emit=asm` to `RUSTFLAGS`. To bench `algebra-benches` with greater accuracy, especially for functions with execution times on the order of nanoseconds, use the `n_fold` feature to run selected functions 1000x per iteration. To run with multiple features, make sure to double quote the features. ```bash From 075e2a4e98085ebfe48c85bc1f64278e1589d634 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 08:12:02 +0800 Subject: [PATCH 27/43] remove n_fold for g2 --- algebra-benches/src/macros/ec.rs | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/algebra-benches/src/macros/ec.rs b/algebra-benches/src/macros/ec.rs index a6a78c2e3..b772bfb56 100644 --- a/algebra-benches/src/macros/ec.rs +++ b/algebra-benches/src/macros/ec.rs @@ -120,7 +120,7 @@ macro_rules! ec_bench { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - n_fold!(tmp, v, add_assign, count); + tmp.add_assign(&v[count].1); count = (count + 1) % SAMPLES; tmp }); @@ -139,7 +139,7 @@ macro_rules! ec_bench { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - n_fold!(tmp, v, add_assign_mixed, count); + tmp.add_assign_mixed(&v[count].1); count = (count + 1) % SAMPLES; tmp }); @@ -158,7 +158,7 @@ macro_rules! ec_bench { let mut count = 0; b.iter(|| { let mut tmp = v[count].0; - n_fold!(tmp, double_in_place); + tmp.double_in_place(); count = (count + 1) % SAMPLES; tmp }); From 04c8defff7be102aa10d29f64c673db3f57ea16e Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 08:21:28 +0800 Subject: [PATCH 28/43] canonicalDeserialise --- algebra-core/src/fields/models/mod.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/algebra-core/src/fields/models/mod.rs b/algebra-core/src/fields/models/mod.rs index 3a97225d3..53be8e912 100644 --- a/algebra-core/src/fields/models/mod.rs +++ b/algebra-core/src/fields/models/mod.rs @@ -14,6 +14,7 @@ use crate::{ bytes::{FromBytes, ToBytes}, fields::{Field, FpParameters, LegendreSymbol, PrimeField, SquareRootField}, io::{Read, Result as IoResult, Write}, + serialize::CanonicalDeserialize }; #[cfg(all(feature = "asm", target_arch = "x86_64", target_feature="bmi2", target_feature="adx"))] From 2fd1932c23334e3b6d890ea8172f6466e1040843 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 08:32:53 +0800 Subject: [PATCH 29/43] correct omitted macro ident $ --- algebra-core/src/fields/macros.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index 248bcb1c5..efc54df28 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -280,7 +280,7 @@ macro_rules! impl_Fp { impl FromBytes for $Fp

{ #[inline] fn read(reader: R) -> IoResult { - BigInteger::read(reader).and_then( |b| + $BigInteger::read(reader).and_then( |b| if b.is_zero() { Ok($Fp::zero()) } else { From 270c7f02a3aa6c4bb974a9cdad5bab4a720335ca Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 08:47:14 +0800 Subject: [PATCH 30/43] addressed some warnings --- algebra-benches/src/lib.rs | 1 + algebra-benches/src/macros/utils.rs | 2 -- algebra-core/src/fields/arithmetic.rs | 2 +- 3 files changed, 2 insertions(+), 3 deletions(-) diff --git a/algebra-benches/src/lib.rs b/algebra-benches/src/lib.rs index cd3802423..e1f5c27cb 100644 --- a/algebra-benches/src/lib.rs +++ b/algebra-benches/src/lib.rs @@ -1,4 +1,5 @@ #![feature(test)] +#![allow(unused_macros)] extern crate test; diff --git a/algebra-benches/src/macros/utils.rs b/algebra-benches/src/macros/utils.rs index d3b227a57..366b7f98c 100644 --- a/algebra-benches/src/macros/utils.rs +++ b/algebra-benches/src/macros/utils.rs @@ -18,7 +18,6 @@ macro_rules! n_fold { }; } -#[allow(unused_macros)] macro_rules! prepared_v { ($v:ident, $rng:ident) => { let $v: Vec<(G1Prepared, G2Prepared)> = (0..SAMPLES) @@ -32,7 +31,6 @@ macro_rules! prepared_v { } } -#[allow(unused_macros)] macro_rules! affine_v { ($v:ident, $rng:ident) => { let $v: Vec<(G1Affine, G2Affine)> = (0..SAMPLES) diff --git a/algebra-core/src/fields/arithmetic.rs b/algebra-core/src/fields/arithmetic.rs index b7a98bbb6..87e72037f 100644 --- a/algebra-core/src/fields/arithmetic.rs +++ b/algebra-core/src/fields/arithmetic.rs @@ -127,7 +127,7 @@ macro_rules! impl_field_square_in_place { for i in 0..$limbs { if i < $limbs-1 { for j in 0..$limbs { - if j >= (i+1) { r[i+j] = fa::mac_with_carry(r[i+j], (self.0).0[i], (self.0).0[j], &mut carry); } + if j >= i+1 { r[i+j] = fa::mac_with_carry(r[i+j], (self.0).0[i], (self.0).0[j], &mut carry); } } r[$limbs+i] = carry; carry = 0; From 9e1dc9c53f6a1dfa3fb654f9120d73719c546be9 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 09:08:07 +0800 Subject: [PATCH 31/43] gate bench, fix #[cfg] over if statement --- algebra-benches/Cargo.toml | 5 +++++ algebra-benches/build.rs | 9 +++++++++ algebra-benches/src/lib.rs | 5 ++++- algebra-core/src/fields/arithmetic.rs | 20 ++++++++++++-------- 4 files changed, 30 insertions(+), 9 deletions(-) create mode 100644 algebra-benches/build.rs diff --git a/algebra-benches/Cargo.toml b/algebra-benches/Cargo.toml index df3de8f18..3c5a9e928 100644 --- a/algebra-benches/Cargo.toml +++ b/algebra-benches/Cargo.toml @@ -19,6 +19,7 @@ include = ["Cargo.toml", "src", "README.md", "LICENSE-APACHE", "LICENSE-MIT"] license = "MIT/Apache-2.0" edition = "2018" publish = false +build = "build.rs" ################################# Dependencies ################################ @@ -39,3 +40,7 @@ mnt6_753 = [] bls12_381 = [] bls12_377 = [] sw6 = [] +nightly = [] + +[build-dependencies] +rustc_version = "0.1.*" diff --git a/algebra-benches/build.rs b/algebra-benches/build.rs new file mode 100644 index 000000000..385dcce8b --- /dev/null +++ b/algebra-benches/build.rs @@ -0,0 +1,9 @@ +extern crate rustc_version; + +use rustc_version::{version_meta, Channel}; + +fn main() { + if version_meta().channel == Channel::Nightly { + println!("cargo:rustc-cfg=feature=\"nightly\""); + } +} diff --git a/algebra-benches/src/lib.rs b/algebra-benches/src/lib.rs index e1f5c27cb..15eec42d8 100644 --- a/algebra-benches/src/lib.rs +++ b/algebra-benches/src/lib.rs @@ -1,9 
+1,12 @@ -#![feature(test)] +#![cfg_attr(feature = "nightly", feature(test))] #![allow(unused_macros)] + +#[cfg(feature = "nightly")] extern crate test; #[macro_use] pub mod macros; +#[cfg(feature = "nightly")] mod curves; diff --git a/algebra-core/src/fields/arithmetic.rs b/algebra-core/src/fields/arithmetic.rs index 87e72037f..2c41009b5 100644 --- a/algebra-core/src/fields/arithmetic.rs +++ b/algebra-core/src/fields/arithmetic.rs @@ -22,11 +22,13 @@ macro_rules! impl_field_mul_assign { if _no_carry { #[cfg(all(feature = "asm", target_feature="bmi2", target_feature="adx", target_arch = "x86_64"))] - if $limbs <= 6 { - asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); - self.reduce(); - return; + if $limbs <= 6 + { + asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); + self.reduce(); + return; + } } let mut r = [0u64; $limbs]; let mut carry1 = 0u64; @@ -115,11 +117,13 @@ macro_rules! impl_field_square_in_place { #[cfg(all(feature = "asm", target_feature="bmi2", target_feature="adx", target_arch = "x86_64"))] - if $limbs <= 6 && _no_carry { - asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); - self.reduce(); - return self; + if $limbs <= 6 && _no_carry + { + asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); + self.reduce(); + return self; + } } let mut r = [0u64; $limbs*2]; From ee25c1ee1e960965d8255ae12c8a910b17c93cb2 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 11:11:06 +0800 Subject: [PATCH 32/43] fmt --- algebra-benches/src/curves/bls12_377.rs | 15 +-- algebra-benches/src/curves/bls12_381.rs | 15 +-- algebra-benches/src/curves/mnt4_298.rs | 15 +-- algebra-benches/src/curves/mnt4_753.rs | 15 +-- algebra-benches/src/curves/mnt6_298.rs | 15 +-- algebra-benches/src/curves/mnt6_753.rs | 15 +-- algebra-benches/src/curves/mod.rs | 8 +- algebra-benches/src/curves/sw6.rs | 14 +-- algebra-benches/src/lib.rs | 1 - algebra-benches/src/macros/ec.rs | 2 +- algebra-benches/src/macros/field.rs | 9 +- algebra-benches/src/macros/pairing.rs | 2 +- algebra-benches/src/macros/utils.rs | 12 ++- algebra-core/build.rs | 13 +-- algebra-core/field-assembly/src/context.rs | 115 +++++++++++++++------ algebra-core/field-assembly/src/lib.rs | 87 +++++++++------- algebra-core/field-assembly/src/utils.rs | 4 +- algebra-core/mince/src/arithmetic.rs | 3 +- algebra-core/mince/src/lib.rs | 8 +- algebra-core/src/fields/arithmetic.rs | 1 - algebra-core/src/fields/models/mod.rs | 22 +++- algebra-core/src/lib.rs | 2 - 22 files changed, 237 insertions(+), 156 deletions(-) diff --git a/algebra-benches/src/curves/bls12_377.rs b/algebra-benches/src/curves/bls12_377.rs index ab8e006d0..25887556e 100644 --- a/algebra-benches/src/curves/bls12_377.rs +++ b/algebra-benches/src/curves/bls12_377.rs @@ -2,12 +2,15 @@ use rand::SeedableRng; use rand_xorshift::XorShiftRng; use std::ops::{AddAssign, MulAssign, SubAssign}; -use algebra::{bls12_377::{Fq12, fq2::Fq2, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, - Bls12_377, G2Affine, G2Projective as G2, Parameters}, - PairingEngine, - BigInteger, Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, - biginteger::{BigInteger384 as FqRepr, BigInteger256 as FrRepr}, - bls12::{G1Prepared, G2Prepared},}; +use algebra::{ + biginteger::{BigInteger256 as FrRepr, BigInteger384 as FqRepr}, + bls12::{G1Prepared, G2Prepared}, + bls12_377::{ + fq::Fq, fq2::Fq2, fr::Fr, Bls12_377, Fq12, G1Affine, G1Projective as G1, G2Affine, + G2Projective as G2, Parameters, + }, + BigInteger, Field, 
PairingEngine, PrimeField, ProjectiveCurve, SquareRootField, UniformRand, +}; ec_bench!(); f_bench!(1, Fq2, Fq2, fq2); diff --git a/algebra-benches/src/curves/bls12_381.rs b/algebra-benches/src/curves/bls12_381.rs index 431e3678a..eb44ff0b8 100644 --- a/algebra-benches/src/curves/bls12_381.rs +++ b/algebra-benches/src/curves/bls12_381.rs @@ -2,12 +2,15 @@ use rand::SeedableRng; use rand_xorshift::XorShiftRng; use std::ops::{AddAssign, MulAssign, SubAssign}; -use algebra::{bls12_381::{Fq12, fq2::Fq2, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, - Bls12_381, G2Affine, G2Projective as G2, Parameters}, - PairingEngine, - BigInteger, Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, - biginteger::{BigInteger384 as FqRepr, BigInteger256 as FrRepr}, - bls12::{G1Prepared, G2Prepared},}; +use algebra::{ + biginteger::{BigInteger256 as FrRepr, BigInteger384 as FqRepr}, + bls12::{G1Prepared, G2Prepared}, + bls12_381::{ + fq::Fq, fq2::Fq2, fr::Fr, Bls12_381, Fq12, G1Affine, G1Projective as G1, G2Affine, + G2Projective as G2, Parameters, + }, + BigInteger, Field, PairingEngine, PrimeField, ProjectiveCurve, SquareRootField, UniformRand, +}; ec_bench!(); f_bench!(1, Fq2, Fq2, fq2); diff --git a/algebra-benches/src/curves/mnt4_298.rs b/algebra-benches/src/curves/mnt4_298.rs index 23b01fdcd..4cf2b826d 100644 --- a/algebra-benches/src/curves/mnt4_298.rs +++ b/algebra-benches/src/curves/mnt4_298.rs @@ -2,12 +2,15 @@ use rand::SeedableRng; use rand_xorshift::XorShiftRng; use std::ops::{AddAssign, MulAssign, SubAssign}; -use algebra::{mnt4_298::{Fq4, fq2::Fq2, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, - MNT4_298, G2Affine, G2Projective as G2, Parameters}, - PairingEngine, - Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, - BigInteger, biginteger::{BigInteger320 as FqRepr}, - mnt4::{G1Prepared, G2Prepared}}; +use algebra::{ + biginteger::BigInteger320 as FqRepr, + mnt4::{G1Prepared, G2Prepared}, + mnt4_298::{ + fq::Fq, fq2::Fq2, fr::Fr, Fq4, G1Affine, G1Projective as G1, G2Affine, G2Projective as G2, + Parameters, MNT4_298, + }, + BigInteger, Field, PairingEngine, PrimeField, ProjectiveCurve, SquareRootField, UniformRand, +}; ec_bench!(); f_bench!(1, Fq2, Fq2, fq2); diff --git a/algebra-benches/src/curves/mnt4_753.rs b/algebra-benches/src/curves/mnt4_753.rs index 7eb090cd5..7d1378308 100644 --- a/algebra-benches/src/curves/mnt4_753.rs +++ b/algebra-benches/src/curves/mnt4_753.rs @@ -2,12 +2,15 @@ use rand::SeedableRng; use rand_xorshift::XorShiftRng; use std::ops::{AddAssign, MulAssign, SubAssign}; -use algebra::{mnt4_753::{Fq4, fq2::Fq2, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, - MNT4_753, G2Affine, G2Projective as G2, Parameters}, - PairingEngine, - Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, - BigInteger, biginteger::{BigInteger768 as FqRepr}, - mnt4::{G1Prepared, G2Prepared}}; +use algebra::{ + biginteger::BigInteger768 as FqRepr, + mnt4::{G1Prepared, G2Prepared}, + mnt4_753::{ + fq::Fq, fq2::Fq2, fr::Fr, Fq4, G1Affine, G1Projective as G1, G2Affine, G2Projective as G2, + Parameters, MNT4_753, + }, + BigInteger, Field, PairingEngine, PrimeField, ProjectiveCurve, SquareRootField, UniformRand, +}; ec_bench!(); f_bench!(1, Fq2, Fq2, fq2); diff --git a/algebra-benches/src/curves/mnt6_298.rs b/algebra-benches/src/curves/mnt6_298.rs index 87e9ae1a8..b30e65fdb 100644 --- a/algebra-benches/src/curves/mnt6_298.rs +++ b/algebra-benches/src/curves/mnt6_298.rs @@ -2,12 +2,15 @@ use rand::SeedableRng; use rand_xorshift::XorShiftRng; use 
std::ops::{AddAssign, MulAssign, SubAssign}; -use algebra::{mnt6_298::{Fq6, fq3::Fq3, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, - MNT6_298, G2Affine, G2Projective as G2, Parameters}, - PairingEngine, - Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, - BigInteger, biginteger::{BigInteger320 as FqRepr}, - mnt6::{G1Prepared, G2Prepared}}; +use algebra::{ + biginteger::BigInteger320 as FqRepr, + mnt6::{G1Prepared, G2Prepared}, + mnt6_298::{ + fq::Fq, fq3::Fq3, fr::Fr, Fq6, G1Affine, G1Projective as G1, G2Affine, G2Projective as G2, + Parameters, MNT6_298, + }, + BigInteger, Field, PairingEngine, PrimeField, ProjectiveCurve, SquareRootField, UniformRand, +}; ec_bench!(); f_bench!(1, Fq3, Fq3, fq3); diff --git a/algebra-benches/src/curves/mnt6_753.rs b/algebra-benches/src/curves/mnt6_753.rs index 288ca8be4..197c8a861 100644 --- a/algebra-benches/src/curves/mnt6_753.rs +++ b/algebra-benches/src/curves/mnt6_753.rs @@ -2,12 +2,15 @@ use rand::SeedableRng; use rand_xorshift::XorShiftRng; use std::ops::{AddAssign, MulAssign, SubAssign}; -use algebra::{mnt6_753::{Fq6, fq3::Fq3, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, - MNT6_753, G2Affine, G2Projective as G2, Parameters}, - PairingEngine, - Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, - BigInteger, biginteger::{BigInteger768 as FqRepr}, - mnt6::{G1Prepared, G2Prepared}}; +use algebra::{ + biginteger::BigInteger768 as FqRepr, + mnt6::{G1Prepared, G2Prepared}, + mnt6_753::{ + fq::Fq, fq3::Fq3, fr::Fr, Fq6, G1Affine, G1Projective as G1, G2Affine, G2Projective as G2, + Parameters, MNT6_753, + }, + BigInteger, Field, PairingEngine, PrimeField, ProjectiveCurve, SquareRootField, UniformRand, +}; ec_bench!(); f_bench!(1, Fq3, Fq3, fq3); diff --git a/algebra-benches/src/curves/mod.rs b/algebra-benches/src/curves/mod.rs index 1401cd6cc..2156b14ce 100644 --- a/algebra-benches/src/curves/mod.rs +++ b/algebra-benches/src/curves/mod.rs @@ -2,13 +2,13 @@ mod bls12_377; #[cfg(feature = "bls12_381")] mod bls12_381; -#[cfg(feature = "sw6")] -mod sw6; #[cfg(feature = "mnt4_298")] mod mnt4_298; -#[cfg(feature = "mnt6_298")] -mod mnt6_298; #[cfg(feature = "mnt4_753")] mod mnt4_753; +#[cfg(feature = "mnt6_298")] +mod mnt6_298; #[cfg(feature = "mnt6_753")] mod mnt6_753; +#[cfg(feature = "sw6")] +mod sw6; diff --git a/algebra-benches/src/curves/sw6.rs b/algebra-benches/src/curves/sw6.rs index 10b4d3c71..06cc5a9d5 100644 --- a/algebra-benches/src/curves/sw6.rs +++ b/algebra-benches/src/curves/sw6.rs @@ -2,12 +2,14 @@ use rand::SeedableRng; use rand_xorshift::XorShiftRng; use std::ops::{AddAssign, MulAssign, SubAssign}; -use algebra::{sw6::{Fq6, fq3::Fq3, fq::Fq, fr::Fr, G1Affine, G1Projective as G1, - SW6, G2Affine, G2Projective as G2}, - PairingEngine, - Field, SquareRootField, PrimeField, ProjectiveCurve, UniformRand, - BigInteger, biginteger::{BigInteger832 as FqRepr, BigInteger384 as FrRepr} - }; +use algebra::{ + biginteger::{BigInteger384 as FrRepr, BigInteger832 as FqRepr}, + sw6::{ + fq::Fq, fq3::Fq3, fr::Fr, Fq6, G1Affine, G1Projective as G1, G2Affine, G2Projective as G2, + SW6, + }, + BigInteger, Field, PairingEngine, PrimeField, ProjectiveCurve, SquareRootField, UniformRand, +}; ec_bench!(); f_bench!(1, Fq3, Fq3, fq3); diff --git a/algebra-benches/src/lib.rs b/algebra-benches/src/lib.rs index 15eec42d8..23188a435 100644 --- a/algebra-benches/src/lib.rs +++ b/algebra-benches/src/lib.rs @@ -1,7 +1,6 @@ #![cfg_attr(feature = "nightly", feature(test))] #![allow(unused_macros)] - #[cfg(feature = "nightly")] extern crate 
test; diff --git a/algebra-benches/src/macros/ec.rs b/algebra-benches/src/macros/ec.rs index b772bfb56..d0c041ccf 100644 --- a/algebra-benches/src/macros/ec.rs +++ b/algebra-benches/src/macros/ec.rs @@ -163,5 +163,5 @@ macro_rules! ec_bench { tmp }); } - } + }; } diff --git a/algebra-benches/src/macros/field.rs b/algebra-benches/src/macros/field.rs index 7d484d155..cb2f73fa1 100644 --- a/algebra-benches/src/macros/field.rs +++ b/algebra-benches/src/macros/field.rs @@ -126,10 +126,9 @@ macro_rules! field_common { }); } } - } + }; } - macro_rules! sqrt { ($f:ident, $f_type:ty, $field_ident:ident) => { paste::item! { @@ -154,12 +153,12 @@ macro_rules! sqrt { }); } } - } + }; } macro_rules! field_base { ($f:ident, $f_type:ty, $f_repr:ident, $f_repr_type:ty, $field_ident:ident) => { - paste::item! { + paste::item! { #[bench] fn [](b: &mut ::test::Bencher) { const SAMPLES: usize = 1000; @@ -314,5 +313,5 @@ macro_rules! field_base { }); } } - } + }; } diff --git a/algebra-benches/src/macros/pairing.rs b/algebra-benches/src/macros/pairing.rs index 0709cdca2..117391a5f 100644 --- a/algebra-benches/src/macros/pairing.rs +++ b/algebra-benches/src/macros/pairing.rs @@ -57,5 +57,5 @@ macro_rules! pairing_bench { tmp }); } - } + }; } diff --git a/algebra-benches/src/macros/utils.rs b/algebra-benches/src/macros/utils.rs index 366b7f98c..8d9881a47 100644 --- a/algebra-benches/src/macros/utils.rs +++ b/algebra-benches/src/macros/utils.rs @@ -5,7 +5,9 @@ macro_rules! n_fold { #[cfg(not(feature = "n_fold"))] $tmp.$func(&$v[$count].1); #[cfg(feature = "n_fold")] - for _ in 0..ITERS { $tmp.$func(&$v[$count].1); } + for _ in 0..ITERS { + $tmp.$func(&$v[$count].1); + } }; ($tmp:ident, $func:ident) => { @@ -14,7 +16,9 @@ macro_rules! n_fold { #[cfg(not(feature = "n_fold"))] $tmp.$func(); #[cfg(feature = "n_fold")] - for _ in 0..ITERS { $tmp.$func(); } + for _ in 0..ITERS { + $tmp.$func(); + } }; } @@ -28,7 +32,7 @@ macro_rules! prepared_v { ) }) .collect(); - } + }; } macro_rules! affine_v { @@ -41,5 +45,5 @@ macro_rules! 
affine_v { ) }) .collect(); - } + }; } diff --git a/algebra-core/build.rs b/algebra-core/build.rs index 25acd792a..987dcf79a 100644 --- a/algebra-core/build.rs +++ b/algebra-core/build.rs @@ -8,24 +8,15 @@ use field_assembly::generate_macro_string; #[cfg(feature = "asm")] const NUM_LIMBS: usize = 8; - fn main() { let out_dir = env::var_os("OUT_DIR").unwrap(); let dest_path = Path::new(&out_dir).join("field_assembly.rs"); - #[cfg(feature = "asm")] - fs::write( - &dest_path, - generate_macro_string(NUM_LIMBS) - ).unwrap(); - + fs::write(&dest_path, generate_macro_string(NUM_LIMBS)).unwrap(); #[cfg(not(feature = "asm"))] - fs::write( - &dest_path, - "" - ).unwrap(); + fs::write(&dest_path, "").unwrap(); println!("cargo:rerun-if-changed=build.rs"); } diff --git a/algebra-core/field-assembly/src/context.rs b/algebra-core/field-assembly/src/context.rs index 5bfb09c41..0d60eecb9 100644 --- a/algebra-core/field-assembly/src/context.rs +++ b/algebra-core/field-assembly/src/context.rs @@ -1,5 +1,5 @@ -use std::rc::Rc; use std::collections::HashMap; +use std::rc::Rc; pub const REG_CLOBBER: [&'static str; 8] = ["r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"]; @@ -8,7 +8,7 @@ pub struct Context { ctx_string: Rc, declarations: HashMap, declaration_vec: Vec, - clobbers: Vec + clobbers: Vec, } #[derive(Clone)] @@ -20,7 +20,7 @@ struct Declare { } impl Context { - pub fn new () -> Self { + pub fn new() -> Self { Context { ctx_string: Rc::new(String::new()), declarations: HashMap::new(), @@ -29,86 +29,135 @@ impl Context { } } - pub fn get_string (&mut self) -> String { + pub fn get_string(&mut self) -> String { Rc::make_mut(&mut self.ctx_string).to_string() } - pub fn reset (&mut self) { + pub fn reset(&mut self) { self.declarations.clear(); self.declaration_vec.clear(); self.clobbers.clear(); } pub fn get(self, id: &str) -> String { - self.declarations.get(&id.to_string()).unwrap().token.clone() + self.declarations + .get(&id.to_string()) + .unwrap() + .token + .clone() } pub fn try_get(self, id: &str, fallback_id: &str) -> String { match self.declarations.get(&id.to_string()) { Some(dec) => dec.token.clone(), - None => self.declarations.get(&fallback_id.to_string()).unwrap().token.clone(), + None => self + .declarations + .get(&fallback_id.to_string()) + .unwrap() + .token + .clone(), } } - pub fn add_declaration (&mut self, id: &str, ty: &str, var: &str) { - self.declarations.insert(id.to_string(), Declare { - ty: ty.to_string(), - var: var.to_string(), - pos: self.declarations.len(), - token: format!("${}", self.declarations.len()), - }); + pub fn add_declaration(&mut self, id: &str, ty: &str, var: &str) { + self.declarations.insert( + id.to_string(), + Declare { + ty: ty.to_string(), + var: var.to_string(), + pos: self.declarations.len(), + token: format!("${}", self.declarations.len()), + }, + ); self.declaration_vec.push(Declare { ty: ty.to_string(), var: var.to_string(), pos: self.declaration_vec.len(), token: format!("${}", self.declaration_vec.len()), }); - } - pub fn add_limb (&mut self, limb: usize) { - self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" - {} => {{", limb))); + pub fn add_limb(&mut self, limb: usize) { + self.ctx_string = Rc::new(format!( + "{}{}", + Rc::clone(&self.ctx_string), + format!( + " + {} => {{", + limb + ) + )); } - pub fn add_buffer (&mut self, extra_reg: usize) { - self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" - let mut spill_buffer = MaybeUninit::<[u64; {}]>::uninit();", extra_reg))); + pub fn 
add_buffer(&mut self, extra_reg: usize) { + self.ctx_string = Rc::new(format!( + "{}{}", + Rc::clone(&self.ctx_string), + format!( + " + let mut spill_buffer = MaybeUninit::<[u64; {}]>::uninit();", + extra_reg + ) + )); } - pub fn add_asm (&mut self, ctx_string: String) { - self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" + pub fn add_asm(&mut self, ctx_string: String) { + self.ctx_string = Rc::new(format!( + "{}{}", + Rc::clone(&self.ctx_string), + format!( + " unsafe {{ asm!({} : - :", ctx_string))); + :", + ctx_string + ) + )); } - pub fn add_clobber_from_vec (&mut self, clobbers: Vec<&str>) { + pub fn add_clobber_from_vec(&mut self, clobbers: Vec<&str>) { for clobber in clobbers { self.clobbers.push(format!(" \"{}\"", clobber)); } } - pub fn add_clobber (&mut self, clobber: &str) { + pub fn add_clobber(&mut self, clobber: &str) { self.clobbers.push(format!(" \"{}\"", clobber)); } - pub fn build (&mut self) { + pub fn build(&mut self) { for i in 0..self.declarations.len() { let dec = &self.declaration_vec[i]; - let last = i == self.declarations.len()-1; - self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" - \"{}\"({}){} // {}", dec.ty, dec.var, if last {""} else {","}, dec.pos))); + let last = i == self.declarations.len() - 1; + self.ctx_string = Rc::new(format!( + "{}{}", + Rc::clone(&self.ctx_string), + format!( + " + \"{}\"({}){} // {}", + dec.ty, + dec.var, + if last { "" } else { "," }, + dec.pos + ) + )); } - self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" + self.ctx_string = Rc::new(format!( + "{}{}", + Rc::clone(&self.ctx_string), + format!( + " : {} ); }} - }}", self.clobbers.join(",")))); + }}", + self.clobbers.join(",") + ) + )); } - pub fn end (&mut self, num_limbs: usize) { + pub fn end(&mut self, num_limbs: usize) { self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" x => panic!(\"asm_mul (no-carry): number of limbs supported is 2 up to {}. You had {{}}.\", x) }}; diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index 785dfc567..6b23180b6 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -13,32 +13,39 @@ use std::cell::RefCell; const MAX_REGS: usize = 6; -pub fn generate_macro_string (num_limbs:usize) -> std::string::String { +pub fn generate_macro_string(num_limbs: usize) -> std::string::String { if num_limbs > 3 * MAX_REGS { - panic!("Number of limbs must be <= {} and MAX_REGS >= 6", 3*MAX_REGS); + panic!( + "Number of limbs must be <= {} and MAX_REGS >= 6", + 3 * MAX_REGS + ); } let mut macro_string = String::from( - "macro_rules! asm_mul { + "macro_rules! asm_mul { ($limbs:expr, $a:expr, $b:expr, $modulus:expr, $mod_prime:expr) => { - match $limbs {"); + match $limbs {", + ); macro_string = format!("{}{}", macro_string, generate_matches(num_limbs, true)); - macro_string = format!("{}{}", macro_string, - "macro_rules! asm_square { + macro_string = format!( + "{}{}", + macro_string, + "macro_rules! 
asm_square { ($limbs:expr, $a:expr, $modulus:expr, $mod_prime:expr) => { - match $limbs {"); + match $limbs {" + ); macro_string = format!("{}{}", macro_string, generate_matches(num_limbs, false)); macro_string } #[assemble] -fn generate_asm_mul_string ( +fn generate_asm_mul_string( a: &str, b: &str, modulus: &str, zero: &str, mod_prime: &str, - limbs: usize + limbs: usize, ) -> String { reg!(a0, a1, a, limbs); reg!(b0, b1, b, limbs); @@ -48,52 +55,54 @@ fn generate_asm_mul_string ( // } // if limbs <= 8 { - xorq(RCX, RCX); - for i in 0..limbs { - if i == 0 { - mul_1!(a1[0], b1, zero, limbs); - } else { - mul_add_1!(a1, b1, zero, i, limbs); - } - mul_add_shift_1!(m1, mod_prime, zero, i, limbs); - } - for i in 0..limbs { - movq(R[i], a1[i]); + xorq(RCX, RCX); + for i in 0..limbs { + if i == 0 { + mul_1!(a1[0], b1, zero, limbs); + } else { + mul_add_1!(a1, b1, zero, i, limbs); } + mul_add_shift_1!(m1, mod_prime, zero, i, limbs); + } + for i in 0..limbs { + movq(R[i], a1[i]); + } // } else { - // asm.xorq(RCX, RCX); - // for i in 0..8 { - // if i == 0 { - // ar::mul_1_mov(&mut asm, a1[0], &b1, 0); - // } else { - // ar::mul_add_1(&mut asm, &a1, &b1, i); - // } - // } - // for i in 0..8 { - // ar::mul_add_1(&mut asm, &m1, 0); - // } - // for i in 0..asm.limbs { - // asm.movq(R[i], a1[i]); - // } + // asm.xorq(RCX, RCX); + // for i in 0..8 { + // if i == 0 { + // ar::mul_1_mov(&mut asm, a1[0], &b1, 0); + // } else { + // ar::mul_add_1(&mut asm, &a1, &b1, i); + // } + // } + // for i in 0..8 { + // ar::mul_add_1(&mut asm, &m1, 0); + // } + // for i in 0..asm.limbs { + // asm.movq(R[i], a1[i]); + // } // } } -fn generate_matches (num_limbs: usize, is_mul: bool) -> String { +fn generate_matches(num_limbs: usize, is_mul: bool) -> String { let mut ctx = Context::new(); - for limbs in 2..(num_limbs+1) { + for limbs in 2..(num_limbs + 1) { ctx.reset(); ctx.add_declaration("a", "r", "&mut $a"); - if is_mul { ctx.add_declaration("b", "r", "&$b"); } + if is_mul { + ctx.add_declaration("b", "r", "&$b"); + } ctx.add_declaration("modulus", "r", "&$modulus"); ctx.add_declaration("0", "i", "0u64"); ctx.add_declaration("mod_prime", "i", "$mod_prime"); ctx.add_limb(limbs); if limbs > MAX_REGS { - ctx.add_buffer(2*limbs); + ctx.add_buffer(2 * limbs); ctx.add_declaration("buf", "r", "&mut spill_buffer"); } @@ -103,7 +112,7 @@ fn generate_matches (num_limbs: usize, is_mul: bool) -> String { &ctx.clone().get("modulus"), &ctx.clone().get("0"), &ctx.clone().get("mod_prime"), - limbs + limbs, ); ctx.add_asm(asm_string); diff --git a/algebra-core/field-assembly/src/utils.rs b/algebra-core/field-assembly/src/utils.rs index 1120c4cd4..7d9ebba01 100644 --- a/algebra-core/field-assembly/src/utils.rs +++ b/algebra-core/field-assembly/src/utils.rs @@ -11,10 +11,10 @@ macro_rules! 
reg { let mut $a_0 = Vec::new(); let mut $a_1 = Vec::new(); for i in 0..$range { - $a_0.push(format!("{}({})", i*8, $a)); + $a_0.push(format!("{}({})", i * 8, $a)); } for i in 0..$range { $a_1.push(&*$a_0[i]); } - } + }; } diff --git a/algebra-core/mince/src/arithmetic.rs b/algebra-core/mince/src/arithmetic.rs index d820b2139..dae7af3c2 100644 --- a/algebra-core/mince/src/arithmetic.rs +++ b/algebra-core/mince/src/arithmetic.rs @@ -55,5 +55,6 @@ pub fn define_arithmetic() -> TokenStream { } } } - }).into() + }) + .into() } diff --git a/algebra-core/mince/src/lib.rs b/algebra-core/mince/src/lib.rs index c070b84c6..8f2ac3ea9 100644 --- a/algebra-core/mince/src/lib.rs +++ b/algebra-core/mince/src/lib.rs @@ -1,4 +1,4 @@ -#![recursion_limit="256"] +#![recursion_limit = "256"] extern crate proc_macro; @@ -9,14 +9,14 @@ mod arithmetic; use arithmetic::*; use proc_macro::TokenStream; -use syn; use quote::quote; +use syn; #[proc_macro_attribute] -pub fn assemble (_meta: TokenStream, input: TokenStream) -> TokenStream { +pub fn assemble(_meta: TokenStream, input: TokenStream) -> TokenStream { let ast: syn::ItemFn = syn::parse(input).unwrap(); let sig = ast.sig; - let block = ast.block; + let block = ast.block; let attrs = ast.attrs; let arithmetic: syn::Block = syn::parse(define_arithmetic()).unwrap(); diff --git a/algebra-core/src/fields/arithmetic.rs b/algebra-core/src/fields/arithmetic.rs index 2c41009b5..ca36aacae 100644 --- a/algebra-core/src/fields/arithmetic.rs +++ b/algebra-core/src/fields/arithmetic.rs @@ -1,4 +1,3 @@ - /// This modular multiplication algorithm uses Montgomery /// reduction for efficient implementation. It also additionally /// uses the "no-carry optimization" outlined diff --git a/algebra-core/src/fields/models/mod.rs b/algebra-core/src/fields/models/mod.rs index 53be8e912..741c5fb66 100644 --- a/algebra-core/src/fields/models/mod.rs +++ b/algebra-core/src/fields/models/mod.rs @@ -9,18 +9,30 @@ use num_traits::{One, Zero}; use unroll::unroll_for_loops; use crate::{ - biginteger::{arithmetic as fa, BigInteger as _BigInteger, - BigInteger256, BigInteger320, BigInteger384, BigInteger768, BigInteger832}, + biginteger::{ + arithmetic as fa, BigInteger as _BigInteger, BigInteger256, BigInteger320, BigInteger384, + BigInteger768, BigInteger832, + }, bytes::{FromBytes, ToBytes}, fields::{Field, FpParameters, LegendreSymbol, PrimeField, SquareRootField}, io::{Read, Result as IoResult, Write}, - serialize::CanonicalDeserialize + serialize::CanonicalDeserialize, }; -#[cfg(all(feature = "asm", target_arch = "x86_64", target_feature="bmi2", target_feature="adx"))] +#[cfg(all( + feature = "asm", + target_arch = "x86_64", + target_feature = "bmi2", + target_feature = "adx" +))] use std::mem::MaybeUninit; -#[cfg(all(feature = "asm", target_arch = "x86_64", target_feature="bmi2", target_feature="adx"))] +#[cfg(all( + feature = "asm", + target_arch = "x86_64", + target_feature = "bmi2", + target_feature = "adx" +))] include!(concat!(env!("OUT_DIR"), "/field_assembly.rs")); impl_Fp!(Fp256, Fp256Parameters, BigInteger256, BigInteger256, 4); diff --git a/algebra-core/src/lib.rs b/algebra-core/src/lib.rs index 98976e1de..01ebf6d19 100644 --- a/algebra-core/src/lib.rs +++ b/algebra-core/src/lib.rs @@ -5,10 +5,8 @@ #![deny(unused_extern_crates, renamed_and_removed_lints, unused_allocation)] #![deny(unused_comparisons, bare_trait_objects, const_err, unused_must_use)] #![deny(unused_mut, unused_unsafe, private_in_public)] - #![cfg_attr(not(feature = "asm"), deny(unsafe_code))] 
#![cfg_attr(not(feature = "asm"), forbid(unsafe_code))] - #![cfg_attr(feature = "asm", feature(asm))] #[cfg(all(test, not(feature = "std")))] From 07a84f0b00f5607c63b8018663e014fce553be02 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 13:00:21 +0800 Subject: [PATCH 33/43] cfg_attr to avoid feature(asm) on stable --- algebra-core/Cargo.toml | 2 ++ algebra-core/build.rs | 7 +++++++ algebra-core/src/lib.rs | 3 ++- 3 files changed, 11 insertions(+), 1 deletion(-) diff --git a/algebra-core/Cargo.toml b/algebra-core/Cargo.toml index 9010cf8fd..c2324b017 100644 --- a/algebra-core/Cargo.toml +++ b/algebra-core/Cargo.toml @@ -32,6 +32,7 @@ unroll = "0.1.4" [build-dependencies] field-assembly = { path = "./field-assembly" } +rustc_version = "0.1.*" [dev-dependencies] rand_xorshift = "0.2" @@ -42,3 +43,4 @@ std = [] parallel = [ "std", "rayon" ] derive = [ "algebra-core-derive" ] asm = [] +stable = [] diff --git a/algebra-core/build.rs b/algebra-core/build.rs index 987dcf79a..95dbbd38d 100644 --- a/algebra-core/build.rs +++ b/algebra-core/build.rs @@ -2,6 +2,9 @@ use std::env; use std::fs; use std::path::Path; +extern crate rustc_version; +use rustc_version::{version_meta, Channel}; + #[cfg(feature = "asm")] use field_assembly::generate_macro_string; @@ -19,4 +22,8 @@ fn main() { fs::write(&dest_path, "").unwrap(); println!("cargo:rerun-if-changed=build.rs"); + + if version_meta().channel == Channel::Stable { + println!("cargo:rustc-cfg=feature=\"stable\""); + } } diff --git a/algebra-core/src/lib.rs b/algebra-core/src/lib.rs index 01ebf6d19..27550e96e 100644 --- a/algebra-core/src/lib.rs +++ b/algebra-core/src/lib.rs @@ -5,9 +5,10 @@ #![deny(unused_extern_crates, renamed_and_removed_lints, unused_allocation)] #![deny(unused_comparisons, bare_trait_objects, const_err, unused_must_use)] #![deny(unused_mut, unused_unsafe, private_in_public)] + #![cfg_attr(not(feature = "asm"), deny(unsafe_code))] #![cfg_attr(not(feature = "asm"), forbid(unsafe_code))] -#![cfg_attr(feature = "asm", feature(asm))] +#![cfg_attr(all(feature = "asm", not(feature = "stable")), feature(asm))] #[cfg(all(test, not(feature = "std")))] #[macro_use] From 57de4753b6902babb183688f963852f944e073eb Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 13:36:55 +0800 Subject: [PATCH 34/43] fmt + more stable cfg --- algebra-benches/Cargo.toml | 1 + algebra-benches/build.rs | 3 +++ algebra-benches/src/lib.rs | 6 +++--- algebra-core/src/lib.rs | 1 - 4 files changed, 7 insertions(+), 4 deletions(-) diff --git a/algebra-benches/Cargo.toml b/algebra-benches/Cargo.toml index 3c5a9e928..abffbcc5a 100644 --- a/algebra-benches/Cargo.toml +++ b/algebra-benches/Cargo.toml @@ -41,6 +41,7 @@ bls12_381 = [] bls12_377 = [] sw6 = [] nightly = [] +stable = [] [build-dependencies] rustc_version = "0.1.*" diff --git a/algebra-benches/build.rs b/algebra-benches/build.rs index 385dcce8b..28099bfb2 100644 --- a/algebra-benches/build.rs +++ b/algebra-benches/build.rs @@ -6,4 +6,7 @@ fn main() { if version_meta().channel == Channel::Nightly { println!("cargo:rustc-cfg=feature=\"nightly\""); } + if version_meta().channel == Channel::Stable { + println!("cargo:rustc-cfg=feature=\"stable\""); + } } diff --git a/algebra-benches/src/lib.rs b/algebra-benches/src/lib.rs index 23188a435..bb5ffcb55 100644 --- a/algebra-benches/src/lib.rs +++ b/algebra-benches/src/lib.rs @@ -1,11 +1,11 @@ -#![cfg_attr(feature = "nightly", feature(test))] 
+#![cfg_attr(all(feature = "nightly", not(feature = "stable")), feature(test))] #![allow(unused_macros)] -#[cfg(feature = "nightly")] +#[cfg(all(feature = "nightly", not(feature = "stable")))] extern crate test; #[macro_use] pub mod macros; -#[cfg(feature = "nightly")] +#[cfg(all(feature = "nightly", not(feature = "stable")))] mod curves; diff --git a/algebra-core/src/lib.rs b/algebra-core/src/lib.rs index 27550e96e..330335875 100644 --- a/algebra-core/src/lib.rs +++ b/algebra-core/src/lib.rs @@ -5,7 +5,6 @@ #![deny(unused_extern_crates, renamed_and_removed_lints, unused_allocation)] #![deny(unused_comparisons, bare_trait_objects, const_err, unused_must_use)] #![deny(unused_mut, unused_unsafe, private_in_public)] - #![cfg_attr(not(feature = "asm"), deny(unsafe_code))] #![cfg_attr(not(feature = "asm"), forbid(unsafe_code))] #![cfg_attr(all(feature = "asm", not(feature = "stable")), feature(asm))] From a865b2e165e620eeeb06495a7d88e9fedb529886 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 15:00:28 +0800 Subject: [PATCH 35/43] remove unnecessary clone()s --- algebra-core/mince/src/intrinsics.rs | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/algebra-core/mince/src/intrinsics.rs b/algebra-core/mince/src/intrinsics.rs index a89056e9a..6523f4290 100644 --- a/algebra-core/mince/src/intrinsics.rs +++ b/algebra-core/mince/src/intrinsics.rs @@ -10,16 +10,16 @@ pub fn define_intrinsics() -> TokenStream { let mut end = || { asm_string.replace_with(|x| format!("{}{}", x, " - \"".to_string()).clone()); + \"".to_string())); }; let mut comment = | comment: &str | { - asm_string.replace_with(|x| format!("{}{}", x, format!(" // {}", comment)).clone()); + asm_string.replace_with(|x| format!("{}{}", x, format!(" // {}", comment))); }; let mut mulxq = | a: &str, b: &str, c: &str | { asm_string.replace_with(|x| format!("{}{}", x, format!(" - mulxq {}, {}, {}", a, b, c)).clone()); + mulxq {}, {}, {}", a, b, c))); }; let mut adcxq = | a: &str, b: &str| { @@ -29,17 +29,17 @@ pub fn define_intrinsics() -> TokenStream { let mut adoxq = | a: &str, b: &str | { asm_string.replace_with(|x| format!("{}{}", x, format!(" - adoxq {}, {}", a, b)).clone()); + adoxq {}, {}", a, b))); }; let mut movq = | a: &str, b: &str | { asm_string.replace_with(|x| format!("{}{}", x, format!(" - movq {}, {}", a, b)).clone()); + movq {}, {}", a, b))); }; let mut xorq = | a: &str, b: &str | { asm_string.replace_with(|x| format!("{}{}", x, format!(" - xorq {}, {}", a, b)).clone()); + xorq {}, {}", a, b))); }; } }).into() From 7f800325d46e77b196690eb868d361439bb53dd6 Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 15:29:08 +0800 Subject: [PATCH 36/43] more readable string manipulation --- algebra-core/mince/src/intrinsics.rs | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/algebra-core/mince/src/intrinsics.rs b/algebra-core/mince/src/intrinsics.rs index 6523f4290..8db8f1997 100644 --- a/algebra-core/mince/src/intrinsics.rs +++ b/algebra-core/mince/src/intrinsics.rs @@ -5,41 +5,41 @@ pub fn define_intrinsics() -> TokenStream { (quote! 
{ { let mut begin = || { - asm_string.replace_with(|_| "\"".to_string()); + asm_string.borrow_mut().push_str("\""); }; let mut end = || { - asm_string.replace_with(|x| format!("{}{}", x, " - \"".to_string())); + asm_string.borrow_mut().push_str(" + \""); }; let mut comment = | comment: &str | { - asm_string.replace_with(|x| format!("{}{}", x, format!(" // {}", comment))); + asm_string.borrow_mut().push_str(&format!(" // {}", comment)); }; let mut mulxq = | a: &str, b: &str, c: &str | { - asm_string.replace_with(|x| format!("{}{}", x, format!(" - mulxq {}, {}, {}", a, b, c))); + asm_string.borrow_mut().push_str(&format!(" + mulxq {}, {}, {}", a, b, c)); }; let mut adcxq = | a: &str, b: &str| { - asm_string.replace_with(|x| format!("{}{}", x, format!(" - adcxq {}, {}", a, b))); + asm_string.borrow_mut().push_str(&format!(" + adcxq {}, {}", a, b)); }; let mut adoxq = | a: &str, b: &str | { - asm_string.replace_with(|x| format!("{}{}", x, format!(" - adoxq {}, {}", a, b))); + asm_string.borrow_mut().push_str(&format!(" + adoxq {}, {}", a, b)); }; let mut movq = | a: &str, b: &str | { - asm_string.replace_with(|x| format!("{}{}", x, format!(" - movq {}, {}", a, b))); + asm_string.borrow_mut().push_str(&format!(" + movq {}, {}", a, b)); }; let mut xorq = | a: &str, b: &str | { - asm_string.replace_with(|x| format!("{}{}", x, format!(" - xorq {}, {}", a, b))); + asm_string.borrow_mut().push_str(&format!(" + xorq {}, {}", a, b)); }; } }).into() From 1f8852c1dd17e7d9095ff6f2a7c141815dc6323b Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 15:45:06 +0800 Subject: [PATCH 37/43] fmt... --- algebra-core/mince/src/intrinsics.rs | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/algebra-core/mince/src/intrinsics.rs b/algebra-core/mince/src/intrinsics.rs index 8db8f1997..fc59af5a2 100644 --- a/algebra-core/mince/src/intrinsics.rs +++ b/algebra-core/mince/src/intrinsics.rs @@ -42,5 +42,6 @@ pub fn define_intrinsics() -> TokenStream { xorq {}, {}", a, b)); }; } - }).into() + }) + .into() } From e0f5aaffcb90e92b6c39081ba416725aec7cd22f Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Thu, 23 Apr 2020 22:30:13 +0800 Subject: [PATCH 38/43] fixed omitted argument --- algebra-core/src/fields/macros.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/algebra-core/src/fields/macros.rs b/algebra-core/src/fields/macros.rs index 58242fa5a..eaf92cdec 100644 --- a/algebra-core/src/fields/macros.rs +++ b/algebra-core/src/fields/macros.rs @@ -206,7 +206,7 @@ macro_rules! impl_Fp { } } - impl_field_into_repr!($limbs); + impl_field_into_repr!($limbs, $BigIntegerType); } impl FftField for $Fp

{ From 00b59da1c963e5f500b336cce2c6775a57e8bcea Mon Sep 17 00:00:00 2001 From: jon-chuang <9093549+jon-chuang@users.noreply.github.com> Date: Fri, 24 Apr 2020 11:56:38 +0800 Subject: [PATCH 39/43] fmt --- algebra-core/src/fields/arithmetic.rs | 56 ++++++++++++++++++--------- 1 file changed, 37 insertions(+), 19 deletions(-) diff --git a/algebra-core/src/fields/arithmetic.rs b/algebra-core/src/fields/arithmetic.rs index ca36aacae..b3816fd52 100644 --- a/algebra-core/src/fields/arithmetic.rs +++ b/algebra-core/src/fields/arithmetic.rs @@ -19,11 +19,14 @@ macro_rules! impl_field_mul_assign { // No-carry optimisation applied to CIOS if _no_carry { - #[cfg(all(feature = "asm", target_feature="bmi2", - target_feature="adx", target_arch = "x86_64"))] + #[cfg(all( + feature = "asm", + target_feature = "bmi2", + target_feature = "adx", + target_arch = "x86_64" + ))] { - if $limbs <= 6 - { + if $limbs <= 6 { asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); self.reduce(); return; @@ -114,36 +117,51 @@ macro_rules! impl_field_square_in_place { } let _no_carry: bool = !(first_bit_set || all_bits_set); - #[cfg(all(feature = "asm", target_feature="bmi2", - target_feature="adx", target_arch = "x86_64"))] + #[cfg(all( + feature = "asm", + target_feature = "bmi2", + target_feature = "adx", + target_arch = "x86_64" + ))] { - if $limbs <= 6 && _no_carry - { + if $limbs <= 6 && _no_carry { asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); self.reduce(); return self; } } - let mut r = [0u64; $limbs*2]; + let mut r = [0u64; $limbs * 2]; let mut carry = 0; for i in 0..$limbs { - if i < $limbs-1 { + if i < $limbs - 1 { for j in 0..$limbs { - if j >= i+1 { r[i+j] = fa::mac_with_carry(r[i+j], (self.0).0[i], (self.0).0[j], &mut carry); } + if j >= i + 1 { + r[i + j] = fa::mac_with_carry( + r[i + j], + (self.0).0[i], + (self.0).0[j], + &mut carry, + ); + } } - r[$limbs+i] = carry; + r[$limbs + i] = carry; carry = 0; } } - r[$limbs*2-1] = r[$limbs*2-2] >> 63; - for i in 0..$limbs { r[$limbs*2-2-i] = (r[$limbs*2-2-i] << 1) | (r[$limbs*2-3-i] >> 63); } - for i in 3..$limbs { r[$limbs+1-i] = (r[$limbs+1-i] << 1) | (r[$limbs-i] >> 63); } + r[$limbs * 2 - 1] = r[$limbs * 2 - 2] >> 63; + for i in 0..$limbs { + r[$limbs * 2 - 2 - i] = + (r[$limbs * 2 - 2 - i] << 1) | (r[$limbs * 2 - 3 - i] >> 63); + } + for i in 3..$limbs { + r[$limbs + 1 - i] = (r[$limbs + 1 - i] << 1) | (r[$limbs - i] >> 63); + } r[1] = r[1] << 1; for i in 0..$limbs { - r[2*i] = fa::mac_with_carry(r[2*i], (self.0).0[i], (self.0).0[i], &mut carry); - r[2*i+1] = fa::adc(r[2*i+1], 0, &mut carry); + r[2 * i] = fa::mac_with_carry(r[2 * i], (self.0).0[i], (self.0).0[i], &mut carry); + r[2 * i + 1] = fa::adc(r[2 * i + 1], 0, &mut carry); } // Montgomery reduction let mut _carry2 = 0; @@ -152,9 +170,9 @@ macro_rules! 
impl_field_square_in_place { let mut carry = 0; fa::mac_with_carry(r[i], k, P::MODULUS.0[0], &mut carry); for j in 1..$limbs { - r[j+i] = fa::mac_with_carry(r[j+i], k, P::MODULUS.0[j], &mut carry); + r[j + i] = fa::mac_with_carry(r[j + i], k, P::MODULUS.0[j], &mut carry); } - r[$limbs+i] = fa::adc(r[$limbs+i], _carry2, &mut carry); + r[$limbs + i] = fa::adc(r[$limbs + i], _carry2, &mut carry); _carry2 = carry; } (self.0).0.copy_from_slice(&r[$limbs..]); From 86ec1696192203d882dd6ab755fa1e2b269b577a Mon Sep 17 00:00:00 2001 From: Pratyush Mishra Date: Mon, 4 May 2020 05:15:26 -0700 Subject: [PATCH 40/43] Clean up features, and make nightly detection robust --- README.md | 3 +- algebra-benches/Cargo.toml | 2 -- algebra-benches/build.rs | 5 +--- algebra-benches/src/lib.rs | 6 ++-- algebra-core/Cargo.toml | 1 - algebra-core/build.rs | 17 +++++------ algebra-core/field-assembly/src/context.rs | 6 ++-- algebra-core/field-assembly/src/lib.rs | 34 ++++++++++------------ algebra-core/mince/src/intrinsics.rs | 16 +++++----- algebra-core/mince/src/lib.rs | 4 +-- algebra-core/src/fields/arithmetic.rs | 17 +++++++---- algebra-core/src/fields/models/mod.rs | 17 ++++++++--- algebra-core/src/lib.rs | 21 +++++++++++-- 13 files changed, 85 insertions(+), 64 deletions(-) diff --git a/README.md b/README.md index e46b5c964..38f67acc2 100644 --- a/README.md +++ b/README.md @@ -31,7 +31,8 @@ Informally, the library provides the ability to create transactions that run arb This repository contains several Rust crates that implement the different building blocks of ZEXE. The high-level structure of the repository is as follows. -* [`algebra`](algebra): Rust crate that provides finite fields and elliptic curves +* [`algebra-core`](algebra-core): Rust crate that provides generic arithmetic for finite fields and elliptic curves +* [`algebra`](algebra): Rust crate that provides concrete instantiations of some finite fields and elliptic curves * [`crypto-primitives`](crypto-primitives): Rust crate that implements some useful cryptographic primitives (and constraints for them) * [`dpc`](dpc): Rust crate that implements DPC schemes (the main cryptographic primitive in this repository) * [`ff-fft`](ff-fft): Rust crate that provides efficient finite field polynomial arithmetic based on finite field FFTs diff --git a/algebra-benches/Cargo.toml b/algebra-benches/Cargo.toml index abffbcc5a..248725b39 100644 --- a/algebra-benches/Cargo.toml +++ b/algebra-benches/Cargo.toml @@ -40,8 +40,6 @@ mnt6_753 = [] bls12_381 = [] bls12_377 = [] sw6 = [] -nightly = [] -stable = [] [build-dependencies] rustc_version = "0.1.*" diff --git a/algebra-benches/build.rs b/algebra-benches/build.rs index 28099bfb2..2a906f2b2 100644 --- a/algebra-benches/build.rs +++ b/algebra-benches/build.rs @@ -4,9 +4,6 @@ use rustc_version::{version_meta, Channel}; fn main() { if version_meta().channel == Channel::Nightly { - println!("cargo:rustc-cfg=feature=\"nightly\""); - } - if version_meta().channel == Channel::Stable { - println!("cargo:rustc-cfg=feature=\"stable\""); + println!("cargo:rustc-cfg=nightly"); } } diff --git a/algebra-benches/src/lib.rs b/algebra-benches/src/lib.rs index bb5ffcb55..1f8a6f2e3 100644 --- a/algebra-benches/src/lib.rs +++ b/algebra-benches/src/lib.rs @@ -1,11 +1,11 @@ -#![cfg_attr(all(feature = "nightly", not(feature = "stable")), feature(test))] +#![cfg_attr(nightly, feature(test))] #![allow(unused_macros)] -#[cfg(all(feature = "nightly", not(feature = "stable")))] +#[cfg(nightly)] extern crate test; #[macro_use] pub mod macros; 
-#[cfg(all(feature = "nightly", not(feature = "stable")))] +#[cfg(nightly)] mod curves; diff --git a/algebra-core/Cargo.toml b/algebra-core/Cargo.toml index c2324b017..551b6eca8 100644 --- a/algebra-core/Cargo.toml +++ b/algebra-core/Cargo.toml @@ -43,4 +43,3 @@ std = [] parallel = [ "std", "rayon" ] derive = [ "algebra-core-derive" ] asm = [] -stable = [] diff --git a/algebra-core/build.rs b/algebra-core/build.rs index 95dbbd38d..fe488528a 100644 --- a/algebra-core/build.rs +++ b/algebra-core/build.rs @@ -5,25 +5,24 @@ use std::path::Path; extern crate rustc_version; use rustc_version::{version_meta, Channel}; -#[cfg(feature = "asm")] use field_assembly::generate_macro_string; -#[cfg(feature = "asm")] const NUM_LIMBS: usize = 8; fn main() { let out_dir = env::var_os("OUT_DIR").unwrap(); let dest_path = Path::new(&out_dir).join("field_assembly.rs"); + let is_nightly = version_meta().channel == Channel::Nightly; - #[cfg(feature = "asm")] - fs::write(&dest_path, generate_macro_string(NUM_LIMBS)).unwrap(); - - #[cfg(not(feature = "asm"))] - fs::write(&dest_path, "").unwrap(); + if cfg!(feature = "llvm_asm") && is_nightly { + fs::write(&dest_path, generate_macro_string(NUM_LIMBS)).unwrap(); + } else { + fs::write(&dest_path, "").unwrap(); + } println!("cargo:rerun-if-changed=build.rs"); - if version_meta().channel == Channel::Stable { - println!("cargo:rustc-cfg=feature=\"stable\""); + if is_nightly { + println!("cargo:rustc-cfg=nightly"); } } diff --git a/algebra-core/field-assembly/src/context.rs b/algebra-core/field-assembly/src/context.rs index 0d60eecb9..848d56783 100644 --- a/algebra-core/field-assembly/src/context.rs +++ b/algebra-core/field-assembly/src/context.rs @@ -101,14 +101,14 @@ impl Context { )); } - pub fn add_asm(&mut self, ctx_string: String) { + pub fn add_llvm_asm(&mut self, ctx_string: String) { self.ctx_string = Rc::new(format!( "{}{}", Rc::clone(&self.ctx_string), format!( " unsafe {{ - asm!({} + llvm_asm!({} : :", ctx_string @@ -159,7 +159,7 @@ impl Context { pub fn end(&mut self, num_limbs: usize) { self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" - x => panic!(\"asm_mul (no-carry): number of limbs supported is 2 up to {}. You had {{}}.\", x) + x => panic!(\"llvm_asm_mul (no-carry): number of limbs supported is 2 up to {}. You had {{}}.\", x) }}; }} }} diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index 6b23180b6..37cd81724 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -21,25 +21,23 @@ pub fn generate_macro_string(num_limbs: usize) -> std::string::String { ); } let mut macro_string = String::from( - "macro_rules! asm_mul { + " + macro_rules! llvm_asm_mul { ($limbs:expr, $a:expr, $b:expr, $modulus:expr, $mod_prime:expr) => { match $limbs {", ); - macro_string = format!("{}{}", macro_string, generate_matches(num_limbs, true)); + macro_string += &generate_matches(num_limbs, true); - macro_string = format!( - "{}{}", - macro_string, - "macro_rules! asm_square { + macro_string += &" + macro_rules! 
llvm_asm_square { ($limbs:expr, $a:expr, $modulus:expr, $mod_prime:expr) => { - match $limbs {" - ); - macro_string = format!("{}{}", macro_string, generate_matches(num_limbs, false)); + match $limbs {"; + macro_string += &generate_matches(num_limbs, false); macro_string } #[assemble] -fn generate_asm_mul_string( +fn generate_llvm_asm_mul_string( a: &str, b: &str, modulus: &str, @@ -69,19 +67,19 @@ fn generate_asm_mul_string( } // } else { - // asm.xorq(RCX, RCX); + // llvm_asm.xorq(RCX, RCX); // for i in 0..8 { // if i == 0 { - // ar::mul_1_mov(&mut asm, a1[0], &b1, 0); + // ar::mul_1_mov(&mut llvm_asm, a1[0], &b1, 0); // } else { - // ar::mul_add_1(&mut asm, &a1, &b1, i); + // ar::mul_add_1(&mut llvm_asm, &a1, &b1, i); // } // } // for i in 0..8 { - // ar::mul_add_1(&mut asm, &m1, 0); + // ar::mul_add_1(&mut llvm_asm, &m1, 0); // } - // for i in 0..asm.limbs { - // asm.movq(R[i], a1[i]); + // for i in 0..llvm_asm.limbs { + // llvm_asm.movq(R[i], a1[i]); // } // } @@ -106,7 +104,7 @@ fn generate_matches(num_limbs: usize, is_mul: bool) -> String { ctx.add_declaration("buf", "r", "&mut spill_buffer"); } - let asm_string = generate_asm_mul_string( + let llvm_asm_string = generate_llvm_asm_mul_string( &ctx.clone().get("a"), &ctx.clone().try_get("b", "a"), &ctx.clone().get("modulus"), @@ -115,7 +113,7 @@ fn generate_matches(num_limbs: usize, is_mul: bool) -> String { limbs, ); - ctx.add_asm(asm_string); + ctx.add_llvm_asm(llvm_asm_string); ctx.add_clobber_from_vec(vec!["rcx", "rbx", "rdx", "rax"]); for j in 0..std::cmp::min(limbs, 8) { ctx.add_clobber(REG_CLOBBER[j]); diff --git a/algebra-core/mince/src/intrinsics.rs b/algebra-core/mince/src/intrinsics.rs index fc59af5a2..ba3edd0b0 100644 --- a/algebra-core/mince/src/intrinsics.rs +++ b/algebra-core/mince/src/intrinsics.rs @@ -5,40 +5,40 @@ pub fn define_intrinsics() -> TokenStream { (quote! { { let mut begin = || { - asm_string.borrow_mut().push_str("\""); + llvm_asm_string.borrow_mut().push_str("\""); }; let mut end = || { - asm_string.borrow_mut().push_str(" + llvm_asm_string.borrow_mut().push_str(" \""); }; let mut comment = | comment: &str | { - asm_string.borrow_mut().push_str(&format!(" // {}", comment)); + llvm_asm_string.borrow_mut().push_str(&format!(" // {}", comment)); }; let mut mulxq = | a: &str, b: &str, c: &str | { - asm_string.borrow_mut().push_str(&format!(" + llvm_asm_string.borrow_mut().push_str(&format!(" mulxq {}, {}, {}", a, b, c)); }; let mut adcxq = | a: &str, b: &str| { - asm_string.borrow_mut().push_str(&format!(" + llvm_asm_string.borrow_mut().push_str(&format!(" adcxq {}, {}", a, b)); }; let mut adoxq = | a: &str, b: &str | { - asm_string.borrow_mut().push_str(&format!(" + llvm_asm_string.borrow_mut().push_str(&format!(" adoxq {}, {}", a, b)); }; let mut movq = | a: &str, b: &str | { - asm_string.borrow_mut().push_str(&format!(" + llvm_asm_string.borrow_mut().push_str(&format!(" movq {}, {}", a, b)); }; let mut xorq = | a: &str, b: &str | { - asm_string.borrow_mut().push_str(&format!(" + llvm_asm_string.borrow_mut().push_str(&format!(" xorq {}, {}", a, b)); }; } diff --git a/algebra-core/mince/src/lib.rs b/algebra-core/mince/src/lib.rs index 8f2ac3ea9..0c4027be5 100644 --- a/algebra-core/mince/src/lib.rs +++ b/algebra-core/mince/src/lib.rs @@ -24,7 +24,7 @@ pub fn assemble(_meta: TokenStream, input: TokenStream) -> TokenStream { let begin: syn::Stmt = syn::parse((quote! { begin(); }).into()).unwrap(); let end: syn::Stmt = syn::parse((quote! { end(); }).into()).unwrap(); - let ret: syn::Stmt = syn::parse((quote! 
{ return asm_string.into_inner(); }).into()).unwrap(); + let ret: syn::Stmt = syn::parse((quote! { return llvm_asm_string.into_inner(); }).into()).unwrap(); let mut new_stmts = Vec::new(); for stmt in &intrinsics.stmts { @@ -52,7 +52,7 @@ pub fn assemble(_meta: TokenStream, input: TokenStream) -> TokenStream { #(#attrs) * #sig { - let mut asm_string = RefCell::new(String::new()); + let mut llvm_asm_string = RefCell::new(String::new()); #new_block } diff --git a/algebra-core/src/fields/arithmetic.rs b/algebra-core/src/fields/arithmetic.rs index b3816fd52..681a4d76d 100644 --- a/algebra-core/src/fields/arithmetic.rs +++ b/algebra-core/src/fields/arithmetic.rs @@ -20,14 +20,16 @@ macro_rules! impl_field_mul_assign { // No-carry optimisation applied to CIOS if _no_carry { #[cfg(all( - feature = "asm", + feature = "llvm_asm", target_feature = "bmi2", target_feature = "adx", - target_arch = "x86_64" + target_arch = "x86_64", + nightly, ))] { if $limbs <= 6 { - asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); + #[allow(unsafe_code)] + llvm_asm_mul!($limbs, (self.0).0, (other.0).0, P::MODULUS.0, P::INV); self.reduce(); return; } @@ -108,6 +110,7 @@ macro_rules! impl_field_square_in_place { ($limbs: expr) => { #[inline] #[unroll_for_loops] + #[allow(unused_braces)] fn square_in_place(&mut self) -> &mut Self { // Checking the modulus at compile time let first_bit_set = P::MODULUS.0[$limbs - 1] >> 63 != 0; @@ -118,14 +121,16 @@ macro_rules! impl_field_square_in_place { let _no_carry: bool = !(first_bit_set || all_bits_set); #[cfg(all( - feature = "asm", + feature = "llvm_asm", target_feature = "bmi2", target_feature = "adx", - target_arch = "x86_64" + target_arch = "x86_64", + nightly, ))] { if $limbs <= 6 && _no_carry { - asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); + #[allow(unsafe_code)] + llvm_asm_square!($limbs, (self.0).0, P::MODULUS.0, P::INV); self.reduce(); return self; } diff --git a/algebra-core/src/fields/models/mod.rs b/algebra-core/src/fields/models/mod.rs index 82bae7c02..22f7c895a 100644 --- a/algebra-core/src/fields/models/mod.rs +++ b/algebra-core/src/fields/models/mod.rs @@ -20,19 +20,28 @@ use crate::{ }; #[cfg(all( - feature = "asm", + feature = "llvm_asm", target_arch = "x86_64", target_feature = "bmi2", - target_feature = "adx" + target_feature = "adx", + nightly, ))] use std::mem::MaybeUninit; #[cfg(all( - feature = "asm", + feature = "llvm_asm", target_arch = "x86_64", target_feature = "bmi2", - target_feature = "adx" + target_feature = "adx", + nightly, ))] +#[cfg_attr(all( + feature = "llvm_asm", + target_arch = "x86_64", + target_feature = "bmi2", + target_feature = "adx", + nightly, +), allow(unsafe_code))] include!(concat!(env!("OUT_DIR"), "/field_assembly.rs")); impl_Fp!(Fp256, Fp256Parameters, BigInteger256, BigInteger256, 4); diff --git a/algebra-core/src/lib.rs b/algebra-core/src/lib.rs index 330335875..c85f2f53a 100644 --- a/algebra-core/src/lib.rs +++ b/algebra-core/src/lib.rs @@ -5,9 +5,24 @@ #![deny(unused_extern_crates, renamed_and_removed_lints, unused_allocation)] #![deny(unused_comparisons, bare_trait_objects, const_err, unused_must_use)] #![deny(unused_mut, unused_unsafe, private_in_public)] -#![cfg_attr(not(feature = "asm"), deny(unsafe_code))] -#![cfg_attr(not(feature = "asm"), forbid(unsafe_code))] -#![cfg_attr(all(feature = "asm", not(feature = "stable")), feature(asm))] + +#![cfg_attr(all( + feature = "llvm_asm", + target_arch = "x86_64", + target_feature = "bmi2", + target_feature = "adx", + nightly, +), deny(unsafe_code))] 
+#![cfg_attr(all( + feature = "llvm_asm", + target_arch = "x86_64", + target_feature = "bmi2", + target_feature = "adx", + nightly, +), feature(llvm_asm))] + +#![cfg_attr(not(feature = "llvm_asm"), forbid(unsafe_code))] + #[cfg(all(test, not(feature = "std")))] #[macro_use] From 1cec7d206089397259a986265bf9b8e77ac22180 Mon Sep 17 00:00:00 2001 From: Pratyush Mishra Date: Mon, 4 May 2020 05:34:36 -0700 Subject: [PATCH 41/43] Small clean up of code --- algebra-core/field-assembly/src/context.rs | 60 ++++++++++------------ algebra-core/field-assembly/src/lib.rs | 24 +-------- 2 files changed, 28 insertions(+), 56 deletions(-) diff --git a/algebra-core/field-assembly/src/context.rs b/algebra-core/field-assembly/src/context.rs index 848d56783..f43127b12 100644 --- a/algebra-core/field-assembly/src/context.rs +++ b/algebra-core/field-assembly/src/context.rs @@ -1,11 +1,10 @@ use std::collections::HashMap; -use std::rc::Rc; pub const REG_CLOBBER: [&'static str; 8] = ["r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15"]; #[derive(Clone)] pub struct Context { - ctx_string: Rc, + ctx_string: String, declarations: HashMap, declaration_vec: Vec, clobbers: Vec, @@ -22,15 +21,19 @@ struct Declare { impl Context { pub fn new() -> Self { Context { - ctx_string: Rc::new(String::new()), + ctx_string: String::new(), declarations: HashMap::new(), declaration_vec: Vec::new(), clobbers: Vec::new(), } } + fn append(&mut self, other: &str) { + self.ctx_string += other; + } + pub fn get_string(&mut self) -> String { - Rc::make_mut(&mut self.ctx_string).to_string() + self.ctx_string.clone() } pub fn reset(&mut self) { @@ -78,34 +81,28 @@ impl Context { } pub fn add_limb(&mut self, limb: usize) { - self.ctx_string = Rc::new(format!( - "{}{}", - Rc::clone(&self.ctx_string), - format!( + self.append( + &format!( " {} => {{", limb ) - )); + ) } pub fn add_buffer(&mut self, extra_reg: usize) { - self.ctx_string = Rc::new(format!( - "{}{}", - Rc::clone(&self.ctx_string), - format!( + self.append( + &format!( " let mut spill_buffer = MaybeUninit::<[u64; {}]>::uninit();", extra_reg ) - )); + ); } pub fn add_llvm_asm(&mut self, ctx_string: String) { - self.ctx_string = Rc::new(format!( - "{}{}", - Rc::clone(&self.ctx_string), - format!( + self.append( + &format!( " unsafe {{ llvm_asm!({} @@ -113,7 +110,7 @@ impl Context { :", ctx_string ) - )); + ); } pub fn add_clobber_from_vec(&mut self, clobbers: Vec<&str>) { @@ -130,40 +127,37 @@ impl Context { for i in 0..self.declarations.len() { let dec = &self.declaration_vec[i]; let last = i == self.declarations.len() - 1; - self.ctx_string = Rc::new(format!( - "{}{}", - Rc::clone(&self.ctx_string), - format!( + let dec = + &format!( " \"{}\"({}){} // {}", dec.ty, dec.var, if last { "" } else { "," }, dec.pos - ) - )); + ); + self.append(dec); } - self.ctx_string = Rc::new(format!( - "{}{}", - Rc::clone(&self.ctx_string), - format!( + let clobbers = self.clobbers.join(","); + self.append( + &format!( " : {} ); }} }}", - self.clobbers.join(",") + clobbers ) - )); + ); } pub fn end(&mut self, num_limbs: usize) { - self.ctx_string = Rc::new(format!("{}{}", Rc::clone(&self.ctx_string), format!(" + self.append(&format!(" x => panic!(\"llvm_asm_mul (no-carry): number of limbs supported is 2 up to {}. 
You had {{}}.\", x) }}; }} }} ", - num_limbs))); + num_limbs)); } } diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index 37cd81724..6dd9cb7b8 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -48,11 +48,7 @@ fn generate_llvm_asm_mul_string( reg!(a0, a1, a, limbs); reg!(b0, b1, b, limbs); reg!(m, m1, modulus, limbs); - // if limbs > 8 { - // reg!(s, s1, spills, limbs * 2); - // } - - // if limbs <= 8 { + xorq(RCX, RCX); for i in 0..limbs { if i == 0 { @@ -65,24 +61,6 @@ fn generate_llvm_asm_mul_string( for i in 0..limbs { movq(R[i], a1[i]); } - - // } else { - // llvm_asm.xorq(RCX, RCX); - // for i in 0..8 { - // if i == 0 { - // ar::mul_1_mov(&mut llvm_asm, a1[0], &b1, 0); - // } else { - // ar::mul_add_1(&mut llvm_asm, &a1, &b1, i); - // } - // } - // for i in 0..8 { - // ar::mul_add_1(&mut llvm_asm, &m1, 0); - // } - // for i in 0..llvm_asm.limbs { - // llvm_asm.movq(R[i], a1[i]); - // } - - // } } fn generate_matches(num_limbs: usize, is_mul: bool) -> String { From 27e9c4d0651a352270bb16a6de115f5d2a422830 Mon Sep 17 00:00:00 2001 From: Pratyush Mishra Date: Mon, 4 May 2020 05:48:18 -0700 Subject: [PATCH 42/43] Formatting --- algebra-core/field-assembly/src/context.rs | 57 +++++++++------------- algebra-core/field-assembly/src/lib.rs | 2 +- algebra-core/mince/src/lib.rs | 3 +- algebra-core/src/fields/models/mod.rs | 17 ++++--- algebra-core/src/lib.rs | 17 ++++--- 5 files changed, 47 insertions(+), 49 deletions(-) diff --git a/algebra-core/field-assembly/src/context.rs b/algebra-core/field-assembly/src/context.rs index f43127b12..01d276d22 100644 --- a/algebra-core/field-assembly/src/context.rs +++ b/algebra-core/field-assembly/src/context.rs @@ -30,7 +30,7 @@ impl Context { fn append(&mut self, other: &str) { self.ctx_string += other; - } + } pub fn get_string(&mut self) -> String { self.ctx_string.clone() @@ -81,36 +81,30 @@ impl Context { } pub fn add_limb(&mut self, limb: usize) { - self.append( - &format!( - " + self.append(&format!( + " {} => {{", - limb - ) - ) + limb + )) } pub fn add_buffer(&mut self, extra_reg: usize) { - self.append( - &format!( - " + self.append(&format!( + " let mut spill_buffer = MaybeUninit::<[u64; {}]>::uninit();", - extra_reg - ) - ); + extra_reg + )); } pub fn add_llvm_asm(&mut self, ctx_string: String) { - self.append( - &format!( - " + self.append(&format!( + " unsafe {{ llvm_asm!({} : :", - ctx_string - ) - ); + ctx_string + )); } pub fn add_clobber_from_vec(&mut self, clobbers: Vec<&str>) { @@ -127,28 +121,25 @@ impl Context { for i in 0..self.declarations.len() { let dec = &self.declaration_vec[i]; let last = i == self.declarations.len() - 1; - let dec = - &format!( - " + let dec = &format!( + " \"{}\"({}){} // {}", - dec.ty, - dec.var, - if last { "" } else { "," }, - dec.pos - ); + dec.ty, + dec.var, + if last { "" } else { "," }, + dec.pos + ); self.append(dec); } let clobbers = self.clobbers.join(","); - self.append( - &format!( - " + self.append(&format!( + " : {} ); }} }}", - clobbers - ) - ); + clobbers + )); } pub fn end(&mut self, num_limbs: usize) { diff --git a/algebra-core/field-assembly/src/lib.rs b/algebra-core/field-assembly/src/lib.rs index 6dd9cb7b8..02746d966 100644 --- a/algebra-core/field-assembly/src/lib.rs +++ b/algebra-core/field-assembly/src/lib.rs @@ -48,7 +48,7 @@ fn generate_llvm_asm_mul_string( reg!(a0, a1, a, limbs); reg!(b0, b1, b, limbs); reg!(m, m1, modulus, limbs); - + xorq(RCX, RCX); for i in 0..limbs { if i == 0 { diff 
--git a/algebra-core/mince/src/lib.rs b/algebra-core/mince/src/lib.rs index 0c4027be5..2b9021ce7 100644 --- a/algebra-core/mince/src/lib.rs +++ b/algebra-core/mince/src/lib.rs @@ -24,7 +24,8 @@ pub fn assemble(_meta: TokenStream, input: TokenStream) -> TokenStream { let begin: syn::Stmt = syn::parse((quote! { begin(); }).into()).unwrap(); let end: syn::Stmt = syn::parse((quote! { end(); }).into()).unwrap(); - let ret: syn::Stmt = syn::parse((quote! { return llvm_asm_string.into_inner(); }).into()).unwrap(); + let ret: syn::Stmt = + syn::parse((quote! { return llvm_asm_string.into_inner(); }).into()).unwrap(); let mut new_stmts = Vec::new(); for stmt in &intrinsics.stmts { diff --git a/algebra-core/src/fields/models/mod.rs b/algebra-core/src/fields/models/mod.rs index 22f7c895a..4e45d9f00 100644 --- a/algebra-core/src/fields/models/mod.rs +++ b/algebra-core/src/fields/models/mod.rs @@ -35,13 +35,16 @@ use std::mem::MaybeUninit; target_feature = "adx", nightly, ))] -#[cfg_attr(all( - feature = "llvm_asm", - target_arch = "x86_64", - target_feature = "bmi2", - target_feature = "adx", - nightly, -), allow(unsafe_code))] +#[cfg_attr( + all( + feature = "llvm_asm", + target_arch = "x86_64", + target_feature = "bmi2", + target_feature = "adx", + nightly, + ), + allow(unsafe_code) +)] include!(concat!(env!("OUT_DIR"), "/field_assembly.rs")); impl_Fp!(Fp256, Fp256Parameters, BigInteger256, BigInteger256, 4); diff --git a/algebra-core/src/lib.rs b/algebra-core/src/lib.rs index c85f2f53a..8463ed735 100644 --- a/algebra-core/src/lib.rs +++ b/algebra-core/src/lib.rs @@ -5,25 +5,28 @@ #![deny(unused_extern_crates, renamed_and_removed_lints, unused_allocation)] #![deny(unused_comparisons, bare_trait_objects, const_err, unused_must_use)] #![deny(unused_mut, unused_unsafe, private_in_public)] - -#![cfg_attr(all( +#![cfg_attr( + all( feature = "llvm_asm", target_arch = "x86_64", target_feature = "bmi2", target_feature = "adx", nightly, -), deny(unsafe_code))] -#![cfg_attr(all( + ), + deny(unsafe_code) +)] +#![cfg_attr( + all( feature = "llvm_asm", target_arch = "x86_64", target_feature = "bmi2", target_feature = "adx", nightly, -), feature(llvm_asm))] - + ), + feature(llvm_asm) +)] #![cfg_attr(not(feature = "llvm_asm"), forbid(unsafe_code))] - #[cfg(all(test, not(feature = "std")))] #[macro_use] extern crate std; From a52841d9b42cf32f7a36072ec98607602b4ad8ae Mon Sep 17 00:00:00 2001 From: Pratyush Mishra Date: Mon, 4 May 2020 06:02:24 -0700 Subject: [PATCH 43/43] Fix imports and features --- algebra-benches/Cargo.toml | 16 ++++++++-------- algebra-benches/src/lib.rs | 3 ++- algebra-benches/src/macros/field.rs | 10 +++++----- 3 files changed, 15 insertions(+), 14 deletions(-) diff --git a/algebra-benches/Cargo.toml b/algebra-benches/Cargo.toml index 248725b39..3e45dde90 100644 --- a/algebra-benches/Cargo.toml +++ b/algebra-benches/Cargo.toml @@ -24,7 +24,7 @@ build = "build.rs" ################################# Dependencies ################################ [dev-dependencies] -algebra = { path = "../algebra", features = [ "full" ] } +algebra = { path = "../algebra" } blake2 = "0.8.1" rand = "0.7" rand_xorshift = { version = "0.2" } @@ -33,13 +33,13 @@ paste = "0.1" [features] asm = [ "algebra/asm"] n_fold = [] -mnt4_298 = [] -mnt6_298 = [] -mnt4_753 = [] -mnt6_753 = [] -bls12_381 = [] -bls12_377 = [] -sw6 = [] +mnt4_298 = [ "algebra/mnt4_298"] +mnt6_298 = [ "algebra/mnt6_298"] +mnt4_753 = [ "algebra/mnt4_753"] +mnt6_753 = [ "algebra/mnt6_753"] +bls12_381 = [ "algebra/bls12_381"] +bls12_377 = [ 
"algebra/bls12_377"] +sw6 = [ "algebra/sw6" ] [build-dependencies] rustc_version = "0.1.*" diff --git a/algebra-benches/src/lib.rs b/algebra-benches/src/lib.rs index 1f8a6f2e3..722a82390 100644 --- a/algebra-benches/src/lib.rs +++ b/algebra-benches/src/lib.rs @@ -4,8 +4,9 @@ #[cfg(nightly)] extern crate test; +#[cfg(all(nightly, test))] #[macro_use] pub mod macros; -#[cfg(nightly)] +#[cfg(all(nightly, test))] mod curves; diff --git a/algebra-benches/src/macros/field.rs b/algebra-benches/src/macros/field.rs index cb2f73fa1..5e786e2b5 100644 --- a/algebra-benches/src/macros/field.rs +++ b/algebra-benches/src/macros/field.rs @@ -25,7 +25,7 @@ macro_rules! field_common { let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - let v: Vec<($f_type, $f_type)> = (0..SAMPLES) + let v: Vec<_> = (0..SAMPLES) .map(|_| ($f::rand(&mut rng), $f::rand(&mut rng))) .collect(); @@ -44,7 +44,7 @@ macro_rules! field_common { let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - let v: Vec<($f_type, $f_type)> = (0..SAMPLES) + let v: Vec<_> = (0..SAMPLES) .map(|_| ($f::rand(&mut rng), $f::rand(&mut rng))) .collect(); @@ -63,7 +63,7 @@ macro_rules! field_common { let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - let v: Vec<($f_type, $f_type)> = (0..SAMPLES) + let v: Vec<_> = (0..SAMPLES) .map(|_| ($f::rand(&mut rng), $f::rand(&mut rng))) .collect(); @@ -165,7 +165,7 @@ macro_rules! field_base { let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - let v: Vec<($f_repr_type, $f_repr_type)> = (0..SAMPLES) + let v: Vec<_> = (0..SAMPLES) .map(|_| { let mut tmp1 = $f_repr::rand(&mut rng); let mut tmp2 = $f_repr::rand(&mut rng); @@ -193,7 +193,7 @@ macro_rules! field_base { let mut rng = XorShiftRng::seed_from_u64(1231275789u64); - let v: Vec<($f_repr_type, $f_repr_type)> = (0..SAMPLES) + let v: Vec<_> = (0..SAMPLES) .map(|_| { let tmp1 = $f_repr::rand(&mut rng); let mut tmp2 = tmp1;