diff --git a/.clang-format b/.clang-format new file mode 100644 index 0000000..e9ab196 --- /dev/null +++ b/.clang-format @@ -0,0 +1,28 @@ +# This file is part of preseq +# +# Copyright (C) 2024: Andrew D. Smith +# +# Authors: Andrew D. Smith +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This software is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +BasedOnStyle: LLVM +ColumnLimit: 80 +IndentWidth: 2 +AlwaysBreakAfterReturnType: TopLevel +ContinuationIndentWidth: 2 +ConstructorInitializerIndentWidth: 2 +BraceWrapping: + BeforeElse: true + BeforeCatch: true +BreakBeforeBraces: Custom +BreakConstructorInitializers: AfterColon +SpacesBeforeTrailingComments: 2 diff --git a/.github/workflows/clang-format.yml b/.github/workflows/clang-format.yml new file mode 100644 index 0000000..874410b --- /dev/null +++ b/.github/workflows/clang-format.yml @@ -0,0 +1,39 @@ +# This file is part of preseq +# +# Copyright (C) 2024: Andrew D. Smith +# +# Authors: Andrew D. Smith +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This software is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. 
+ +name: Source formatting with clang-format + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + workflow_dispatch: + +jobs: + clang-format: + runs-on: ubuntu-24.04 + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Install dependencies + run: sudo apt-get install -y clang-format + + - name: Run clang-format + run: | + clang-format --dry-run -Werror $(git ls-files '*.*pp') diff --git a/.github/workflows/cpplint.yml b/.github/workflows/cpplint.yml new file mode 100644 index 0000000..24627c4 --- /dev/null +++ b/.github/workflows/cpplint.yml @@ -0,0 +1,52 @@ +# This file is part of preseq +# +# Copyright (C) 2024: Andrew D. Smith +# +# Authors: Andrew D. Smith +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This software is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +name: Linting with cpplint + +on: + push: + branches: [ "master" ] + pull_request: + branches: [ "master" ] + workflow_dispatch: + +jobs: + cpplint: + runs-on: ubuntu-24.04 + strategy: + matrix: + python-version: ["3.12"] + + steps: + - name: Checkout repository + uses: actions/checkout@v4 + + - name: Python setup ${{ matrix.python-version }} + uses: actions/setup-python@v5 + with: + python-version: ${{ matrix.python-version }} + + - name: Install cpplint + run: | + python -m pip install --upgrade pip + pip install cpplint + + - name: Check cpplint version + run: cpplint --version + + - name: Run cpplint + run: | + cpplint --quiet --repository=. 
$(git ls-files '*.*pp') diff --git a/CPPLINT.cfg b/CPPLINT.cfg new file mode 100644 index 0000000..52bd4ce --- /dev/null +++ b/CPPLINT.cfg @@ -0,0 +1,28 @@ +# This file is part of preseq +# +# Copyright (C) 2024: Andrew D. Smith +# +# Authors: Andrew D. Smith +# +# This is free software: you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by +# the Free Software Foundation, either version 3 of the License, or +# (at your option) any later version. +# +# This software is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +# General Public License for more details. + +set noparent +filter=-runtime/references +filter=-build/include_subdir +filter=-build/include_order +filter=-build/c++11 +filter=-build/c++17 +# Formatting below handled by clang-format +filter=-whitespace/line_length +filter=-whitespace/newline +filter=-readability/braces +filter=-whitespace/semicolon +filter=-whitespace/indent diff --git a/autogen.sh b/autogen.sh new file mode 100755 index 0000000..c4c9ee6 --- /dev/null +++ b/autogen.sh @@ -0,0 +1,37 @@ +#!/bin/sh +# +# Run 'autoreconf -i' to generate 'configure', 'Makefile.in', etc. +# +# The first time this is run on a new cloned git repo the configure +# script will not be present, only the configure.ac and +# Makefile.am. The rest must be generated by `autoreconf -i`. +# +# If you are working with a distribution (file ending with ".tar.gz" +# or similar) then this script should not be needed, and should not be +# present, as all the files should already exist. You should only run +# this script if you know what you are doing with autoreconf. +# +# This script will only work with an argument to confirm the help +# message has been read. 
+ +runautoreconf() { + autoreconf -i; +} + +if test -d .git && test "$(basename "${PWD}")" = "preseq" +then + runautoreconf + exit 0 +else + echo " It seems you are either attempting to run this script " + echo " from the wrong directory, or in a source tree that was " + echo " not obtained by cloning the preseq git repo. " + echo " " + echo " ./autogen.sh generates the configure script. " + echo " " + echo " Only run this if you know what you are doing with " + echo " autoreconf and are simply avoiding doing that. If you " + echo " just want to use the software, download a release and " + echo " this script will not be needed. " + exit 1 +fi diff --git a/src/continued_fraction.cpp b/src/continued_fraction.cpp index 2a10dfb..00709c1 100644 --- a/src/continued_fraction.cpp +++ b/src/continued_fraction.cpp @@ -19,15 +19,16 @@ #include "continued_fraction.hpp" -#include -#include +#include #include +#include +#include -using std::vector; -using std::min; +using std::fabs; using std::isfinite; +using std::min; using std::pow; -using std::fabs; +using std::vector; // ADS: the std::pow function is used frequently to get (-1)^x for // integer x. 
This doesn't make sense, and should be replaced at some @@ -43,30 +44,29 @@ using std::fabs; */ static void quotdiff_algorithm(const vector &ps_coeffs, vector &cf_coeffs) { - - const size_t depth = ps_coeffs.size(); // degree of power series + const size_t depth = ps_coeffs.size(); // degree of power series // q_table[0] never used, and undefined - vector > q_table(depth, vector(depth + 1, 0.0)); + vector> q_table(depth, vector(depth + 1, 0.0)); // q_table[1][j] = ratio of ps coefficients for (size_t j = 0; j < depth - 1; j++) - q_table[1][j] = ps_coeffs[j + 1]/ps_coeffs[j]; + q_table[1][j] = ps_coeffs[j + 1] / ps_coeffs[j]; // e_table[0] is always 0 - vector > e_table(depth, vector(depth + 1, 0.0)); + vector> e_table(depth, vector(depth + 1, 0.0)); // e_table[1] follows the general recurrence (same as in loop below) for (size_t j = 0; j < depth - 1; j++) - e_table[1][j] = q_table[1][j+1] - q_table[1][j] + e_table[0][j+1]; + e_table[1][j] = q_table[1][j + 1] - q_table[1][j] + e_table[0][j + 1]; // using intial values of E(i)(j)'s and Q(i)(j)'s, fill rest of the // q table and e table for (size_t i = 2; i < depth; i++) { - for (size_t j = 0; j < depth; j++) - q_table[i][j] = q_table[i-1][j+1]*e_table[i-1][j+1]/e_table[i-1][j]; + q_table[i][j] = + q_table[i - 1][j + 1] * e_table[i - 1][j + 1] / e_table[i - 1][j]; for (size_t j = 0; j < depth; j++) - e_table[i][j] = q_table[i][j+1] - q_table[i][j] + e_table[i-1][j+1]; + e_table[i][j] = q_table[i][j + 1] - q_table[i][j] + e_table[i - 1][j + 1]; } cf_coeffs.resize(depth); @@ -74,10 +74,9 @@ quotdiff_algorithm(const vector &ps_coeffs, vector &cf_coeffs) { cf_coeffs[0] = ps_coeffs[0]; // set remaining CF coefficients from e and q table values for (size_t i = 1; i < depth; ++i) - cf_coeffs[i] = (i % 2 == 0) ? -e_table[i/2][0] : -q_table[(i + 1)/2][0]; + cf_coeffs[i] = (i % 2 == 0) ? 
-e_table[i / 2][0] : -q_table[(i + 1) / 2][0]; } - /* compute CF coeffs when upper_offset > 0 above the diagonal; this * means degree of polynomial in numerator of Pade approximant is * greater than degree of polynomial in the denominator @@ -86,7 +85,6 @@ static void quotdiff_above_diagonal(const vector &ps_coeffs, const size_t offset, vector &cf_coeffs, vector &offset_coeffs) { - // get the high order PS coeffs for approximation by CF vector high_ps_coeffs(begin(ps_coeffs) + offset, end(ps_coeffs)); @@ -98,23 +96,20 @@ quotdiff_above_diagonal(const vector &ps_coeffs, const size_t offset, offset_coeffs.resize(offset); } - // calculate CF coeffs when lower_offset > 0 static void quotdiff_below_diagonal(const vector &ps_coeffs, const size_t offset, vector &cf_coeffs, vector &offset_coeffs) { - // need to work with reciprocal series g = 1/f, then invert vector recip_ps_coeffs(ps_coeffs.size()); - recip_ps_coeffs[0] = 1.0/ps_coeffs[0]; + recip_ps_coeffs[0] = 1.0 / ps_coeffs[0]; for (size_t i = 1; i < ps_coeffs.size(); ++i) { - double x = 0.0; for (size_t j = 0; j < i; ++j) - x += ps_coeffs[i - j]*recip_ps_coeffs[j]; + x += ps_coeffs[i - j] * recip_ps_coeffs[j]; - recip_ps_coeffs[i] = -x/ps_coeffs[0]; + recip_ps_coeffs[i] = -x / ps_coeffs[0]; } // qd to compute cf_coeffs using remaining coeffs @@ -127,7 +122,6 @@ quotdiff_below_diagonal(const vector &ps_coeffs, const size_t offset, offset_coeffs.resize(offset); } - /* decrease degree of CF keeping coeffs equal to original */ void decrease_degree(const size_t decrement, ContinuedFraction &the_cf) { @@ -137,7 +131,6 @@ decrease_degree(const size_t decrement, ContinuedFraction &the_cf) { the_cf.degree -= decrement; } - void truncate_degree(const size_t n_terms, ContinuedFraction &the_cf) { if (the_cf.degree < n_terms) { @@ -150,15 +143,14 @@ truncate_degree(const size_t n_terms, ContinuedFraction &the_cf) { } } - -ContinuedFraction::ContinuedFraction(const vector &ps_cf, - const int di, const size_t dg) : 
+ContinuedFraction::ContinuedFraction(const vector &ps_cf, const int di, + const size_t dg) : ps_coeffs(ps_cf), diagonal_idx(di), degree(dg) { if (diagonal_idx == 0) quotdiff_algorithm(ps_coeffs, cf_coeffs); else if (diagonal_idx > 0) quotdiff_above_diagonal(ps_coeffs, diagonal_idx, cf_coeffs, offset_coeffs); - else // if (cont_frac_estimate.lower_offset > 0) { + else // if (cont_frac_estimate.lower_offset > 0) { quotdiff_below_diagonal(ps_coeffs, -diagonal_idx, cf_coeffs, offset_coeffs); // NOTE: negative sign "-" (-diagonal_idx > 0) for below diagonal } @@ -168,23 +160,21 @@ ContinuedFraction::ContinuedFraction(const vector &ps_cf, static double get_rescale_value(const double numerator, const double denominator) { - static const double tolerance = 1e-20; // magic + static const double tolerance = 1e-20; // magic const double rescale_val = fabs(numerator) + fabs(denominator); - if (rescale_val > 1.0/tolerance) - return 1.0/rescale_val; + if (rescale_val > 1.0 / tolerance) + return 1.0 / rescale_val; else if (rescale_val < tolerance) - return 1.0/rescale_val; + return 1.0 / rescale_val; return 1.0; } - /* calculate ContinuedFraction approx when there is no offset uses euler's * recursion */ static double -evaluate_on_diagonal(const vector &cf_coeffs, - const double val, const size_t depth) { - +evaluate_on_diagonal(const vector &cf_coeffs, const double val, + const size_t depth) { // initialize double current_num = 0.0; double prev_num1 = cf_coeffs[0]; @@ -195,10 +185,9 @@ evaluate_on_diagonal(const vector &cf_coeffs, double prev_denom2 = 1.0; for (size_t i = 1; i < min(cf_coeffs.size(), depth); i++) { - // calculate current values - current_num = prev_num1 + cf_coeffs[i]*val*prev_num2; - current_denom = prev_denom1 + cf_coeffs[i]*val*prev_denom2; + current_num = prev_num1 + cf_coeffs[i] * val * prev_num2; + current_denom = prev_denom1 + cf_coeffs[i] * val * prev_denom2; // update previous values prev_num2 = prev_num1; @@ -219,49 +208,44 @@ 
evaluate_on_diagonal(const vector &cf_coeffs, prev_denom1 *= rescale_val; prev_denom2 *= rescale_val; } - return current_num/current_denom; + return current_num / current_denom; } static double evaluate_power_series(const vector &ps_coeffs, const double val) { double x = 0.0; for (size_t i = 0; i < ps_coeffs.size(); i++) - x += ps_coeffs[i]*pow(val, i); + x += ps_coeffs[i] * pow(val, i); return x; } - /* evaluate CF when upper_offset > 0 using Euler's recursion */ static double evaluate_above_diagonal(const vector &cf_coeffs, - const vector &offset_coeffs, - const double val, const size_t depth) { - + const vector &offset_coeffs, const double val, + const size_t depth) { const double cf_part = evaluate_on_diagonal(cf_coeffs, val, depth - offset_coeffs.size()); const double ps_part = evaluate_power_series(offset_coeffs, val); - return ps_part + pow(val, offset_coeffs.size())*cf_part; + return ps_part + pow(val, offset_coeffs.size()) * cf_part; } - // calculate ContinuedFraction approx when lower_offdiag > 0 static double evaluate_below_diagonal(const vector &cf_coeffs, - const vector &offset_coeffs, - const double val, const size_t depth) { - + const vector &offset_coeffs, const double val, + const size_t depth) { const double cf_part = evaluate_on_diagonal(cf_coeffs, val, depth - offset_coeffs.size()); const double ps_part = evaluate_power_series(offset_coeffs, val); // recall that if lower_offset > 0, we are working with 1/f, invert approx - return 1.0/(ps_part + pow(val, offset_coeffs.size())*cf_part); + return 1.0 / (ps_part + pow(val, offset_coeffs.size()) * cf_part); } - // evaluate CF at a given point double ContinuedFraction::operator()(const double val) const { @@ -273,9 +257,8 @@ ContinuedFraction::operator()(const double val) const { return evaluate_on_diagonal(cf_coeffs, val, degree); } - -std::ostream& -operator<<(std::ostream& the_stream, const ContinuedFraction &cf) { +std::ostream & +operator<<(std::ostream &the_stream, const ContinuedFraction &cf) { 
using std::ios_base; using std::setw; @@ -285,17 +268,16 @@ operator<<(std::ostream& the_stream, const ContinuedFraction &cf) { the_stream << "OFFSET_COEFFS" << '\t' << "PS_COEFFS" << '\n'; const size_t offset = cf.offset_coeffs.size(); for (size_t i = 0; i < offset; ++i) - the_stream << setw(12) << cf.offset_coeffs[i] << '\t' - << setw(12) << cf.ps_coeffs[i] << '\n'; + the_stream << setw(12) << cf.offset_coeffs[i] << '\t' << setw(12) + << cf.ps_coeffs[i] << '\n'; the_stream << "CF_COEFFS" << '\n'; for (size_t i = 0; i < cf.cf_coeffs.size(); ++i) - the_stream << setw(12) << cf.cf_coeffs[i] << '\t' - << setw(12) << cf.ps_coeffs[i + offset] << '\n'; + the_stream << setw(12) << cf.cf_coeffs[i] << '\t' << setw(12) + << cf.ps_coeffs[i + offset] << '\n'; the_stream.flags(orig_flags); return the_stream; } - // estimate yields by evaluating the CF at given points void ContinuedFraction::extrapolate_distinct(const double max_value, @@ -304,7 +286,7 @@ ContinuedFraction::extrapolate_distinct(const double max_value, estimates.clear(); estimates.push_back(0); for (double t = step_size; t <= max_value; t += step_size) - estimates.push_back(t*operator()(t)); + estimates.push_back(t * operator()(t)); } //////////////////////////////////////////////////////////////////////// @@ -321,7 +303,6 @@ const double CFA::search_step_size = 0.05; */ bool check_yield_estimates_stability(const vector &estimates) { - // require estimates are non-negative and finite for (size_t i = 0; i < estimates.size(); ++i) if (!std::isfinite(estimates[i]) || estimates[i] < 0.0) @@ -334,7 +315,7 @@ check_yield_estimates_stability(const vector &estimates) { // require negative second derivative for (size_t i = 2; i < estimates.size(); ++i) - if (estimates[i-1] - estimates[i-2] < estimates[i] - estimates[i-1]) + if (estimates[i - 1] - estimates[i - 2] < estimates[i] - estimates[i - 1]) return false; return !estimates.empty(); @@ -346,7 +327,6 @@ check_yield_estimates_stability(const vector &estimates) { */ 
ContinuedFraction CFA::optimal_cont_frac_distinct(const vector &counts_hist) const { - // we expect to use an underestimate, but this is dealt with outside // by ensuring we have an even number of max terms @@ -355,7 +335,7 @@ CFA::optimal_cont_frac_distinct(const vector &counts_hist) const { vector ps_coeffs; for (size_t j = 1; j <= max_terms; j++) - ps_coeffs.push_back(counts_hist[j]*pow(-1.0, j + 1)); + ps_coeffs.push_back(counts_hist[j] * pow(-1.0, j + 1)); ContinuedFraction full_cf(ps_coeffs, diagonal_idx, max_terms); @@ -367,13 +347,13 @@ CFA::optimal_cont_frac_distinct(const vector &counts_hist) const { return full_cf; } else { - // if max terms >= 7, start at 7 and check increasing cont frac's for (size_t i = 7 + (max_terms % 2 == 0); i <= max_terms; i += 2) { ContinuedFraction trunc_cf(full_cf); truncate_degree(i, trunc_cf); vector estimates; - trunc_cf.extrapolate_distinct(search_max_val, search_step_size, estimates); + trunc_cf.extrapolate_distinct(search_max_val, search_step_size, + estimates); if (check_yield_estimates_stability(estimates)) return trunc_cf; } diff --git a/src/continued_fraction.hpp b/src/continued_fraction.hpp index 33c4689..185b184 100644 --- a/src/continued_fraction.hpp +++ b/src/continued_fraction.hpp @@ -17,20 +17,20 @@ * along with this program. If not, see . 
*/ -#ifndef CONTINUED_FRACTION_HPP -#define CONTINUED_FRACTION_HPP +#ifndef SRC_CONTINUED_FRACTION_HPP_ +#define SRC_CONTINUED_FRACTION_HPP_ -#include -#include +#include #include #include -#include +#include +#include struct ContinuedFraction { // Constructors ContinuedFraction() : diagonal_idx(0), degree(0ul) {} - ContinuedFraction(const std::vector &ps_cf, - const int di, const size_t dg); + ContinuedFraction(const std::vector &ps_cf, const int di, + const size_t dg); // Evaluate the continued fraction double operator()(const double val) const; @@ -40,12 +40,11 @@ struct ContinuedFraction { // Evaluate the continued fraction estimating distinct // along a curve from 0 to max_value - void - extrapolate_distinct(const double max_value, const double step_size, - std::vector &estimates) const; + void extrapolate_distinct(const double max_value, const double step_size, + std::vector &estimates) const; - bool is_valid() const {return !cf_coeffs.empty();} - size_t return_degree() const {return degree;} + bool is_valid() const { return !cf_coeffs.empty(); } + size_t return_degree() const { return degree; } std::vector ps_coeffs; std::vector cf_coeffs; @@ -54,14 +53,15 @@ struct ContinuedFraction { size_t degree; }; - // get continued fraction with lower degree -void decrease_degree(const size_t decrement, ContinuedFraction &cf); -void truncate_degree(const size_t truncated_degree, ContinuedFraction &cf); +// get continued fraction with lower degree +void +decrease_degree(const size_t decrement, ContinuedFraction &cf); +void +truncate_degree(const size_t truncated_degree, ContinuedFraction &cf); std::ostream & operator<<(std::ostream &out, const ContinuedFraction &cf); - class ContinuedFractionApproximation { public: ContinuedFractionApproximation(const int di, const size_t mt) : @@ -71,12 +71,11 @@ class ContinuedFractionApproximation { ContinuedFraction optimal_cont_frac_distinct(const std::vector &counts_hist) const; - int get_diagonal() const {return diagonal_idx;} + 
int get_diagonal() const { return diagonal_idx; } private: - - int diagonal_idx; // the diagonal to work with for estimates - size_t max_terms; // the maximum number of terms to try for a CF + int diagonal_idx; // the diagonal to work with for estimates + size_t max_terms; // the maximum number of terms to try for a CF /* note: these never change */ static const size_t min_allowed_degree; @@ -84,12 +83,11 @@ class ContinuedFractionApproximation { // largest value to search for lowerbound and stability static const double search_max_val; - //step size for search of lowerbound and stability + // step size for search of lowerbound and stability static const double search_step_size; - }; bool check_yield_estimates_stability(const std::vector &estimates); -#endif +#endif // SRC_CONTINUED_FRACTION_HPP_ diff --git a/src/load_data_for_complexity.cpp b/src/load_data_for_complexity.cpp index 7012cbf..1e05891 100644 --- a/src/load_data_for_complexity.cpp +++ b/src/load_data_for_complexity.cpp @@ -19,25 +19,32 @@ #include "load_data_for_complexity.hpp" +#include "GenomicRegion.hpp" +#include "MappedRead.hpp" + +#include + +#include // std::min +#include +#include #include +#include #include -#include #include -#include - -#include "GenomicRegion.hpp" -#include "MappedRead.hpp" +#include // std::swap -using std::string; -using std::vector; -using std::priority_queue; -using std::min; +using std::cerr; using std::endl; using std::max; -using std::cerr; -using std::unordered_map; -using std::runtime_error; +using std::min; using std::mt19937; +using std::priority_queue; +using std::runtime_error; +using std::size_t; +using std::string; +using std::uint64_t; +using std::unordered_map; +using std::vector; ////////////////////////////////////////////////////////////////////// // Data imputation @@ -47,7 +54,7 @@ static bool update_pe_duplicate_counts_hist(const GenomicRegion &curr_gr, const GenomicRegion &prev_gr, vector &counts_hist, - size_t ¤t_count){ + size_t ¤t_count) { // 
check if reads are sorted if (curr_gr.same_chrom(prev_gr) && curr_gr.get_start() < prev_gr.get_start() && @@ -60,49 +67,42 @@ update_pe_duplicate_counts_hist(const GenomicRegion &curr_gr, if (!curr_gr.same_chrom(prev_gr) || curr_gr.get_start() != prev_gr.get_start() || curr_gr.get_end() != prev_gr.get_end()) { - // histogram is too small, resize if (counts_hist.size() < current_count + 1) counts_hist.resize(current_count + 1, 0.0); ++counts_hist[current_count]; current_count = 1; } - else // next read is same, update current_count + else // next read is same, update current_count ++current_count; return true; } - static void update_se_duplicate_counts_hist(const GenomicRegion &curr_gr, const GenomicRegion &prev_gr, const string input_file_name, vector &counts_hist, - size_t ¤t_count){ + size_t ¤t_count) { // check if reads are sorted - if (curr_gr.same_chrom(prev_gr) && - curr_gr.get_start() < prev_gr.get_start()) - throw runtime_error("locations unsorted in: " - + input_file_name); + if (curr_gr.same_chrom(prev_gr) && curr_gr.get_start() < prev_gr.get_start()) + throw runtime_error("locations unsorted in: " + input_file_name); if (!curr_gr.same_chrom(prev_gr) || - curr_gr.get_start() != prev_gr.get_start()) + curr_gr.get_start() != prev_gr.get_start()) { // next read is new, update counts_hist to include current_count - { - // histogram is too small, resize - if(counts_hist.size() < current_count + 1) - counts_hist.resize(current_count + 1, 0.0); - ++counts_hist[current_count]; - current_count = 1; - } - else // next read is same, update current_count + // histogram is too small, resize + if (counts_hist.size() < current_count + 1) + counts_hist.resize(current_count + 1, 0.0); + ++counts_hist[current_count]; + current_count = 1; + } + else // next read is same, update current_count ++current_count; } - - -/////comparison function for priority queue///////////////// +// comparison function for priority queue /**************** FOR CLARITY BELOW WHEN COMPARING READS 
*************/ static inline bool @@ -123,47 +123,39 @@ end_greater(const GenomicRegion &a, const GenomicRegion &b) { } /******************************************************************************/ - struct GenomicRegionOrderChecker { bool operator()(const GenomicRegion &prev, const GenomicRegion &gr) const { return start_check(prev, gr); } - static bool - start_check(const GenomicRegion &prev, const GenomicRegion &gr) { - return (chrom_greater(prev, gr) - || (prev.same_chrom(gr) && start_greater(prev, gr)) - || (prev.same_chrom(gr) && same_start(prev, gr) - && end_greater(prev, gr))); + static bool start_check(const GenomicRegion &prev, const GenomicRegion &gr) { + return ( + chrom_greater(prev, gr) || + (prev.same_chrom(gr) && start_greater(prev, gr)) || + (prev.same_chrom(gr) && same_start(prev, gr) && end_greater(prev, gr))); } }; - -typedef priority_queue, - GenomicRegionOrderChecker> ReadPQ; - +typedef priority_queue, + GenomicRegionOrderChecker> + ReadPQ; static bool -is_ready_to_pop(const ReadPQ &pq, - const GenomicRegion &gr, +is_ready_to_pop(const ReadPQ &pq, const GenomicRegion &gr, const size_t max_width) { return !pq.top().same_chrom(gr) || - pq.top().get_end() + max_width < gr.get_start(); + pq.top().get_end() + max_width < gr.get_start(); } - static void -empty_pq(GenomicRegion &curr_gr, GenomicRegion &prev_gr, - size_t ¤t_count, vector &counts_hist, - ReadPQ &read_pq, const string &input_file_name) { - +empty_pq(GenomicRegion &curr_gr, GenomicRegion &prev_gr, size_t ¤t_count, + vector &counts_hist, ReadPQ &read_pq, + const string &input_file_name) { curr_gr = read_pq.top(); read_pq.pop(); // update counts hist - const bool UPDATE_SUCCESS = - update_pe_duplicate_counts_hist(curr_gr, prev_gr, counts_hist, - current_count); + const bool UPDATE_SUCCESS = update_pe_duplicate_counts_hist( + curr_gr, prev_gr, counts_hist, current_count); if (!UPDATE_SUCCESS) { std::ostringstream oss; oss << "reads unsorted in: " << input_file_name << "\n" @@ -175,7 
+167,6 @@ empty_pq(GenomicRegion &curr_gr, GenomicRegion &prev_gr, prev_gr = curr_gr; } - /* * This code is used to deal with read data in BAM format. */ @@ -184,13 +175,11 @@ empty_pq(GenomicRegion &curr_gr, GenomicRegion &prev_gr, #include "htslib_wrapper_deprecated.hpp" size_t -load_counts_BAM_se(const string &input_file_name, - vector &counts_hist) { +load_counts_BAM_se(const string &input_file_name, vector &counts_hist) { const string mapper = "general"; SAMReader_deprecated sam_reader(input_file_name, mapper); - if(!sam_reader) - throw runtime_error("problem opening input file " - + input_file_name); + if (!sam_reader) + throw runtime_error("problem opening input file " + input_file_name); SAMRecord samr; sam_reader >> samr; @@ -206,17 +195,13 @@ load_counts_BAM_se(const string &input_file_name, while (sam_reader >> samr) { // only convert mapped and primary reads if (samr.is_primary && samr.is_mapped) { - // ignore unmapped reads & secondary alignments if (!(samr.is_mapping_paired) || - (samr.is_mapping_paired && samr.is_Trich)){ - //only count unpaired reads or the left mate of paired reads - + (samr.is_mapping_paired && samr.is_Trich)) { + // only count unpaired reads or the left mate of paired reads curr_mr = samr.mr; - update_se_duplicate_counts_hist(curr_mr.r, prev_mr.r, - input_file_name, - counts_hist, - current_count); + update_se_duplicate_counts_hist(curr_mr.r, prev_mr.r, input_file_name, + counts_hist, current_count); // update number of reads and prev read ++n_reads; @@ -239,7 +224,6 @@ static bool merge_mates(const size_t suffix_len, const size_t range, const GenomicRegion &one, const GenomicRegion &two, GenomicRegion &merged, int &len) { - assert(one.same_chrom(two)); const size_t read_start = min(one.get_start(), two.get_start()); const size_t read_end = max(one.get_end(), two.get_end()); @@ -264,43 +248,38 @@ merge_mates(const size_t suffix_len, const size_t range, } inline static bool -same_read(const size_t suffix_len, - const MappedRead 
&a, const MappedRead &b) { +same_read(const size_t suffix_len, const MappedRead &a, const MappedRead &b) { const string sa(a.r.get_name()); const string sb(b.r.get_name()); bool SAME_NAME = false; - if(sa == sb) + if (sa == sb) SAME_NAME = true; return (SAME_NAME && a.r.same_chrom(b.r)); } // return true if the genomic region is null static inline bool -GenomicRegionIsNull(const GenomicRegion &gr){ +GenomicRegionIsNull(const GenomicRegion &gr) { GenomicRegion null_gr; - if(gr == null_gr) + if (gr == null_gr) return true; return false; } - static void empty_pq(GenomicRegion &prev_gr, - priority_queue, - GenomicRegionOrderChecker> &read_pq, - const string &input_file_name, - vector &counts_hist, + priority_queue, + GenomicRegionOrderChecker> &read_pq, + const string &input_file_name, vector &counts_hist, size_t ¤t_count) { - GenomicRegion curr_gr = read_pq.top(); read_pq.pop(); // check if reads are sorted if (curr_gr.same_chrom(prev_gr) && - curr_gr.get_start() < prev_gr.get_start() - && curr_gr.get_end() < prev_gr.get_end()) { + curr_gr.get_start() < prev_gr.get_start() && + curr_gr.get_end() < prev_gr.get_end()) { std::ostringstream oss; oss << "reads unsorted in: " << input_file_name << "\n" << "prev = \t" << prev_gr << "\n" @@ -313,9 +292,8 @@ empty_pq(GenomicRegion &prev_gr, current_count = 1; else { std::ostringstream oss; - bool UPDATE_HIST = - update_pe_duplicate_counts_hist(curr_gr, prev_gr, - counts_hist, current_count); + bool UPDATE_HIST = update_pe_duplicate_counts_hist( + curr_gr, prev_gr, counts_hist, current_count); if (!UPDATE_HIST) { oss << "locations unsorted in: " << input_file_name << "\n" << "prev = \t" << prev_gr << "\n" @@ -326,21 +304,16 @@ empty_pq(GenomicRegion &prev_gr, prev_gr = curr_gr; } - size_t -load_counts_BAM_pe(const bool VERBOSE, - const string &input_file_name, +load_counts_BAM_pe(const bool VERBOSE, const string &input_file_name, const size_t MAX_SEGMENT_LENGTH, - const size_t MAX_READS_TO_HOLD, - size_t &n_paired, - size_t 
&n_mates, - vector &counts_hist) { - + const size_t MAX_READS_TO_HOLD, size_t &n_paired, + size_t &n_mates, vector &counts_hist) { const string mapper = "general"; SAMReader_deprecated sam_reader(input_file_name, mapper); // check sam_reader - if(!sam_reader) + if (!sam_reader) throw runtime_error("problem opening input file " + input_file_name); SAMRecord samr; @@ -357,34 +330,31 @@ load_counts_BAM_pe(const bool VERBOSE, GenomicRegion prev_gr; std::priority_queue, - GenomicRegionOrderChecker> read_pq; + GenomicRegionOrderChecker> + read_pq; unordered_map dangling_mates; while (sam_reader >> samr) { - // only convert mapped and primary reads if (samr.is_primary && samr.is_mapped) { ++n_mates; // deal with paired-end stuff if (samr.is_mapping_paired) { - const size_t name_len = samr.mr.r.get_name().size() - suffix_len; const string read_name(samr.mr.r.get_name().substr(0, name_len)); if (dangling_mates.find(read_name) != dangling_mates.end()) { // other end is in dangling mates, merge the two mates - if(same_read(suffix_len, samr.mr, - dangling_mates[read_name].mr)) { + if (same_read(suffix_len, samr.mr, dangling_mates[read_name].mr)) { if (samr.is_Trich) std::swap(samr, dangling_mates[read_name]); GenomicRegion merged; int len = 0; - const bool MERGE_SUCCESS = - merge_mates(suffix_len, MAX_SEGMENT_LENGTH, - dangling_mates[read_name].mr.r, samr.mr.r, - merged, len); + const bool MERGE_SUCCESS = merge_mates( + suffix_len, MAX_SEGMENT_LENGTH, dangling_mates[read_name].mr.r, + samr.mr.r, merged, len); // merge success! if (MERGE_SUCCESS && len >= 0 && len <= static_cast(MAX_SEGMENT_LENGTH)) { @@ -394,8 +364,8 @@ load_counts_BAM_pe(const bool VERBOSE, else { // informative error message! 
if (VERBOSE) { - cerr << "problem merging read " - << read_name << ", splitting read" << endl + cerr << "problem merging read " << read_name + << ", splitting read" << endl << samr.mr << "\t" << samr.is_mapping_paired << endl << dangling_mates[read_name].mr << "\t" << dangling_mates[read_name].is_mapping_paired << endl @@ -415,7 +385,7 @@ load_counts_BAM_pe(const bool VERBOSE, n_unpaired += 2; } } - else // didn't find read in dangling_mates, store for later + else // didn't find read in dangling_mates, store for later dangling_mates[read_name] = samr; } else { @@ -423,36 +393,37 @@ load_counts_BAM_pe(const bool VERBOSE, ++n_unpaired; } - // dangling mates is too large, flush dangling_mates of reads // on different chroms and too far away if (dangling_mates.size() > MAX_READS_TO_HOLD) { unordered_map tmp; for (unordered_map::iterator itr = - dangling_mates.begin(); itr != dangling_mates.end(); ++itr) { - if (itr->second.mr.r.get_chrom() != samr.mr.r.get_chrom() - || (itr->second.mr.r.get_chrom() == samr.mr.r.get_chrom() - && itr->second.mr.r.get_end() - + MAX_SEGMENT_LENGTH < samr.mr.r.get_start())) { - if(itr->second.seg_len >= 0) { + dangling_mates.begin(); + itr != dangling_mates.end(); ++itr) { + if (itr->second.mr.r.get_chrom() != samr.mr.r.get_chrom() || + (itr->second.mr.r.get_chrom() == samr.mr.r.get_chrom() && + itr->second.mr.r.get_end() + MAX_SEGMENT_LENGTH < + samr.mr.r.get_start())) { + if (itr->second.seg_len >= 0) { read_pq.push(itr->second.mr.r); ++n_unpaired; } } - else tmp[itr->first] = itr->second; + else + tmp[itr->first] = itr->second; } std::swap(tmp, dangling_mates); tmp.clear(); } - // now empty the priority queue if (!(read_pq.empty()) && is_ready_to_pop(read_pq, samr.mr.r, MAX_SEGMENT_LENGTH)) { - //begin emptying priority queue + // begin emptying priority queue while (!(read_pq.empty()) && is_ready_to_pop(read_pq, samr.mr.r, MAX_SEGMENT_LENGTH)) { - empty_pq(prev_gr, read_pq, input_file_name, counts_hist, current_count); + 
empty_pq(prev_gr, read_pq, input_file_name, counts_hist, + current_count); } } @@ -468,8 +439,8 @@ load_counts_BAM_pe(const bool VERBOSE, ++n_unpaired; } - //final iteration - while(!read_pq.empty()) + // final iteration + while (!read_pq.empty()) empty_pq(prev_gr, read_pq, input_file_name, counts_hist, current_count); if (counts_hist.size() < current_count + 1) @@ -490,12 +461,10 @@ load_counts_BAM_pe(const bool VERBOSE, #endif - /* this code is for BED file input */ size_t -load_counts_BED_se(const string input_file_name, - vector &counts_hist) { +load_counts_BED_se(const string input_file_name, vector &counts_hist) { // resize vals_hist counts_hist.clear(); counts_hist.resize(2, 0.0); @@ -518,40 +487,34 @@ load_counts_BED_se(const string input_file_name, } // to account for the last read compared to the one before it. - if(counts_hist.size() < current_count + 1) + if (counts_hist.size() < current_count + 1) counts_hist.resize(current_count + 1, 0.0); ++counts_hist[current_count]; return n_reads; } - size_t -load_counts_BED_pe(const string input_file_name, - vector &counts_hist) { - +load_counts_BED_pe(const string input_file_name, vector &counts_hist) { // resize vals_hist counts_hist.clear(); counts_hist.resize(2, 0.0); std::ifstream in(input_file_name.c_str()); if (!in) - throw runtime_error("problem opening file: " - + input_file_name); + throw runtime_error("problem opening file: " + input_file_name); GenomicRegion curr_gr, prev_gr; if (!(in >> prev_gr)) - throw runtime_error("problem opening file: " - + input_file_name); + throw runtime_error("problem opening file: " + input_file_name); size_t n_reads = 1; size_t current_count = 1; - //read in file and compare each gr with the one before it + // read in file and compare each gr with the one before it while (in >> curr_gr) { - const bool UPDATE_SUCCESS = - update_pe_duplicate_counts_hist(curr_gr, prev_gr, - counts_hist, current_count); + const bool UPDATE_SUCCESS = update_pe_duplicate_counts_hist( + curr_gr, 
prev_gr, counts_hist, current_count); if (!UPDATE_SUCCESS) throw runtime_error("reads unsorted in " + input_file_name); @@ -566,13 +529,11 @@ load_counts_BED_pe(const string input_file_name, ++counts_hist[current_count]; return n_reads; - } /* text file input */ size_t load_counts(const string &input_file_name, vector &counts_hist) { - std::ifstream in(input_file_name); if (!in) throw runtime_error("problem opening file: " + input_file_name); @@ -580,7 +541,6 @@ load_counts(const string &input_file_name, vector &counts_hist) { size_t n_counts = 0; string buffer; while (getline(in, buffer)) { - if (find(begin(buffer), end(buffer), '\r') != end(buffer)) throw runtime_error("carriage returns in values file " "(suggests dos or mac formatting)"); @@ -598,30 +558,27 @@ load_counts(const string &input_file_name, vector &counts_hist) { n_counts += count; } else if (val != 0) - throw runtime_error("problem reading file at line " - + toa(n_counts + 1)); + throw runtime_error("problem reading file at line " + + toa(n_counts + 1)); } in.peek(); } return n_counts; } - -//returns number of reads from file containing counts histogram +// returns number of reads from file containing counts histogram size_t load_histogram(const string &filename, vector &counts_hist) { - counts_hist.clear(); std::ifstream in(filename.c_str()); - if (!in) //if file doesn't open + if (!in) // if file doesn't open throw runtime_error("could not open histogram: " + filename); size_t n_reads = 0; size_t line_count = 0ul, prev_read_count = 0ul; string buffer; while (getline(in, buffer)) { - if (find(begin(buffer), end(buffer), '\r') != end(buffer)) throw runtime_error("carriage returns in histogram file " "(suggests dos or mac formatting)"); @@ -643,76 +600,64 @@ load_histogram(const string &filename, vector &counts_hist) { counts_hist[read_count] = frequency; if (read_count == 0ul) { throw runtime_error("counts histograms may not " - "include an entry for zero"); + "include an entry for zero"); } 
prev_read_count = read_count; - n_reads += static_cast(read_count*frequency); + n_reads += static_cast(read_count * frequency); } return n_reads; } - ///////////////////////////////////////////////////////// // Loading coverage counts //////////////////////////////////////////////////////// - // probabilistically split genomic regions into mutiple // genomic regions of width equal to bin_size static void -SplitGenomicRegion(const GenomicRegion &inputGR, - mt19937 &generator, const size_t bin_size, - vector &outputGRs){ - +SplitGenomicRegion(const GenomicRegion &inputGR, mt19937 &generator, + const size_t bin_size, vector &outputGRs) { outputGRs.clear(); GenomicRegion gr(inputGR); - double frac = static_cast(gr.get_start() % bin_size)/bin_size; + double frac = static_cast(gr.get_start() % bin_size) / bin_size; const size_t width = gr.get_width(); // ADS: this seems like a bunch of duplicated code just for a single // function difference - std::uniform_real_distribution dist(0.0,1.0); + std::uniform_real_distribution dist(0.0, 1.0); if (dist(generator) > frac) { - gr.set_start(std::floor(static_cast(gr.get_start())/ - bin_size)*bin_size); + gr.set_start(std::floor(static_cast(gr.get_start()) / bin_size) * + bin_size); gr.set_end(gr.get_start() + width); } else { - gr.set_start(std::ceil(static_cast(gr.get_start())/ - bin_size)*bin_size); + gr.set_start(std::ceil(static_cast(gr.get_start()) / bin_size) * + bin_size); gr.set_end(gr.get_start() + width); } - for(size_t i = 0; i < gr.get_width(); i += bin_size){ - + for (size_t i = 0; i < gr.get_width(); i += bin_size) { const size_t curr_start = gr.get_start() + i; - const size_t curr_end - = std::min(gr.get_end(), curr_start + bin_size); - frac = static_cast(curr_end - curr_start)/bin_size; + const size_t curr_end = std::min(gr.get_end(), curr_start + bin_size); + frac = static_cast(curr_end - curr_start) / bin_size; - if(dist(generator) <= frac){ - GenomicRegion binned_gr(gr.get_chrom(), curr_start, - curr_start + 
bin_size, - gr.get_name(), gr.get_score(), - gr.get_strand()); + if (dist(generator) <= frac) { + GenomicRegion binned_gr(gr.get_chrom(), curr_start, curr_start + bin_size, + gr.get_name(), gr.get_score(), gr.get_strand()); outputGRs.push_back(binned_gr); } } } - // split a mapped read into multiple genomic regions // based on the number of bases in each static void -SplitMappedRead(const bool VERBOSE, - const MappedRead &inputMR, - mt19937 &generator, - const size_t bin_size, - vector &outputGRs){ - +SplitMappedRead(const bool VERBOSE, const MappedRead &inputMR, + mt19937 &generator, const size_t bin_size, + vector &outputGRs) { outputGRs.clear(); size_t covered_bases = 0; @@ -727,14 +672,14 @@ SplitMappedRead(const bool VERBOSE, // if we reach the end of a bin, probabilistically create a binned read // with probability proportional to the number of covered bases if (read_iterator % bin_size == bin_size - 1) { - const double frac = static_cast(covered_bases)/bin_size; - std::uniform_real_distribution dist(0.0,1.0); + const double frac = static_cast(covered_bases) / bin_size; + std::uniform_real_distribution dist(0.0, 1.0); if (dist(generator) <= frac) { const size_t curr_start = read_iterator - (read_iterator % bin_size); const size_t curr_end = curr_start + bin_size; - const GenomicRegion binned_gr(inputMR.r.get_chrom(), curr_start, curr_end, - inputMR.r.get_name(), inputMR.r.get_score(), - inputMR.r.get_strand()); + const GenomicRegion binned_gr( + inputMR.r.get_chrom(), curr_start, curr_end, inputMR.r.get_name(), + inputMR.r.get_score(), inputMR.r.get_strand()); outputGRs.push_back(binned_gr); } total_covered_bases += covered_bases; @@ -744,8 +689,8 @@ SplitMappedRead(const bool VERBOSE, read_iterator++; } - const double frac = static_cast(covered_bases)/bin_size; - std::uniform_real_distribution dist(0.0,1.0); + const double frac = static_cast(covered_bases) / bin_size; + std::uniform_real_distribution dist(0.0, 1.0); if (dist(generator) <= frac) { const 
size_t curr_start = read_iterator - (read_iterator % bin_size); const size_t curr_end = curr_start + bin_size; @@ -756,17 +701,12 @@ SplitMappedRead(const bool VERBOSE, } } - size_t -load_coverage_counts_MR(const bool VERBOSE, - const string input_file_name, - const unsigned long int seed, - const size_t bin_size, - const size_t max_width, - vector &coverage_hist) { - +load_coverage_counts_MR(const bool VERBOSE, const string input_file_name, + const uint64_t seed, const size_t bin_size, + const size_t max_width, vector &coverage_hist) { srand(time(0) + getpid()); - //Runif runif(rand()); + // Runif runif(rand()); std::mt19937 generator(seed); std::ifstream in(input_file_name.c_str()); @@ -786,10 +726,8 @@ load_coverage_counts_MR(const bool VERBOSE, size_t current_count = 1; do { - if (mr.r.get_width() > max_width) - throw runtime_error("Encountered read of width " + - toa(mr.r.get_width()) + + throw runtime_error("Encountered read of width " + toa(mr.r.get_width()) + "max_width set too small"); vector splitGRs; @@ -805,30 +743,24 @@ load_coverage_counts_MR(const bool VERBOSE, // remove Genomic Regions from the priority queue if (splitGRs.size() > 0) while (!PQ.empty() && is_ready_to_pop(PQ, splitGRs.back(), max_width)) - empty_pq(curr_gr, prev_gr, current_count, - coverage_hist, PQ, input_file_name); - - } - while (in >> mr); + empty_pq(curr_gr, prev_gr, current_count, coverage_hist, PQ, + input_file_name); + } while (in >> mr); // done adding reads, now spit the rest out while (!PQ.empty()) - empty_pq(curr_gr, prev_gr, current_count, - coverage_hist, PQ, input_file_name); + empty_pq(curr_gr, prev_gr, current_count, coverage_hist, PQ, + input_file_name); return n_reads; } - size_t -load_coverage_counts_GR(const string input_file_name, - const unsigned long int seed, - const size_t bin_size, - const size_t max_width, +load_coverage_counts_GR(const string input_file_name, const uint64_t seed, + const size_t bin_size, const size_t max_width, vector &coverage_hist) { - 
srand(time(0) + getpid()); - //Runif runif(rand()); + // Runif runif(rand()); std::mt19937 generator(seed); std::ifstream in(input_file_name.c_str()); @@ -848,28 +780,26 @@ load_coverage_counts_GR(const string input_file_name, size_t current_count = 1; do { - vector splitGRs; SplitGenomicRegion(inputGR, generator, bin_size, splitGRs); // add split Genomic Regions to the priority queue - for(size_t i = 0; i < splitGRs.size(); i++) + for (size_t i = 0; i < splitGRs.size(); i++) PQ.push(splitGRs[i]); if (splitGRs.size() > 0) { // remove Genomic Regions from the priority queue while (!PQ.empty() && is_ready_to_pop(PQ, splitGRs.back(), max_width)) - empty_pq(curr_gr, prev_gr, current_count, - coverage_hist, PQ, input_file_name); + empty_pq(curr_gr, prev_gr, current_count, coverage_hist, PQ, + input_file_name); } n_reads++; - } - while (in >> inputGR); + } while (in >> inputGR); // done adding reads, now spit the rest out while (!PQ.empty()) - empty_pq(curr_gr, prev_gr, current_count, - coverage_hist, PQ, input_file_name); + empty_pq(curr_gr, prev_gr, current_count, coverage_hist, PQ, + input_file_name); return n_reads; } diff --git a/src/load_data_for_complexity.hpp b/src/load_data_for_complexity.hpp index c6e9616..1dc70fb 100644 --- a/src/load_data_for_complexity.hpp +++ b/src/load_data_for_complexity.hpp @@ -17,58 +17,51 @@ * along with this program. If not, see . 
*/ -#ifndef LOAD_DATA_FOR_COMPLEXITY_HPP -#define LOAD_DATA_FOR_COMPLEXITY_HPP +#ifndef SRC_LOAD_DATA_FOR_COMPLEXITY_HPP_ +#define SRC_LOAD_DATA_FOR_COMPLEXITY_HPP_ +#include +#include #include #include -#include -size_t -load_coverage_counts_MR(const bool VERBOSE, - const std::string input_file_name, - const unsigned long int seed, - const size_t bin_size, - const size_t max_width, +std::size_t +load_coverage_counts_MR(const bool VERBOSE, const std::string input_file_name, + const std::uint64_t seed, const std::size_t bin_size, + const std::size_t max_width, std::vector &coverage_hist); - -size_t +std::size_t load_coverage_counts_GR(const std::string input_file_name, - const unsigned long int seed, - const size_t bin_size, - const size_t max_width, + const std::uint64_t seed, const std::size_t bin_size, + const std::size_t max_width, std::vector &coverage_hist); - -size_t +std::size_t load_histogram(const std::string &filename, std::vector &counts_hist); -size_t -load_counts(const std::string &input_file_name, std::vector &counts_hist); +std::size_t +load_counts(const std::string &input_file_name, + std::vector &counts_hist); -size_t +std::size_t load_counts_BED_pe(const std::string input_file_name, std::vector &counts_hist); -size_t +std::size_t load_counts_BED_se(const std::string input_file_name, std::vector &counts_hist); #ifdef HAVE_HTSLIB -size_t -load_counts_BAM_pe(const bool VERBOSE, - const std::string &input_file_name, - const size_t MAX_SEGMENT_LENGTH, - const size_t MAX_READS_TO_HOLD, - size_t &n_paired, - size_t &n_mates, - std::vector &counts_hist); +std::size_t +load_counts_BAM_pe(const bool VERBOSE, const std::string &input_file_name, + const std::size_t MAX_SEGMENT_LENGTH, + const std::size_t MAX_READS_TO_HOLD, std::size_t &n_paired, + std::size_t &n_mates, std::vector &counts_hist); -size_t +std::size_t load_counts_BAM_se(const std::string &input_file_name, std::vector &counts_hist); -#endif // HAVE_HTSLIB - +#endif // HAVE_HTSLIB -#endif +#endif 
// SRC_LOAD_DATA_FOR_COMPLEXITY_HPP_ diff --git a/src/moment_sequence.cpp b/src/moment_sequence.cpp index 9f8934e..f10e67c 100644 --- a/src/moment_sequence.cpp +++ b/src/moment_sequence.cpp @@ -20,85 +20,86 @@ #include "moment_sequence.hpp" -#include +#include +#include #include -#include -#include +#include #include #include -#include -#include +#include +#include +#include // std::swap +#include -using std::string; -using std::vector; +using std::cerr; using std::endl; +using std::find_if; +using std::isfinite; +using std::isinf; using std::max; -using std::cerr; using std::setprecision; +using std::string; using std::swap; -using std::find_if; using std::transform; -using std::isfinite; -using std::isinf; +using std::vector; -void -LU_decomp(vector > &A, vector &P) { - - const size_t N = A.size(); - double absA; - size_t i, j, k; - - P.clear(); - for (size_t x = 0; x <= N; x++) - P.push_back(x); - - for (i = 0; i < N; i++) { - double maxA = 0.0; - size_t imax = i; - - for (k = i; k < N; k++) - if ((absA = fabs(A[k][i])) > maxA) { - maxA = absA; - imax = k; - } - - if (imax != i) { - //pivoting P - size_t j = P[i]; - P[i] = P[imax]; - P[imax] = j; - - //pivoting rows of A - vector ptr(A[i]); - A[i] = A[imax]; - A[imax] = ptr; - - //counting pivots starting from N (for determinant) - P[N]++; - } - - for (j = i + 1; j < N; j++) { - A[j][i] /= A[i][i]; - - for (k = i + 1; k < N; k++) - A[j][k] -= A[j][i] * A[i][k]; - } +void +LU_decomp(vector> &A, vector &P) { + const size_t N = A.size(); + double absA; + size_t i, j, k; + + P.clear(); + for (size_t x = 0; x <= N; x++) + P.push_back(x); + + for (i = 0; i < N; i++) { + double maxA = 0.0; + size_t imax = i; + + for (k = i; k < N; k++) + if ((absA = fabs(A[k][i])) > maxA) { + maxA = absA; + imax = k; + } + + if (imax != i) { + // pivoting P + size_t j = P[i]; + P[i] = P[imax]; + P[imax] = j; + + // pivoting rows of A + vector ptr(A[i]); + A[i] = A[imax]; + A[imax] = ptr; + + // counting pivots starting from N (for 
determinant) + P[N]++; } -} -double -LU_determinant(vector > &A, vector &P) { + for (j = i + 1; j < N; j++) { + A[j][i] /= A[i][i]; - const size_t N = A.size(); + for (k = i + 1; k < N; k++) + A[j][k] -= A[j][i] * A[i][k]; + } + } +} - double det = A[0][0]; +double +LU_determinant(vector> &A, vector &P) { + const size_t N = A.size(); - for (size_t i = 1; i < N; ++i) - det *= A[i][i]; + double det = A[0][0]; - if ((P[N] - N) % 2 == 0) return det; + for (size_t i = 1; i < N; ++i) + det *= A[i][i]; - return -det; + if ((P[N] - N) % 2 == 0) + return det; + + return -det; } ///////////////////////////////////////////////////// @@ -106,45 +107,43 @@ LU_determinant(vector > &A, vector &P) { // ensure moment sequence is positive definite // truncate moment sequence to ensure pos def size_t -ensure_pos_def_mom_seq(vector &moments, - const double tolerance, +ensure_pos_def_mom_seq(vector &moments, const double tolerance, const bool VERBOSE) { - const size_t min_hankel_dim = 1; size_t hankel_dim = 2; - if (moments.size() < 2*hankel_dim) { + if (moments.size() < 2 * hankel_dim) { if (VERBOSE) cerr << "too few moments" << endl; return min_hankel_dim; } bool ACCEPT_HANKEL = true; - while (ACCEPT_HANKEL && (2*hankel_dim - 1 < moments.size())) { - - vector > hankel_mat(hankel_dim, vector(hankel_dim, 0.0)); + while (ACCEPT_HANKEL && (2 * hankel_dim - 1 < moments.size())) { + vector> hankel_mat(hankel_dim, + vector(hankel_dim, 0.0)); for (size_t c_idx = 0; c_idx < hankel_dim; c_idx++) for (size_t r_idx = 0; r_idx < hankel_dim; r_idx++) - hankel_mat[c_idx][r_idx] = moments[c_idx + r_idx]; + hankel_mat[c_idx][r_idx] = moments[c_idx + r_idx]; vector perm; LU_decomp(hankel_mat, perm); const double hankel_mat_det = LU_determinant(hankel_mat, perm); - vector > shift_hankel_matrix(hankel_dim, vector(hankel_dim, 0.0)); + vector> shift_hankel_matrix(hankel_dim, + vector(hankel_dim, 0.0)); for (size_t c_idx = 0; c_idx < hankel_dim; c_idx++) for (size_t r_idx = 0; r_idx < hankel_dim; r_idx++) 
- shift_hankel_matrix[c_idx][r_idx] = moments[c_idx + r_idx + 1]; + shift_hankel_matrix[c_idx][r_idx] = moments[c_idx + r_idx + 1]; vector s_perm; LU_decomp(shift_hankel_matrix, s_perm); - const double shift_hankel_mat_det = LU_determinant(shift_hankel_matrix, s_perm); + const double shift_hankel_mat_det = + LU_determinant(shift_hankel_matrix, s_perm); if (VERBOSE) { - cerr << "dim" << '\t' - << "hankel_det" << '\t' - << "shifted_hankel_det" << endl; - cerr << hankel_dim << '\t' - << hankel_mat_det << '\t' + cerr << "dim" << '\t' << "hankel_det" << '\t' << "shifted_hankel_det" + << endl; + cerr << hankel_dim << '\t' << hankel_mat_det << '\t' << shift_hankel_mat_det << endl; } @@ -155,7 +154,7 @@ ensure_pos_def_mom_seq(vector &moments, else { ACCEPT_HANKEL = false; hankel_dim--; - moments.resize(2*hankel_dim); + moments.resize(2 * hankel_dim); return hankel_dim; } } @@ -170,7 +169,6 @@ ensure_pos_def_mom_seq(vector &moments, // truncate if non-positive element found static void check_three_term_relation(vector &a, vector &b) { - // abort if first entry is zero or smaller if (a[0] <= 0.0) { a.clear(); @@ -179,8 +177,8 @@ check_three_term_relation(vector &a, vector &b) { for (size_t i = 0; i < b.size(); i++) // ADS: some strange logic here - if (b[i] <= 0.0 || !isfinite(b[i]) || - a[i + 1] <= 0.0 || !isfinite(a[i + 1])) { + if (b[i] <= 0.0 || !isfinite(b[i]) || a[i + 1] <= 0.0 || + !isfinite(a[i + 1])) { b.resize(i); a.resize(i + 1); break; @@ -191,7 +189,6 @@ check_three_term_relation(vector &a, vector &b) { // truncate at first non-positive element if found static void check_moment_sequence(vector &obs_moms) { - if (obs_moms[0] <= 0.0 || !isfinite(obs_moms[0])) obs_moms.clear(); @@ -203,30 +200,29 @@ check_moment_sequence(vector &obs_moms) { } } - void MomentSequence::unmodified_Chebyshev(const bool VERBOSE) { - - const size_t n_points = static_cast(floor(moments.size()/2)); + const size_t n_points = static_cast(floor(moments.size() / 2)); vector a(n_points, 0.0); 
vector b(n_points - 1, 0.0); - vector< vector > sigma(2*n_points, vector(2*n_points, 0.0)); + vector> sigma(2 * n_points, vector(2 * n_points, 0.0)); // initialization - a[0] = moments[1]/moments[0]; + a[0] = moments[1] / moments[0]; // sigma[-1][l] = 0 - for (size_t l = 0; l < 2*n_points; l++) + for (size_t l = 0; l < 2 * n_points; l++) sigma[0][l] = moments[l]; for (size_t k = 1; k <= n_points; k++) { - for (size_t l = k; l < 2*n_points - k; l++) { - sigma[k][l] = sigma[k-1][l+1] - a[k-1]*sigma[k-1][l]; + for (size_t l = k; l < 2 * n_points - k; l++) { + sigma[k][l] = sigma[k - 1][l + 1] - a[k - 1] * sigma[k - 1][l]; if (k > 1) - sigma[k][l] -= b[k-2]*sigma[k-2][l]; + sigma[k][l] -= b[k - 2] * sigma[k - 2][l]; } if (k != n_points) { - a[k] = sigma[k][k+1]/sigma[k][k] - sigma[k-1][k]/sigma[k-1][k-1]; - b[k-1] = sigma[k][k]/sigma[k-1][k-1]; + a[k] = + sigma[k][k + 1] / sigma[k][k] - sigma[k - 1][k] / sigma[k - 1][k - 1]; + b[k - 1] = sigma[k][k] / sigma[k - 1][k - 1]; } } @@ -239,27 +235,27 @@ void MomentSequence::full_3term_recurrence(const bool VERBOSE, vector &full_alpha, vector &full_beta) { - - const size_t n_points = static_cast(floor(moments.size()/2)); + const size_t n_points = static_cast(floor(moments.size() / 2)); vector a(n_points, 0.0); vector b(n_points - 1, 0.0); - vector< vector > sigma(2*n_points, vector(2*n_points, 0.0)); + vector> sigma(2 * n_points, vector(2 * n_points, 0.0)); // initialization - a[0] = moments[1]/moments[0]; + a[0] = moments[1] / moments[0]; // sigma[-1][l] = 0 - for (size_t l = 0; l < 2*n_points; l++) + for (size_t l = 0; l < 2 * n_points; l++) sigma[0][l] = moments[l]; for (size_t k = 1; k <= n_points; k++) { - for (size_t l = k; l < 2*n_points - k; l++) { - sigma[k][l] = sigma[k-1][l+1] - a[k-1]*sigma[k-1][l]; + for (size_t l = k; l < 2 * n_points - k; l++) { + sigma[k][l] = sigma[k - 1][l + 1] - a[k - 1] * sigma[k - 1][l]; if (k > 1) - sigma[k][l] -= b[k-2]*sigma[k-2][l]; + sigma[k][l] -= b[k - 2] * sigma[k - 2][l]; } if (k 
!= n_points) { - a[k] = sigma[k][k+1]/sigma[k][k] - sigma[k-1][k]/sigma[k-1][k-1]; - b[k-1] = sigma[k][k]/sigma[k-1][k-1]; + a[k] = + sigma[k][k + 1] / sigma[k][k] - sigma[k - 1][k] / sigma[k - 1][k - 1]; + b[k - 1] = sigma[k][k] / sigma[k - 1][k - 1]; } } @@ -267,7 +263,6 @@ MomentSequence::full_3term_recurrence(const bool VERBOSE, full_beta.swap(b); } - //////////////////////////////////////////////////// // Constructor @@ -282,7 +277,6 @@ MomentSequence::MomentSequence(const vector &obs_moms) : unmodified_Chebyshev(false); } - ///////////////////////////////////////////////////// // Quadrature Methods @@ -293,7 +287,6 @@ MomentSequence::MomentSequence(const vector &obs_moms) : static void QRiteration(vector &alpha, vector &beta, vector &weights) { - // initialize variables vector sin_theta(alpha.size(), 0.0); vector cos_theta(alpha.size(), 0.0); @@ -316,39 +309,38 @@ QRiteration(vector &alpha, vector &beta, z_bar[0] = z[0]; for (size_t j = 0; j < alpha.size() - 1; j++) { - // for d and b_bar, j here is j-1 in G&W if (d[j] == 0.0 && b_bar[j] == 0.0) { sin_theta[j] = 0.0; cos_theta[j] = 1.0; } else { - sin_theta[j] = d[j]/sqrt(d[j]*d[j] + b_bar[j]*b_bar[j]); - cos_theta[j] = b_bar[j]/sqrt(d[j]*d[j] + b_bar[j]*b_bar[j]); + sin_theta[j] = d[j] / sqrt(d[j] * d[j] + b_bar[j] * b_bar[j]); + cos_theta[j] = b_bar[j] / sqrt(d[j] * d[j] + b_bar[j] * b_bar[j]); } - a[j] = (a_bar[j]*cos_theta[j]*cos_theta[j] + - 2*b_tilde[j]*cos_theta[j]*sin_theta[j] + - alpha[j+1]*sin_theta[j]*sin_theta[j]); + a[j] = (a_bar[j] * cos_theta[j] * cos_theta[j] + + 2 * b_tilde[j] * cos_theta[j] * sin_theta[j] + + alpha[j + 1] * sin_theta[j] * sin_theta[j]); - a_bar[j+1] = (a_bar[j]*sin_theta[j]*sin_theta[j] - - 2*b_tilde[j]*cos_theta[j]*sin_theta[j] + - alpha[j+1]*cos_theta[j]*cos_theta[j]); + a_bar[j + 1] = (a_bar[j] * sin_theta[j] * sin_theta[j] - + 2 * b_tilde[j] * cos_theta[j] * sin_theta[j] + + alpha[j + 1] * cos_theta[j] * cos_theta[j]); if (j != 0) - b[j-1] = sqrt(d[j]*d[j] + 
b_bar[j]*b_bar[j]); + b[j - 1] = sqrt(d[j] * d[j] + b_bar[j] * b_bar[j]); - b_bar[j+1] = - ((a_bar[j] - alpha[j+1])*sin_theta[j]*cos_theta[j] + - b_tilde[j]*(sin_theta[j]*sin_theta[j] - cos_theta[j]*cos_theta[j])); + b_bar[j + 1] = ((a_bar[j] - alpha[j + 1]) * sin_theta[j] * cos_theta[j] + + b_tilde[j] * (sin_theta[j] * sin_theta[j] - + cos_theta[j] * cos_theta[j])); - b_tilde[j+1] = -beta[j+1]*cos_theta[j]; + b_tilde[j + 1] = -beta[j + 1] * cos_theta[j]; - d[j+1] = beta[j+1]*sin_theta[j]; + d[j + 1] = beta[j + 1] * sin_theta[j]; - z[j] = z_bar[j]*cos_theta[j] + weights[j+1]*sin_theta[j]; + z[j] = z_bar[j] * cos_theta[j] + weights[j + 1] * sin_theta[j]; - z_bar[j+1] = z_bar[j]*sin_theta[j] - weights[j+1]*cos_theta[j]; + z_bar[j + 1] = z_bar[j] * sin_theta[j] - weights[j + 1] * cos_theta[j]; } // last entries set equal to final "holding" values @@ -361,22 +353,18 @@ QRiteration(vector &alpha, vector &beta, swap(weights, z); } - static bool check_positivity(const vector &v) { return find_if(begin(v), end(v), - [](const double x) {return x <= 0.0 || isinf(x);}) == end(v); + [](const double x) { return x <= 0.0 || isinf(x); }) == end(v); } - bool MomentSequence::Lower_quadrature_rules(const bool VERBOSE, - const size_t n_points, - const double tol, + const size_t n_points, const double tol, const size_t max_iter, vector &points, vector &weights) { - // make sure that points.size() will be less than n_points vector a(alpha); a.resize((n_points < alpha.size()) ? n_points : alpha.size()); @@ -420,7 +408,7 @@ MomentSequence::Lower_quadrature_rules(const bool VERBOSE, // square entries in the weights vector transform(begin(weights), end(weights), begin(weights), - [](const double x) {return x*x;}); + [](const double x) { return x * x; }); return points_are_positive; } diff --git a/src/moment_sequence.hpp b/src/moment_sequence.hpp index 0f7ccbb..1898d74 100644 --- a/src/moment_sequence.hpp +++ b/src/moment_sequence.hpp @@ -17,47 +17,40 @@ * along with this program. 
If not, see . */ -#ifndef MOMENT_SEQUENCE_HPP -#define MOMENT_SEQUENCE_HPP +#ifndef SRC_MOMENT_SEQUENCE_HPP_ +#define SRC_MOMENT_SEQUENCE_HPP_ -#include -#include #include +#include +#include // test Hankel moment matrix to ensure the moment sequence // is positive definite -size_t ensure_pos_def_mom_seq(std::vector &moments, - const double tolerance, - const bool VERBOSE); +std::size_t +ensure_pos_def_mom_seq(std::vector &moments, const double tolerance, + const bool VERBOSE); struct MomentSequence { - - // Constructors MomentSequence() {} - MomentSequence(const std::vector &obs_moms); + explicit MomentSequence(const std::vector &obs_moms); - MomentSequence(const std::vector &a, - const std::vector &b): + MomentSequence(const std::vector &a, const std::vector &b) : alpha(a), beta(b) {}; - - // Estimate 3-term recurrence // these will be removed from the header when they are tested void unmodified_Chebyshev(const bool VERBOSE); void full_3term_recurrence(const bool VERBOSE, - std::vector &full_alpha, - std::vector &full_beta); - - // quadrature rules using QR on Jacobi matrix - bool Lower_quadrature_rules(const bool VERBOSE, - const size_t n_points, - const double tolerance, - const size_t max_iter, - std::vector &points, - std::vector &weights); + std::vector &full_alpha, + std::vector &full_beta); + // quadrature rules using QR on Jacobi matrix + bool Lower_quadrature_rules(const bool VERBOSE, const std::size_t n_points, + const double tolerance, + const std::size_t max_iter, + std::vector &points, + std::vector &weights); std::vector moments; // 3-term recurrence @@ -65,5 +58,4 @@ struct MomentSequence { std::vector beta; }; - -#endif +#endif // SRC_MOMENT_SEQUENCE_HPP_ diff --git a/src/preseq.cpp b/src/preseq.cpp index 349a3fa..4611a50 100644 --- a/src/preseq.cpp +++ b/src/preseq.cpp @@ -20,86 +20,92 @@ * along with this program. If not, see . 
*/ -#include -#include -#include +#include + +#include "continued_fraction.hpp" +#include "load_data_for_complexity.hpp" +#include "moment_sequence.hpp" + +#include +#include +#include +#include + #include #include -#include -#include #include +#include +#include +#include #include +#include #include +#include #include -#include - -#include -#include -#include -#include - -#include "continued_fraction.hpp" -#include "load_data_for_complexity.hpp" -#include "moment_sequence.hpp" +#include +#include -using std::string; -using std::min; -using std::vector; -using std::endl; using std::cerr; +using std::endl; using std::isfinite; - -using std::setprecision; -using std::unordered_map; +using std::max; +using std::min; +using std::mt19937; using std::runtime_error; +using std::setprecision; +using std::string; using std::to_string; -using std::mt19937; -using std::max; - -static const string preseq_version = "3.2.0"; +using std::uint64_t; +using std::unordered_map; +using std::vector; -template T +template +T get_counts_from_hist(const vector &h) { T c = 0.0; for (size_t i = 0; i < h.size(); ++i) - c += i*h[i]; + c += i * h[i]; return c; } -template T -median_from_sorted_vector (const vector sorted_data, - const size_t stride, const size_t n) { - - if (n == 0 || sorted_data.empty()) return 0.0; +template +T +median_from_sorted_vector(const vector sorted_data, const size_t stride, + const size_t n) { + if (n == 0 || sorted_data.empty()) + return 0.0; const size_t lhs = (n - 1) / 2; const size_t rhs = n / 2; - if (lhs == rhs) return sorted_data[lhs * stride]; + if (lhs == rhs) + return sorted_data[lhs * stride]; return (sorted_data[lhs * stride] + sorted_data[rhs * stride]) / 2.0; } -template T -quantile_from_sorted_vector (const vector sorted_data, - const size_t stride, const size_t n, - const double f) { +template +T +quantile_from_sorted_vector(const vector sorted_data, const size_t stride, + const size_t n, const double f) { const double index = f * (n - 1); - const 
size_t lhs = (int)index; + const size_t lhs = static_cast(index); const double delta = index - lhs; - if (n == 0 || sorted_data.empty()) return 0.0; + if (n == 0 || sorted_data.empty()) + return 0.0; - if (lhs == n - 1) return sorted_data[lhs * stride]; + if (lhs == n - 1) + return sorted_data[lhs * stride]; - return (1 - delta) * sorted_data[lhs * stride] - + delta * sorted_data[(lhs + 1) * stride]; + return (1 - delta) * sorted_data[lhs * stride] + + delta * sorted_data[(lhs + 1) * stride]; } // Confidence interval stuff static void -median_and_ci(vector estimates, // by val so we can sort them +median_and_ci(vector estimates, // by val so we can sort them const double ci_level, double &median_estimate, double &lower_ci_estimate, double &upper_ci_estimate) { assert(!estimates.empty()); @@ -110,17 +116,16 @@ median_and_ci(vector estimates, // by val so we can sort them const size_t N = estimates.size(); median_estimate = median_from_sorted_vector(estimates, 1, N); - lower_ci_estimate = quantile_from_sorted_vector(estimates, 1, N, alpha/2); - upper_ci_estimate = quantile_from_sorted_vector(estimates, 1, N, 1.0 - alpha/2); + lower_ci_estimate = quantile_from_sorted_vector(estimates, 1, N, alpha / 2); + upper_ci_estimate = + quantile_from_sorted_vector(estimates, 1, N, 1.0 - alpha / 2); } static void -vector_median_and_ci(const vector > &bootstrap_estimates, - const double ci_level, - vector &yield_estimates, +vector_median_and_ci(const vector> &bootstrap_estimates, + const double ci_level, vector &yield_estimates, vector &lower_ci_lognorm, vector &upper_ci_lognorm) { - yield_estimates.clear(); lower_ci_lognorm.clear(); upper_ci_lognorm.clear(); @@ -129,14 +134,13 @@ vector_median_and_ci(const vector > &bootstrap_estimates, const size_t n_est = bootstrap_estimates.size(); vector estimates_row(n_est, 0.0); for (size_t i = 0; i < bootstrap_estimates[0].size(); i++) { - // estimates is in wrong order, work locally on const val for (size_t k = 0; k < n_est; ++k) 
estimates_row[k] = bootstrap_estimates[k][i]; double median_estimate, lower_ci_estimate, upper_ci_estimate; - median_and_ci(estimates_row, ci_level, median_estimate, - lower_ci_estimate, upper_ci_estimate); + median_and_ci(estimates_row, ci_level, median_estimate, lower_ci_estimate, + upper_ci_estimate); sort(begin(estimates_row), end(estimates_row)); yield_estimates.push_back(median_estimate); @@ -147,10 +151,9 @@ vector_median_and_ci(const vector > &bootstrap_estimates, template void -multinomial(mt19937 &gen, const vector &mult_probs, - uint_type trials, vector &result) { - - typedef std::binomial_distribution binom_dist; +multinomial(mt19937 &gen, const vector &mult_probs, uint_type trials, + vector &result) { + typedef std::binomial_distribution binom_dist; result.clear(); result.resize(mult_probs.size()); @@ -160,12 +163,11 @@ multinomial(mt19937 &gen, const vector &mult_probs, auto r(begin(result)); auto p(begin(mult_probs)); - while (p != end(mult_probs)) { // iterate to sample for each category + while (p != end(mult_probs)) { // iterate to sample for each category + *r = binom_dist(trials, (*p) / remaining_prob)(gen); // take the sample - *r = binom_dist(trials, (*p)/remaining_prob)(gen); // take the sample - - remaining_prob -= *p++; // update remaining probability mass - trials -= *r++; // update remaining trials needed + remaining_prob -= *p++; // update remaining probability mass + trials -= *r++; // update remaining trials needed } if (trials > 0) @@ -175,8 +177,7 @@ multinomial(mt19937 &gen, const vector &mult_probs, // Lanczos approximation for gamma function for x >= 0.5 - essentially an // approximation for (x-1)! 
static double -factorial (double x) { - +factorial(double x) { // constants double LogRootTwoPi = 0.9189385332046727; double Euler = 2.71828182845904523536028747135; @@ -184,21 +185,19 @@ factorial (double x) { // Approximation for factorial is actually x-1 x -= 1.0; - vector lanczos { - 0.99999999999980993227684700473478, - 676.520368121885098567009190444019, - -1259.13921672240287047156078755283, - 771.3234287776530788486528258894, - -176.61502916214059906584551354, - 12.507343278686904814458936853, - -0.13857109526572011689554707, - 9.984369578019570859563e-6, - 1.50563273514931155834e-7 - }; + vector lanczos{0.99999999999980993227684700473478, + 676.520368121885098567009190444019, + -1259.13921672240287047156078755283, + 771.3234287776530788486528258894, + -176.61502916214059906584551354, + 12.507343278686904814458936853, + -0.13857109526572011689554707, + 9.984369578019570859563e-6, + 1.50563273514931155834e-7}; double Ag = lanczos[0]; - for (size_t k=1; k < lanczos.size(); k++) + for (size_t k = 1; k < lanczos.size(); k++) Ag += lanczos[k] / (x + k); double term1 = (x + 0.5) * log((x + 7.5) / Euler); @@ -219,15 +218,13 @@ void resample_hist(mt19937 &gen, const vector &vals_hist_distinct_counts, const vector &distinct_counts_hist, vector &out_hist) { - const size_t hist_size = distinct_counts_hist.size(); - vector sample_distinct_counts_hist(hist_size, 0); + vector sample_distinct_counts_hist(hist_size, 0); - unsigned int distinct = + const uint32_t distinct = accumulate(begin(distinct_counts_hist), end(distinct_counts_hist), 0.0); - multinomial(gen, distinct_counts_hist, distinct, - sample_distinct_counts_hist); + multinomial(gen, distinct_counts_hist, distinct, sample_distinct_counts_hist); out_hist.clear(); out_hist.resize(vals_hist_distinct_counts.back() + 1, 0.0); @@ -241,10 +238,10 @@ resample_hist(mt19937 &gen, const vector &vals_hist_distinct_counts, // N total sample size; S the total number of distincts // n sub sample size static double 
-interpolate_distinct(const vector &hist, const size_t N, - const size_t S, const size_t n) { - - const double denom = factorial(N + 1) - factorial(n + 1) - factorial(N - n + 1); +interpolate_distinct(const vector &hist, const size_t N, const size_t S, + const size_t n) { + const double denom = + factorial(N + 1) - factorial(n + 1) - factorial(N - n + 1); vector numer(hist.size(), 0); for (size_t i = 1; i < hist.size(); i++) { @@ -253,46 +250,39 @@ interpolate_distinct(const vector &hist, const size_t N, numer[i] = 0; } else { - - const double x = (factorial(N - i + 1) - factorial(n + 1) - - factorial(N - i - n + 1)); - numer[i] = exp(x - denom)*hist[i]; + const double x = + (factorial(N - i + 1) - factorial(n + 1) - factorial(N - i - n + 1)); + numer[i] = exp(x - denom) * hist[i]; } } return S - accumulate(begin(numer), end(numer), 0); } - static void extrapolate_curve(const ContinuedFraction &the_cf, - const double initial_distinct, - const double vals_sum, - const double initial_sample_size, - const double step_size, - const double max_sample_size, - vector &estimates) { - + const double initial_distinct, const double vals_sum, + const double initial_sample_size, const double step_size, + const double max_sample_size, vector &estimates) { double curr_samp_sz = initial_sample_size; while (curr_samp_sz < max_sample_size) { - const double fold = (curr_samp_sz - vals_sum)/vals_sum; + const double fold = (curr_samp_sz - vals_sum) / vals_sum; assert(fold >= 0.0); - estimates.push_back(initial_distinct + fold*the_cf(fold)); + estimates.push_back(initial_distinct + fold * the_cf(fold)); curr_samp_sz += step_size; } } void extrap_bootstrap(const bool VERBOSE, const bool allow_defects, - const unsigned long int seed, - const vector &orig_hist, + const uint64_t seed, const vector &orig_hist, const size_t n_bootstraps, const size_t orig_max_terms, const int diagonal, const double bin_step_size, const double max_extrap, const size_t max_iter, - vector > &bootstrap_estimates) 
{ + vector> &bootstrap_estimates) { // clear returning vectors bootstrap_estimates.clear(); - //setup rng + // setup rng srand(time(0) + getpid()); mt19937 rng(seed); @@ -308,10 +298,10 @@ extrap_bootstrap(const bool VERBOSE, const bool allow_defects, distinct_orig_hist.push_back(orig_hist[i]); } - for (size_t iter = 0; (iter < max_iter && bootstrap_estimates.size() < n_bootstraps); ++iter) { - + for (size_t iter = 0; + (iter < max_iter && bootstrap_estimates.size() < n_bootstraps); ++iter) { if (VERBOSE && iter > 0 && iter % 72 == 0) - cerr << endl; // bootstrap success progress only 72 char wide + cerr << endl; // bootstrap success progress only 72 char wide vector yield_vector; vector hist; @@ -327,8 +317,8 @@ extrap_bootstrap(const bool VERBOSE, const bool allow_defects, const size_t distinct = accumulate(begin(hist), end(hist), 0.0); size_t curr_sample_sz = bin_step_size; while (curr_sample_sz < sample_vals_sum) { - yield_vector.push_back(interpolate_distinct(hist, sample_vals_sum, - distinct, curr_sample_sz)); + yield_vector.push_back( + interpolate_distinct(hist, sample_vals_sum, distinct, curr_sample_sz)); curr_sample_sz += bin_step_size; } @@ -345,32 +335,30 @@ extrap_bootstrap(const bool VERBOSE, const bool allow_defects, bool successful_bootstrap = false; // defect mode, simple extrapolation if (allow_defects) { - vector ps_coeffs; for (size_t j = 1; j <= max_terms; j++) - ps_coeffs.push_back(hist[j]*std::pow(-1.0, j + 1)); + ps_coeffs.push_back(hist[j] * std::pow(-1.0, j + 1)); const ContinuedFraction defect_cf(ps_coeffs, diagonal, max_terms); extrapolate_curve(defect_cf, initial_distinct, sample_vals_sum, - curr_sample_sz, bin_step_size, - max_extrap, yield_vector); + curr_sample_sz, bin_step_size, max_extrap, + yield_vector); // no checking of curve in defect mode bootstrap_estimates.push_back(yield_vector); successful_bootstrap = true; } else { - // refit curve for lower bound const ContinuedFractionApproximation lower_cfa(diagonal, max_terms); - 
const ContinuedFraction lower_cf(lower_cfa.optimal_cont_frac_distinct(hist)); + const ContinuedFraction lower_cf( + lower_cfa.optimal_cont_frac_distinct(hist)); // extrapolate the curve start if (lower_cf.is_valid()) { - extrapolate_curve(lower_cf, initial_distinct, sample_vals_sum, - curr_sample_sz, bin_step_size, - max_extrap, yield_vector); + curr_sample_sz, bin_step_size, max_extrap, + yield_vector); // sanity check if (check_yield_estimates_stability(yield_vector)) { bootstrap_estimates.push_back(yield_vector); @@ -390,12 +378,10 @@ extrap_bootstrap(const bool VERBOSE, const bool allow_defects, static bool extrap_single_estimate(const bool VERBOSE, const bool allow_defects, - vector &hist, - size_t max_terms, const int diagonal, - const double step_size, + vector &hist, size_t max_terms, + const int diagonal, const double step_size, const double max_extrap, vector &yield_estimate) { - yield_estimate.clear(); const double vals_sum = get_counts_from_hist(hist); @@ -406,8 +392,8 @@ extrap_single_estimate(const bool VERBOSE, const bool allow_defects, size_t step = static_cast(step_size); size_t sample = static_cast(step_size); for (; sample < upper_limit; sample += step) - yield_estimate.push_back(interpolate_distinct(hist, upper_limit, - initial_distinct, sample)); + yield_estimate.push_back( + interpolate_distinct(hist, upper_limit, initial_distinct, sample)); // ENSURE THAT THE MAX TERMS ARE ACCEPTABLE size_t first_zero = 1; @@ -422,29 +408,28 @@ extrap_single_estimate(const bool VERBOSE, const bool allow_defects, max_terms = max_terms - (max_terms % 2 == 1); if (allow_defects) { - vector ps_coeffs; for (size_t j = 1; j <= max_terms; j++) - ps_coeffs.push_back(hist[j]*std::pow(-1.0, j + 1)); + ps_coeffs.push_back(hist[j] * std::pow(-1.0, j + 1)); const ContinuedFraction defect_cf(ps_coeffs, diagonal, max_terms); - extrapolate_curve(defect_cf, initial_distinct, vals_sum, - sample, step_size, max_extrap, yield_estimate); + extrapolate_curve(defect_cf, 
initial_distinct, vals_sum, sample, step_size, + max_extrap, yield_estimate); if (VERBOSE) cerr << defect_cf << endl; // NO FAIL! defect mode doesn't care about failure } else { - const ContinuedFractionApproximation lower_cfa(diagonal, max_terms); - const ContinuedFraction lower_cf(lower_cfa.optimal_cont_frac_distinct(hist)); + const ContinuedFraction lower_cf( + lower_cfa.optimal_cont_frac_distinct(hist)); // extrapolate curve if (lower_cf.is_valid()) { - extrapolate_curve(lower_cf, initial_distinct, vals_sum, - sample, step_size, max_extrap, yield_estimate); + extrapolate_curve(lower_cf, initial_distinct, vals_sum, sample, step_size, + max_extrap, yield_estimate); } else { // FAIL! @@ -459,24 +444,23 @@ extrap_single_estimate(const bool VERBOSE, const bool allow_defects, return true; } - static double GoodToulmin2xExtrap(const vector &counts_hist) { double two_fold_extrap = 0.0; for (size_t i = 0; i < counts_hist.size(); i++) - two_fold_extrap += pow(-1.0, i + 1)*counts_hist[i]; + two_fold_extrap += pow(-1.0, i + 1) * counts_hist[i]; return two_fold_extrap; } - static void -write_predicted_complexity_curve(const string &outfile, - const double c_level, const double step_size, +write_predicted_complexity_curve(const string &outfile, const double c_level, + const double step_size, const vector &yield_estimates, const vector &yield_lower_ci_lognorm, const vector &yield_upper_ci_lognorm) { std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); std::ostream out(outfile.empty() ? 
std::cout.rdbuf() : of.rdbuf()); out << "TOTAL_READS\tEXPECTED_DISTINCT\t" @@ -488,47 +472,41 @@ write_predicted_complexity_curve(const string &outfile, out << 0 << '\t' << 0 << '\t' << 0 << '\t' << 0 << endl; for (size_t i = 0; i < yield_estimates.size(); ++i) - out << (i + 1)*step_size << '\t' - << yield_estimates[i] << '\t' - << yield_lower_ci_lognorm[i] << '\t' - << yield_upper_ci_lognorm[i] << endl; + out << (i + 1) * step_size << '\t' << yield_estimates[i] << '\t' + << yield_lower_ci_lognorm[i] << '\t' << yield_upper_ci_lognorm[i] + << endl; } - // ADS: functions same, header different (above and this one) static void -write_predicted_coverage_curve(const string &outfile, - const double c_level, +write_predicted_coverage_curve(const string &outfile, const double c_level, const double base_step_size, const size_t bin_size, const vector &cvrg_estimates, const vector &cvrg_lower_ci_lognorm, const vector &cvrg_upper_ci_lognorm) { std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); std::ostream out(outfile.empty() ? 
std::cout.rdbuf() : of.rdbuf()); out << "TOTAL_BASES\tEXPECTED_COVERED_BASES\t" - << "LOWER_" << 100*c_level << "%CI\t" - << "UPPER_" << 100*c_level << "%CI" << endl; + << "LOWER_" << 100 * c_level << "%CI\t" + << "UPPER_" << 100 * c_level << "%CI" << endl; out.setf(std::ios_base::fixed, std::ios_base::floatfield); out.precision(1); out << 0 << '\t' << 0 << '\t' << 0 << '\t' << 0 << endl; for (size_t i = 0; i < cvrg_estimates.size(); ++i) - out << (i + 1)*base_step_size << '\t' - << cvrg_estimates[i]*bin_size << '\t' - << cvrg_lower_ci_lognorm[i]*bin_size << '\t' - << cvrg_upper_ci_lognorm[i]*bin_size << endl; + out << (i + 1) * base_step_size << '\t' << cvrg_estimates[i] * bin_size + << '\t' << cvrg_lower_ci_lognorm[i] * bin_size << '\t' + << cvrg_upper_ci_lognorm[i] * bin_size << endl; } - static int lc_extrap(const int argc, const char **argv) { - try { - static const size_t min_required_counts = 4; static const string min_required_counts_error_message = "max count before zero is less than min required count (" + @@ -542,7 +520,7 @@ lc_extrap(const int argc, const char **argv) { size_t n_bootstraps = 100; int diagonal = 0; double c_level = 0.95; - unsigned long int seed = 408; + uint64_t seed = 408; /* FLAGS */ bool VERBOSE = false; @@ -557,49 +535,57 @@ lc_extrap(const int argc, const char **argv) { size_t MAX_SEGMENT_LENGTH = 5000; #endif - string description = "Extrapolate the complexity of a library. This is the approach \ - described in Daley & Smith (2013). The method applies rational \ - function approximation via continued fractions with the \ - original goal of estimating the number of distinct reads that a \ - sequencing library would yield upon deeper sequencing. This \ - method has been used for many different purposes since then."; + string description = + R"(Extrapolate the complexity of a library. This is the approach \ +described in Daley & Smith (2013). 
The method applies rational \ +function approximation via continued fractions with the \ +original goal of estimating the number of distinct reads that a \ +sequencing library would yield upon deeper sequencing. This \ +method has been used for many different purposes since then. +)"; /********** GET COMMAND LINE ARGUMENTS FOR LC EXTRAP ***********/ OptionParser opt_parse(strip_path(argv[1]), description, ""); opt_parse.add_opt("output", 'o', "yield output file (default: stdout)", - false , outfile); - opt_parse.add_opt("extrap",'e',"maximum extrapolation", false, max_extrap); - opt_parse.add_opt("step",'s',"extrapolation step size", false, step_size); - opt_parse.add_opt("boots",'n',"number of bootstraps", false, n_bootstraps); - opt_parse.add_opt("cval", 'c', "level for confidence intervals", false, c_level); - opt_parse.add_opt("terms",'x',"maximum terms in estimator", false, orig_max_terms); + false, outfile); + opt_parse.add_opt("extrap", 'e', "maximum extrapolation", false, + max_extrap); + opt_parse.add_opt("step", 's', "extrapolation step size", false, step_size); + opt_parse.add_opt("boots", 'n', "number of bootstraps", false, + n_bootstraps); + opt_parse.add_opt("cval", 'c', "level for confidence intervals", false, + c_level); + opt_parse.add_opt("terms", 'x', "maximum terms in estimator", false, + orig_max_terms); opt_parse.add_opt("verbose", 'v', "print more info", false, VERBOSE); #ifdef HAVE_HTSLIB - opt_parse.add_opt("bam", 'B', "input is in BAM format", - false, BAM_FORMAT_INPUT); - opt_parse.add_opt("seg_len", 'l', "maximum segment length when merging " + opt_parse.add_opt("bam", 'B', "input is in BAM format", false, + BAM_FORMAT_INPUT); + opt_parse.add_opt("seg_len", 'l', + "maximum segment length when merging " "paired end bam reads", false, MAX_SEGMENT_LENGTH); #endif - opt_parse.add_opt("pe", 'P', "input is paired end read file", - false, PAIRED_END); - opt_parse.add_opt("vals", 'V', - "input is a text file containing only the observed counts", 
- false, VALS_INPUT); + opt_parse.add_opt("pe", 'P', "input is paired end read file", false, + PAIRED_END); + opt_parse.add_opt( + "vals", 'V', "input is a text file containing only the observed counts", + false, VALS_INPUT); opt_parse.add_opt("hist", 'H', "input is a text file containing the observed histogram", false, HIST_INPUT); opt_parse.add_opt("quick", 'Q', "quick mode (no bootstraps) for confidence intervals", false, SINGLE_ESTIMATE); - opt_parse.add_opt("defects", 'D', "no testing for defects", false, allow_defects); - opt_parse.add_opt("seed", 'r', "seed for random number generator", - false, seed); + opt_parse.add_opt("defects", 'D', "no testing for defects", false, + allow_defects); + opt_parse.add_opt("seed", 'r', "seed for random number generator", false, + seed); opt_parse.set_show_defaults(); vector leftover_args; // ADS: suspect bug below; "-about" isn't working. - opt_parse.parse(argc-1, argv+1, leftover_args); + opt_parse.parse(argc - 1, argv + 1, leftover_args); if (argc == 2 || opt_parse.help_requested()) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; @@ -640,10 +626,9 @@ lc_extrap(const int argc, const char **argv) { const size_t MAX_READS_TO_HOLD = 5000000; size_t n_paired = 0; size_t n_mates = 0; - n_reads = load_counts_BAM_pe(VERBOSE, input_file_name, - MAX_SEGMENT_LENGTH, - MAX_READS_TO_HOLD, n_paired, - n_mates, counts_hist); + n_reads = + load_counts_BAM_pe(VERBOSE, input_file_name, MAX_SEGMENT_LENGTH, + MAX_READS_TO_HOLD, n_paired, n_mates, counts_hist); if (VERBOSE) { cerr << "MERGED PAIRED END READS = " << n_paired << endl; cerr << "MATES PROCESSED = " << n_mates << endl; @@ -660,7 +645,7 @@ lc_extrap(const int argc, const char **argv) { cerr << "PAIRED_END_BED_INPUT" << endl; n_reads = load_counts_BED_pe(input_file_name, counts_hist); } - else { // default is single end bed file + else { // default is single end bed file if (VERBOSE) cerr << "BED_INPUT" << endl; n_reads = load_counts_BED_se(input_file_name, 
counts_hist); @@ -681,7 +666,7 @@ lc_extrap(const int argc, const char **argv) { const size_t distinct_counts = std::count_if(begin(counts_hist), end(counts_hist), - [](const double x) {return x > 0.0;}); + [](const double x) { return x > 0.0; }); if (VERBOSE) cerr << "TOTAL READS = " << n_reads << endl @@ -708,7 +693,7 @@ lc_extrap(const int argc, const char **argv) { // const size_t total_reads = get_counts_from_hist(counts_hist); - //assert(total_reads == n_reads); // ADS: why commented out? + // assert(total_reads == n_reads); // ADS: why commented out? // check that min required count is satisfied if (orig_max_terms < min_required_counts) @@ -719,17 +704,17 @@ lc_extrap(const int argc, const char **argv) { vector yield_estimates; if (SINGLE_ESTIMATE) { - - const bool single_estimate_success = - extrap_single_estimate(VERBOSE, allow_defects, counts_hist, orig_max_terms, - diagonal, step_size, max_extrap, yield_estimates); + const bool single_estimate_success = extrap_single_estimate( + VERBOSE, allow_defects, counts_hist, orig_max_terms, diagonal, + step_size, max_extrap, yield_estimates); // IF FAILURE, EXIT if (!single_estimate_success) throw runtime_error("single estimate failed, run " "full mode for estimates"); std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); std::ostream out(outfile.empty() ? 
std::cout.rdbuf() : of.rdbuf()); out << "TOTAL_READS\tEXPECTED_DISTINCT" << endl; @@ -738,15 +723,15 @@ lc_extrap(const int argc, const char **argv) { out << 0 << '\t' << 0 << endl; for (size_t i = 0; i < yield_estimates.size(); ++i) - out << (i + 1)*step_size << '\t' << yield_estimates[i] << endl; + out << (i + 1) * step_size << '\t' << yield_estimates[i] << endl; } else { if (VERBOSE) cerr << "[BOOTSTRAPPING HISTOGRAM]" << endl; - const size_t max_iter = 100*n_bootstraps; + const size_t max_iter = 100 * n_bootstraps; - vector > bootstrap_estimates; + vector> bootstrap_estimates; extrap_bootstrap(VERBOSE, allow_defects, seed, counts_hist, n_bootstraps, orig_max_terms, diagonal, step_size, max_extrap, max_iter, bootstrap_estimates); @@ -786,9 +771,7 @@ lc_extrap(const int argc, const char **argv) { static int gc_extrap(const int argc, const char **argv) { - try { - const size_t MIN_REQUIRED_COUNTS = 4; int diagonal = 0; @@ -801,58 +784,57 @@ gc_extrap(const int argc, const char **argv) { bool SINGLE_ESTIMATE = false; double max_extrap = 1.0e12; size_t n_bootstraps = 100; - unsigned long int seed = 408; + uint64_t seed = 408; bool allow_defects = false; bool NO_SEQUENCE = false; double c_level = 0.95; - const string description = + const string description = R"( "Extrapolate the size of the covered genome by mapped reads. This \ approach is described in Daley & Smith (2014). The method is the \ same as for lc_extrap: using rational function approximation to \ a power-series expansion for the number of \"unobserved\" bases \ in the initial sample. 
The gc_extrap method is adapted to deal \ - with individual nucleotides rather than distinct reads."; + with individual nucleotides rather than distinct reads.)"; // ********* GET COMMAND LINE ARGUMENTS FOR GC EXTRAP ********** - OptionParser opt_parse(strip_path(argv[1]), description, - ""); - opt_parse.add_opt("output", 'o', "coverage yield output file (default: stdout)", - false , outfile); - opt_parse.add_opt("max_width", 'w', "max fragment length, " + OptionParser opt_parse(strip_path(argv[1]), description, ""); + opt_parse.add_opt("output", 'o', + "coverage yield output file (default: stdout)", false, + outfile); + opt_parse.add_opt("max_width", 'w', + "max fragment length, " "set equal to read length for single end reads", false, max_width); - opt_parse.add_opt("bin_size", 'b', "bin size", - false, bin_size); - opt_parse.add_opt("extrap",'e',"maximum extrapolation in base pairs", + opt_parse.add_opt("bin_size", 'b', "bin size", false, bin_size); + opt_parse.add_opt("extrap", 'e', "maximum extrapolation in base pairs", false, max_extrap); - opt_parse.add_opt("step",'s',"step size in bases between extrapolations", + opt_parse.add_opt("step", 's', "step size in bases between extrapolations", false, base_step_size); - opt_parse.add_opt("bootstraps",'n',"number of bootstraps", - false, n_bootstraps); - opt_parse.add_opt("cval", 'c', "level for confidence intervals", - false, c_level); - opt_parse.add_opt("terms",'x',"maximum number of terms", - false, orig_max_terms); - opt_parse.add_opt("verbose", 'v', "print more information", - false, VERBOSE); + opt_parse.add_opt("bootstraps", 'n', "number of bootstraps", false, + n_bootstraps); + opt_parse.add_opt("cval", 'c', "level for confidence intervals", false, + c_level); + opt_parse.add_opt("terms", 'x', "maximum number of terms", false, + orig_max_terms); + opt_parse.add_opt("verbose", 'v', "print more information", false, VERBOSE); opt_parse.add_opt("bed", 'B', "input is in bed format without sequence information", 
false, NO_SEQUENCE); - opt_parse.add_opt("quick",'Q', + opt_parse.add_opt("quick", 'Q', "quick mode: run gc_extrap without " "bootstrapping for confidence intervals", false, SINGLE_ESTIMATE); opt_parse.add_opt("defects", 'D', "defects mode to extrapolate without testing for defects", false, allow_defects); - opt_parse.add_opt("seed", 'r', "seed for random number generator", - false, seed); + opt_parse.add_opt("seed", 'r', "seed for random number generator", false, + seed); opt_parse.set_show_defaults(); vector leftover_args; - opt_parse.parse(argc-1, argv+1, leftover_args); + opt_parse.parse(argc - 1, argv + 1, leftover_args); if (argc == 2 || opt_parse.help_requested()) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; @@ -886,8 +868,8 @@ gc_extrap(const int argc, const char **argv) { else { if (VERBOSE) cerr << "MAPPED READ FORMAT" << endl; - n_reads = load_coverage_counts_MR(VERBOSE, input_file_name, seed, bin_size, - max_width, coverage_hist); + n_reads = load_coverage_counts_MR(VERBOSE, input_file_name, seed, + bin_size, max_width, coverage_hist); } const double total_bins = get_counts_from_hist(coverage_hist); @@ -895,8 +877,8 @@ gc_extrap(const int argc, const char **argv) { const double distinct_bins = accumulate(coverage_hist.begin(), coverage_hist.end(), 0.0); - const double avg_bins_per_read = total_bins/n_reads; - double bin_step_size = base_step_size/bin_size; + const double avg_bins_per_read = total_bins / n_reads; + double bin_step_size = base_step_size / bin_size; const size_t max_observed_count = coverage_hist.size() - 1; @@ -914,8 +896,8 @@ gc_extrap(const int argc, const char **argv) { << "TOTAL BINS = " << total_bins << endl << "BINS PER READ = " << avg_bins_per_read << endl << "DISTINCT BINS = " << distinct_bins << endl - << "TOTAL BASES = " << total_bins*bin_size << endl - << "TOTAL COVERED BASES = " << distinct_bins*bin_size << endl + << "TOTAL BASES = " << total_bins * bin_size << endl + << "TOTAL COVERED BASES = " << 
distinct_bins * bin_size << endl << "MAX COVERAGE COUNT = " << max_observed_count << endl << "COUNTS OF 1 = " << coverage_hist[1] << endl; @@ -946,18 +928,17 @@ gc_extrap(const int argc, const char **argv) { vector coverage_estimates; if (SINGLE_ESTIMATE) { - - bool SINGLE_ESTIMATE_SUCCESS = - extrap_single_estimate(VERBOSE, allow_defects, coverage_hist, orig_max_terms, diagonal, - bin_step_size, max_extrap/bin_size, - coverage_estimates); + bool SINGLE_ESTIMATE_SUCCESS = extrap_single_estimate( + VERBOSE, allow_defects, coverage_hist, orig_max_terms, diagonal, + bin_step_size, max_extrap / bin_size, coverage_estimates); // IF FAILURE, EXIT if (!SINGLE_ESTIMATE_SUCCESS) throw runtime_error("SINGLE ESTIMATE FAILED, NEED TO RUN IN " "FULL MODE FOR ESTIMATES"); std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); std::ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf()); out << "TOTAL_BASES\tEXPECTED_DISTINCT" << endl; @@ -967,36 +948,33 @@ gc_extrap(const int argc, const char **argv) { out << 0 << '\t' << 0 << endl; for (size_t i = 0; i < coverage_estimates.size(); ++i) - out << (i + 1)*base_step_size << '\t' - << coverage_estimates[i]*bin_size << endl; + out << (i + 1) * base_step_size << '\t' + << coverage_estimates[i] * bin_size << endl; } else { - if (VERBOSE) cerr << "[BOOTSTRAPPING HISTOGRAM]" << endl; - const size_t max_iter = 10*n_bootstraps; + const size_t max_iter = 10 * n_bootstraps; - vector > bootstrap_estimates; - extrap_bootstrap(VERBOSE, allow_defects, seed, coverage_hist, n_bootstraps, orig_max_terms, - diagonal, bin_step_size, max_extrap/bin_size, - max_iter, bootstrap_estimates); + vector> bootstrap_estimates; + extrap_bootstrap(VERBOSE, allow_defects, seed, coverage_hist, + n_bootstraps, orig_max_terms, diagonal, bin_step_size, + max_extrap / bin_size, max_iter, bootstrap_estimates); if (VERBOSE) cerr << "[COMPUTING CONFIDENCE INTERVALS]" << endl; vector 
coverage_upper_ci_lognorm, coverage_lower_ci_lognorm; - vector_median_and_ci(bootstrap_estimates, c_level, - coverage_estimates, coverage_lower_ci_lognorm, + vector_median_and_ci(bootstrap_estimates, c_level, coverage_estimates, + coverage_lower_ci_lognorm, coverage_upper_ci_lognorm); if (VERBOSE) cerr << "[WRITING OUTPUT]" << endl; - write_predicted_coverage_curve(outfile, c_level, base_step_size, - bin_size, coverage_estimates, - coverage_lower_ci_lognorm, - coverage_upper_ci_lognorm); - + write_predicted_coverage_curve( + outfile, c_level, base_step_size, bin_size, coverage_estimates, + coverage_lower_ci_lognorm, coverage_upper_ci_lognorm); } } catch (runtime_error &e) { @@ -1010,65 +988,60 @@ gc_extrap(const int argc, const char **argv) { return EXIT_SUCCESS; } - - //////////////////////////////////////////////////////////////////////// ///// C_CURVE BELOW HERE static int c_curve(const int argc, const char **argv) { - try { - bool VERBOSE = false; bool PAIRED_END = false; bool HIST_INPUT = false; bool VALS_INPUT = false; - unsigned long int seed = 408; + uint64_t seed = 408; string outfile; size_t upper_limit = 0; double step_size = 1e6; - #ifdef HAVE_HTSLIB bool BAM_FORMAT_INPUT = false; size_t MAX_SEGMENT_LENGTH = 5000; #endif - const string description = - "Generate the complexity curve for data. This does not extrapolate, \ - but instead resamples from the given data."; + const string description = R"( +Generate the complexity curve for data. 
This does not extrapolate, \ +but instead resamples from the given data.)"; /********** GET COMMAND LINE ARGUMENTS FOR C_CURVE ***********/ OptionParser opt_parse(strip_path(argv[1]), description, ""); opt_parse.add_opt("output", 'o', "yield output file (default: stdout)", - false , outfile); - opt_parse.add_opt("step",'s',"step size in extrapolations", - false, step_size); - opt_parse.add_opt("verbose", 'v', "print more information", - false, VERBOSE); - opt_parse.add_opt("pe", 'P', "input is paired end read file", - false, PAIRED_END); + false, outfile); + opt_parse.add_opt("step", 's', "step size in extrapolations", false, + step_size); + opt_parse.add_opt("verbose", 'v', "print more information", false, VERBOSE); + opt_parse.add_opt("pe", 'P', "input is paired end read file", false, + PAIRED_END); opt_parse.add_opt("hist", 'H', "input is a text file containing the observed histogram", false, HIST_INPUT); - opt_parse.add_opt("vals", 'V', - "input is a text file containing only the observed counts", - false, VALS_INPUT); + opt_parse.add_opt( + "vals", 'V', "input is a text file containing only the observed counts", + false, VALS_INPUT); #ifdef HAVE_HTSLIB - opt_parse.add_opt("bam", 'B', "input is in BAM format", - false, BAM_FORMAT_INPUT); - opt_parse.add_opt("seg_len", 'l', "maximum segment length when merging " + opt_parse.add_opt("bam", 'B', "input is in BAM format", false, + BAM_FORMAT_INPUT); + opt_parse.add_opt("seg_len", 'l', + "maximum segment length when merging " "paired end bam reads", false, MAX_SEGMENT_LENGTH); #endif - opt_parse.add_opt("seed", 'r', "seed for random number generator", - false, seed); + opt_parse.add_opt("seed", 'r', "seed for random number generator", false, + seed); opt_parse.set_show_defaults(); vector leftover_args; - opt_parse.parse(argc-1, argv+1, leftover_args); + opt_parse.parse(argc - 1, argv + 1, leftover_args); if (argc == 2 || opt_parse.help_requested()) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; @@ 
-1089,7 +1062,7 @@ c_curve(const int argc, const char **argv) { /******************************************************************/ // Setup the random number generator - srand(time(0) + getpid()); //give the random fxn a new seed + srand(time(0) + getpid()); // give the random fxn a new seed mt19937 rng(seed); vector counts_hist; @@ -1113,13 +1086,12 @@ c_curve(const int argc, const char **argv) { const size_t MAX_READS_TO_HOLD = 5000000; size_t n_paired = 0; size_t n_mates = 0; - n_reads = load_counts_BAM_pe(VERBOSE, input_file_name, - MAX_SEGMENT_LENGTH, MAX_READS_TO_HOLD, - n_paired, n_mates, counts_hist); + n_reads = + load_counts_BAM_pe(VERBOSE, input_file_name, MAX_SEGMENT_LENGTH, + MAX_READS_TO_HOLD, n_paired, n_mates, counts_hist); if (VERBOSE) cerr << "MERGED PAIRED END READS = " << n_paired << endl << "MATES PROCESSED = " << n_mates << endl; - } else if (BAM_FORMAT_INPUT) { if (VERBOSE) @@ -1132,7 +1104,7 @@ c_curve(const int argc, const char **argv) { cerr << "PAIRED_END_BED_INPUT" << endl; n_reads = load_counts_BED_pe(input_file_name, counts_hist); } - else { // default is single end bed file + else { // default is single end bed file if (VERBOSE) cerr << "BED_INPUT" << endl; n_reads = load_counts_BED_se(input_file_name, counts_hist); @@ -1146,7 +1118,7 @@ c_curve(const int argc, const char **argv) { const size_t distinct_counts = std::count_if(begin(counts_hist), end(counts_hist), - [](const double x) {return x > 0.0;}); + [](const double x) { return x > 0.0; }); if (VERBOSE) cerr << "TOTAL READS = " << n_reads << endl @@ -1166,14 +1138,16 @@ c_curve(const int argc, const char **argv) { } if (upper_limit == 0) - upper_limit = n_reads; //set upper limit to equal the number of molecules + upper_limit = n_reads; // set upper limit to equal the number of + // molecules - //handles output of c_curve + // handles output of c_curve std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); 
std::ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf()); - //prints the complexity curve + // prints the complexity curve out << "total_reads" << "\t" << "distinct_reads" << endl; out << 0 << '\t' << 0 << endl; for (size_t i = step_size; i <= upper_limit; i += step_size) { @@ -1195,16 +1169,13 @@ c_curve(const int argc, const char **argv) { return EXIT_SUCCESS; } - ///////////////////////////////////////////////////////// ///////////////////////////////////////////////////////// // BOUND_UNOBS: bounding n_0 static int bound_pop(const int argc, const char **argv) { - try { - bool VERBOSE = false; bool PAIRED_END = false; bool HIST_INPUT = false; @@ -1223,48 +1194,56 @@ bound_pop(const int argc, const char **argv) { size_t n_bootstraps = 500; double c_level = 0.95; size_t max_iter = 100; - unsigned long int seed = 408; + uint64_t seed = 408; - const string description = - "Estimate the size of the underlying population based on counts \ - of observed species in an initial sample."; + const string description = R"( +Estimate the size of the underlying population based on counts \ +of observed species in an initial sample.)"; /********** GET COMMAND LINE ARGUMENTS FOR BOUND_POP ***********/ - OptionParser opt_parse(strip_path(argv[1]), - description, ""); - opt_parse.add_opt("output", 'o', "species richness output file " - "(default: stdout)", false , outfile); - opt_parse.add_opt("max_num_points",'p',"maximum number of points in " - "quadrature estimates", false, max_num_points); - opt_parse.add_opt("tolerance", 't', "numerical tolerance", - false, tolerance); - opt_parse.add_opt("bootstraps", 'n', "number of bootstraps", - false, n_bootstraps); - opt_parse.add_opt("clevel", 'c', "level for confidence intervals", - false, c_level); - opt_parse.add_opt("verbose", 'v', "print more information", - false, VERBOSE); - opt_parse.add_opt("pe", 'P', "input is paired end read file", - false, PAIRED_END); - opt_parse.add_opt("hist", 'H', "input is a text file 
containing the " - "observed histogram", false, HIST_INPUT); - opt_parse.add_opt("vals", 'V', "input is a text file containing only the " - "observed duplicate counts", false, VALS_INPUT); + OptionParser opt_parse(strip_path(argv[1]), description, ""); + opt_parse.add_opt("output", 'o', + "species richness output file " + "(default: stdout)", + false, outfile); + opt_parse.add_opt("max_num_points", 'p', + "maximum number of points in " + "quadrature estimates", + false, max_num_points); + opt_parse.add_opt("tolerance", 't', "numerical tolerance", false, + tolerance); + opt_parse.add_opt("bootstraps", 'n', "number of bootstraps", false, + n_bootstraps); + opt_parse.add_opt("clevel", 'c', "level for confidence intervals", false, + c_level); + opt_parse.add_opt("verbose", 'v', "print more information", false, VERBOSE); + opt_parse.add_opt("pe", 'P', "input is paired end read file", false, + PAIRED_END); + opt_parse.add_opt("hist", 'H', + "input is a text file containing the " + "observed histogram", + false, HIST_INPUT); + opt_parse.add_opt("vals", 'V', + "input is a text file containing only the " + "observed duplicate counts", + false, VALS_INPUT); #ifdef HAVE_HTSLIB - opt_parse.add_opt("bam", 'B', "input is in BAM format", - false, BAM_FORMAT_INPUT); - opt_parse.add_opt("seg_len", 'l', "maximum segment length when merging " + opt_parse.add_opt("bam", 'B', "input is in BAM format", false, + BAM_FORMAT_INPUT); + opt_parse.add_opt("seg_len", 'l', + "maximum segment length when merging " "paired end bam reads", false, MAX_SEGMENT_LENGTH); #endif - opt_parse.add_opt("quick", 'Q', "quick mode, estimate without bootstrapping", - false, QUICK_MODE); - opt_parse.add_opt("seed", 'r', "seed for random number generator", - false, seed); + opt_parse.add_opt("quick", 'Q', + "quick mode, estimate without bootstrapping", false, + QUICK_MODE); + opt_parse.add_opt("seed", 'r', "seed for random number generator", false, + seed); opt_parse.set_show_defaults(); vector leftover_args; - 
opt_parse.parse(argc-1, argv+1, leftover_args); + opt_parse.parse(argc - 1, argv + 1, leftover_args); if (argc == 2 || opt_parse.help_requested()) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; @@ -1305,10 +1284,9 @@ bound_pop(const int argc, const char **argv) { const size_t MAX_READS_TO_HOLD = 5000000; size_t n_paired = 0; size_t n_mates = 0; - n_obs = load_counts_BAM_pe(VERBOSE, input_file_name, - MAX_SEGMENT_LENGTH, - MAX_READS_TO_HOLD, n_paired, - n_mates, counts_hist); + n_obs = + load_counts_BAM_pe(VERBOSE, input_file_name, MAX_SEGMENT_LENGTH, + MAX_READS_TO_HOLD, n_paired, n_mates, counts_hist); if (VERBOSE) { cerr << "MERGED PAIRED END READS = " << n_paired << endl; cerr << "MATES PROCESSED = " << n_mates << endl; @@ -1325,22 +1303,22 @@ bound_pop(const int argc, const char **argv) { cerr << "PAIRED_END_BED_INPUT" << endl; n_obs = load_counts_BED_pe(input_file_name, counts_hist); } - else { // default is single end bed file + else { // default is single end bed file if (VERBOSE) cerr << "BED_INPUT" << endl; n_obs = load_counts_BED_se(input_file_name, counts_hist); } - const double distinct_obs = accumulate(begin(counts_hist), end(counts_hist), 0.0); + const double distinct_obs = + accumulate(begin(counts_hist), end(counts_hist), 0.0); vector measure_moments; // mu_r = (r + 1)! n_{r+1} / n_1 size_t idx = 1; while (counts_hist[idx] > 0 && idx <= counts_hist.size()) { // idx + 1 because function calculates (x-1)! 
- measure_moments.push_back(exp(factorial(idx + 1) + - log(counts_hist[idx]) - - log(counts_hist[1]))); + measure_moments.push_back( + exp(factorial(idx + 1) + log(counts_hist[idx]) - log(counts_hist[1]))); if (!isfinite(measure_moments.back())) { measure_moments.pop_back(); break; @@ -1364,12 +1342,11 @@ bound_pop(const int argc, const char **argv) { cerr << std::setprecision(16) << measure_moments[i] << endl; } - if (QUICK_MODE) { - if (measure_moments.size() < 2*max_num_points) - max_num_points = static_cast(floor(measure_moments.size()/2)); + if (measure_moments.size() < 2 * max_num_points) + max_num_points = static_cast(floor(measure_moments.size() / 2)); else - measure_moments.resize(2*max_num_points); + measure_moments.resize(2 * max_num_points); size_t n_points = 0; n_points = ensure_pos_def_mom_seq(measure_moments, tolerance, VERBOSE); if (VERBOSE) @@ -1394,14 +1371,14 @@ bound_pop(const int argc, const char **argv) { } vector points, weights; - obs_mom_seq.Lower_quadrature_rules(VERBOSE, n_points, tolerance, - max_iter, points, weights); + obs_mom_seq.Lower_quadrature_rules(VERBOSE, n_points, tolerance, max_iter, + points, weights); // renormalize if needed const double weights_sum = accumulate(begin(weights), end(weights), 0.0); if (weights_sum != 1.0) for (size_t i = 0; i < weights.size(); i++) - weights[i] = weights[i]/weights_sum; + weights[i] = weights[i] / weights_sum; if (VERBOSE) { cerr << "points = " << endl; @@ -1418,7 +1395,7 @@ bound_pop(const int argc, const char **argv) { double estimated_unobs = 0.0; for (size_t i = 0; i < weights.size(); i++) - estimated_unobs += counts_hist[1]*weights[i]/points[i]; + estimated_unobs += counts_hist[1] * weights[i] / points[i]; if (estimated_unobs > 0.0) estimated_unobs += distinct_obs; @@ -1428,7 +1405,8 @@ bound_pop(const int argc, const char **argv) { } std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); std::ostream out(outfile.empty() 
? std::cout.rdbuf() : of.rdbuf()); out.setf(std::ios_base::fixed, std::ios_base::floatfield); @@ -1436,14 +1414,12 @@ bound_pop(const int argc, const char **argv) { out << "quadrature_estimated_unobs" << '\t' << "n_points" << endl; out << estimated_unobs << '\t' << n_points << endl; - } // NOT QUICK MODE, BOOTSTRAP else { - vector quad_estimates; - //setup rng + // setup rng srand(time(0) + getpid()); mt19937 rng(seed); @@ -1457,27 +1433,29 @@ bound_pop(const int argc, const char **argv) { distinct_counts_hist.push_back(counts_hist[i]); } - for (size_t iter = 0; iter < max_iter && quad_estimates.size() < n_bootstraps; ++iter) { + for (size_t iter = 0; + iter < max_iter && quad_estimates.size() < n_bootstraps; ++iter) { if (VERBOSE) cerr << "iter=" << "\t" << iter << endl; vector sample_hist; - resample_hist(rng, counts_hist_distinct_counts, - distinct_counts_hist, sample_hist); + resample_hist(rng, counts_hist_distinct_counts, distinct_counts_hist, + sample_hist); - const double sampled_distinct = accumulate(begin(sample_hist), end(sample_hist), 0.0); + const double sampled_distinct = + accumulate(begin(sample_hist), end(sample_hist), 0.0); // initialize moments, 0th moment is 1 vector bootstrap_moments(1, 1.0); // moments[r] = (r + 1)! 
n_{r+1} / n_1 - for (size_t i = 0; i < 2*max_num_points; i++) { - bootstrap_moments.push_back(exp(factorial(i + 3) + - log(sample_hist[i + 2]) - - log(sample_hist[1])) ); + for (size_t i = 0; i < 2 * max_num_points; i++) { + bootstrap_moments.push_back(exp( + factorial(i + 3) + log(sample_hist[i + 2]) - log(sample_hist[1]))); } size_t n_points = 0; - n_points = ensure_pos_def_mom_seq(bootstrap_moments, tolerance, VERBOSE); + n_points = + ensure_pos_def_mom_seq(bootstrap_moments, tolerance, VERBOSE); n_points = min(n_points, max_num_points); if (VERBOSE) cerr << "n_points = " << n_points << endl; @@ -1489,15 +1467,16 @@ bound_pop(const int argc, const char **argv) { max_iter, points, weights); // renormalize if needed - const double weights_sum = accumulate(begin(weights), end(weights), 0.0); + const double weights_sum = + accumulate(begin(weights), end(weights), 0.0); if (weights_sum != 1.0) for (size_t i = 0; i < weights.size(); i++) - weights[i] = weights[i]/weights_sum; + weights[i] = weights[i] / weights_sum; double estimated_unobs = 0.0; for (size_t i = 0; i < weights.size(); i++) - estimated_unobs += counts_hist[1]*weights[i]/points[i]; + estimated_unobs += counts_hist[1] * weights[i] / points[i]; if (estimated_unobs > 0.0) estimated_unobs += sampled_distinct; @@ -1542,20 +1521,20 @@ bound_pop(const int argc, const char **argv) { } double median_estimate, lower_ci, upper_ci; - median_and_ci(quad_estimates, c_level, median_estimate, - lower_ci, upper_ci); + median_and_ci(quad_estimates, c_level, median_estimate, lower_ci, + upper_ci); std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); std::ostream out(outfile.empty() ? 
std::cout.rdbuf() : of.rdbuf()); out.setf(std::ios_base::fixed, std::ios_base::floatfield); out.precision(1); - out << "median_estimated_unobs" << '\t' - << "lower_ci" << '\t' << "upper_ci" << endl; - out << median_estimate << '\t' - << lower_ci << '\t' << upper_ci << endl; + out << "median_estimated_unobs" << '\t' << "lower_ci" << '\t' + << "upper_ci" << endl; + out << median_estimate << '\t' << lower_ci << '\t' << upper_ci << endl; } } catch (runtime_error &e) { @@ -1571,9 +1550,7 @@ bound_pop(const int argc, const char **argv) { static int pop_size(const int argc, const char **argv) { - try { - static const size_t min_required_counts = 4; static const string min_required_counts_error_message = "max count before zero is less than min required count (" + @@ -1588,7 +1565,7 @@ pop_size(const int argc, const char **argv) { size_t n_bootstraps = 100; int diagonal = 0; double c_level = 0.95; - unsigned long int seed = 408; + uint64_t seed = 408; /* FLAGS */ bool VERBOSE = false; @@ -1614,38 +1591,44 @@ pop_size(const int argc, const char **argv) { OptionParser opt_parse(strip_path(argv[1]), description, ""); opt_parse.add_opt("output", 'o', "yield output file (default: stdout)", - false , outfile); - opt_parse.add_opt("extrap",'e',"maximum extrapolation", false, max_extrap); - opt_parse.add_opt("steps",'s',"number of steps", false, n_desired_steps); - opt_parse.add_opt("boots",'n',"number of bootstraps", false, n_bootstraps); - opt_parse.add_opt("cval", 'c', "level for confidence intervals", false, c_level); - opt_parse.add_opt("terms", 'x', "maximum terms in estimator", false, orig_max_terms); + false, outfile); + opt_parse.add_opt("extrap", 'e', "maximum extrapolation", false, + max_extrap); + opt_parse.add_opt("steps", 's', "number of steps", false, n_desired_steps); + opt_parse.add_opt("boots", 'n', "number of bootstraps", false, + n_bootstraps); + opt_parse.add_opt("cval", 'c', "level for confidence intervals", false, + c_level); + opt_parse.add_opt("terms", 'x', 
"maximum terms in estimator", false, + orig_max_terms); opt_parse.add_opt("verbose", 'v', "print more info", false, VERBOSE); #ifdef HAVE_HTSLIB - opt_parse.add_opt("bam", 'B', "input is in BAM format", - false, BAM_FORMAT_INPUT); - opt_parse.add_opt("seg_len", 'l', "maximum segment length when merging " + opt_parse.add_opt("bam", 'B', "input is in BAM format", false, + BAM_FORMAT_INPUT); + opt_parse.add_opt("seg_len", 'l', + "maximum segment length when merging " "paired end bam reads", false, MAX_SEGMENT_LENGTH); #endif - opt_parse.add_opt("pe", 'P', "input is paired end read file", - false, PAIRED_END); - opt_parse.add_opt("vals", 'V', - "input is a text file containing only the observed counts", - false, VALS_INPUT); + opt_parse.add_opt("pe", 'P', "input is paired end read file", false, + PAIRED_END); + opt_parse.add_opt( + "vals", 'V', "input is a text file containing only the observed counts", + false, VALS_INPUT); opt_parse.add_opt("hist", 'H', "input is a text file containing the observed histogram", false, HIST_INPUT); opt_parse.add_opt("quick", 'Q', "quick mode (no bootstraps) for confidence intervals", false, SINGLE_ESTIMATE); - opt_parse.add_opt("defects", 'D', "no testing for defects", false, allow_defects); - opt_parse.add_opt("seed", 'r', "seed for random number generator", - false, seed); + opt_parse.add_opt("defects", 'D', "no testing for defects", false, + allow_defects); + opt_parse.add_opt("seed", 'r', "seed for random number generator", false, + seed); opt_parse.set_show_defaults(); vector leftover_args; // ADS: suspect bug below; "-about" isn't working. 
- opt_parse.parse(argc-1, argv+1, leftover_args); + opt_parse.parse(argc - 1, argv + 1, leftover_args); if (argc == 2 || opt_parse.help_requested()) { cerr << opt_parse.help_message() << endl; return EXIT_SUCCESS; @@ -1686,10 +1669,9 @@ pop_size(const int argc, const char **argv) { const size_t MAX_READS_TO_HOLD = 5000000; size_t n_paired = 0; size_t n_mates = 0; - n_reads = load_counts_BAM_pe(VERBOSE, input_file_name, - MAX_SEGMENT_LENGTH, - MAX_READS_TO_HOLD, n_paired, - n_mates, counts_hist); + n_reads = + load_counts_BAM_pe(VERBOSE, input_file_name, MAX_SEGMENT_LENGTH, + MAX_READS_TO_HOLD, n_paired, n_mates, counts_hist); if (VERBOSE) { cerr << "MERGED PAIRED END READS = " << n_paired << endl; cerr << "MATES PROCESSED = " << n_mates << endl; @@ -1706,7 +1688,7 @@ pop_size(const int argc, const char **argv) { cerr << "PAIRED_END_BED_INPUT" << endl; n_reads = load_counts_BED_pe(input_file_name, counts_hist); } - else { // default is single end bed file + else { // default is single end bed file if (VERBOSE) cerr << "BED_INPUT" << endl; n_reads = load_counts_BED_se(input_file_name, counts_hist); @@ -1726,13 +1708,13 @@ pop_size(const int argc, const char **argv) { orig_max_terms = orig_max_terms - (orig_max_terms % 2 == 1); if (max_extrap < 1.0) - max_extrap = 1000000000*distinct_reads; + max_extrap = 1000000000 * distinct_reads; if (step_size < 1.0) - step_size = (max_extrap - distinct_reads)/n_desired_steps; + step_size = (max_extrap - distinct_reads) / n_desired_steps; const size_t distinct_counts = std::count_if(begin(counts_hist), end(counts_hist), - [](const double x) {return x > 0.0;}); + [](const double x) { return x > 0.0; }); if (VERBOSE) cerr << "TOTAL READS = " << n_reads << endl @@ -1759,7 +1741,7 @@ pop_size(const int argc, const char **argv) { // const size_t total_reads = get_counts_from_hist(counts_hist); - //assert(total_reads == n_reads); // ADS: why commented out? + // assert(total_reads == n_reads); // ADS: why commented out? 
// check that min required count is satisfied if (orig_max_terms < min_required_counts) @@ -1771,17 +1753,17 @@ pop_size(const int argc, const char **argv) { vector yield_estimates; if (SINGLE_ESTIMATE) { - - const bool single_estimate_success = - extrap_single_estimate(VERBOSE, allow_defects, counts_hist, orig_max_terms, - diagonal, step_size, max_extrap, yield_estimates); + const bool single_estimate_success = extrap_single_estimate( + VERBOSE, allow_defects, counts_hist, orig_max_terms, diagonal, + step_size, max_extrap, yield_estimates); // IF FAILURE, EXIT if (!single_estimate_success) throw runtime_error("single estimate failed, run " "full mode for estimates"); std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); std::ostream out(outfile.empty() ? std::cout.rdbuf() : of.rdbuf()); out << "TOTAL_READS\tEXPECTED_DISTINCT" << endl; @@ -1790,15 +1772,15 @@ pop_size(const int argc, const char **argv) { out << 0 << '\t' << 0 << endl; for (size_t i = 0; i < yield_estimates.size(); ++i) - out << (i + 1)*step_size << '\t' << yield_estimates[i] << endl; + out << (i + 1) * step_size << '\t' << yield_estimates[i] << endl; } else { if (VERBOSE) cerr << "[BOOTSTRAPPING HISTOGRAM]" << endl; - const size_t max_iter = 100*n_bootstraps; + const size_t max_iter = 100 * n_bootstraps; - vector > bootstrap_estimates; + vector> bootstrap_estimates; extrap_bootstrap(VERBOSE, allow_defects, seed, counts_hist, n_bootstraps, orig_max_terms, diagonal, step_size, max_extrap, max_iter, bootstrap_estimates); @@ -1816,7 +1798,8 @@ pop_size(const int argc, const char **argv) { cerr << "[WRITING OUTPUT]" << endl; std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); std::ostream out(outfile.empty() ? 
std::cout.rdbuf() : of.rdbuf()); out.setf(std::ios_base::fixed, std::ios_base::floatfield); @@ -1827,13 +1810,12 @@ pop_size(const int argc, const char **argv) { throw runtime_error("problem with number of estimates in pop_size"); const bool converged = - (yield_estimates[n_ests] - yield_estimates[n_ests-1] < 1.0); + (yield_estimates[n_ests] - yield_estimates[n_ests - 1] < 1.0); - out << "pop_size_estimate" << '\t' - << "lower_ci" << '\t' << "upper_ci" << endl; - out << yield_estimates.back() << '\t' - << yield_lower_ci_lognorm.back() << '\t' - << yield_upper_ci_lognorm.back(); + out << "pop_size_estimate" << '\t' << "lower_ci" << '\t' << "upper_ci" + << endl; + out << yield_estimates.back() << '\t' << yield_lower_ci_lognorm.back() + << '\t' << yield_upper_ci_lognorm.back(); if (!converged) out << "\tnot_converged"; out << endl; @@ -1850,52 +1832,59 @@ pop_size(const int argc, const char **argv) { return EXIT_SUCCESS; } +static string +usage_message() { + std::ostringstream oss; + oss << "preseq: a program for analyzing library complexity\n" + "Version: "; + oss << VERSION; + oss << "\n\n" + "Usage: preseq [OPTIONS]\n\n" + ": c_curve generate complexity curve for a library\n" + " lc_extrap predict the yield for future experiments\n" + " gc_extrap predict genome coverage low input\n" + " sequencing experiments\n" + " bound_pop lower bound on population size\n" + " pop_size estimate number of unique species\n"; + return oss.str(); +} + int main(const int argc, const char **argv) { - - static const string usage_message = - "preseq: a program for analyzing library complexity\n" - "Version: " + preseq_version + "\n\n" - "Usage: preseq [OPTIONS]\n\n" - ": c_curve generate complexity curve for a library\n" - " lc_extrap predict the yield for future experiments\n" - " gc_extrap predict genome coverage low input\n" - " sequencing experiments\n" - " bound_pop lower bound on population size\n" - " pop_size estimate number of unique species\n"; + // static string usage_message 
= + // "preseq: a program for analyzing library complexity\n" + // "Version: " + + // preseq_version + + // "\n\n" + // "Usage: preseq [OPTIONS]\n\n" + // ": c_curve generate complexity curve for a library\n" + // " lc_extrap predict the yield for future experiments\n" + // " gc_extrap predict genome coverage low input\n" + // " sequencing experiments\n" + // " bound_pop lower bound on population size\n" + // " pop_size estimate number of unique species\n"; if (argc < 2) - cerr << usage_message << endl; + cerr << usage_message() << endl; else if (strcmp(argv[1], "lc_extrap") == 0) { - return lc_extrap(argc, argv); - } else if (strcmp(argv[1], "c_curve") == 0) { - return c_curve(argc, argv); - } else if (strcmp(argv[1], "gc_extrap") == 0) { - return gc_extrap(argc, argv); - } else if (strcmp(argv[1], "bound_pop") == 0) { - return bound_pop(argc, argv); - } else if (strcmp(argv[1], "pop_size") == 0) { - return pop_size(argc, argv); - } else { cerr << "unrecognized command: " << argv[1] << endl << usage_message << endl; return EXIT_SUCCESS; - } } diff --git a/src/to-mr.cpp b/src/to-mr.cpp index 140b066..c9d4f99 100644 --- a/src/to-mr.cpp +++ b/src/to-mr.cpp @@ -19,61 +19,60 @@ #include -#include -#include -#include +#include #include -#include +#include #include -#include +#include +#include +#include +#include "MappedRead.hpp" #include "OptionParser.hpp" -#include "smithlab_utils.hpp" -#include "smithlab_os.hpp" #include "htslib_wrapper_deprecated.hpp" -#include "MappedRead.hpp" +#include "smithlab_os.hpp" +#include "smithlab_utils.hpp" -using std::string; -using std::vector; -using std::cout; using std::cerr; +using std::cout; using std::endl; using std::ifstream; using std::max; using std::min; using std::runtime_error; -using std::unordered_map; +using std::string; using std::swap; - +using std::unordered_map; +using std::vector; /********Below are functions for merging pair-end reads********/ static void fill_overlap(const bool pos_str, const MappedRead &mr, 
const size_t start, const size_t end, const size_t offset, string &seq, string &scr) { - const size_t a = pos_str ? (start - mr.r.get_start()) : (mr.r.get_end() - end); - const size_t b = pos_str ? (end - mr.r.get_start()) : (mr.r.get_end() - start); + const size_t a = + pos_str ? (start - mr.r.get_start()) : (mr.r.get_end() - end); + const size_t b = + pos_str ? (end - mr.r.get_start()) : (mr.r.get_end() - start); copy(mr.seq.begin() + a, mr.seq.begin() + b, seq.begin() + offset); copy(mr.scr.begin() + a, mr.scr.begin() + b, scr.begin() + offset); } static void -merge_mates(const size_t suffix_len, const size_t range, - const MappedRead &one, const MappedRead &two, - MappedRead &merged, int &len) { - +merge_mates(const size_t suffix_len, const size_t range, const MappedRead &one, + const MappedRead &two, MappedRead &merged, int &len) { const bool pos_str = one.r.pos_strand(); const size_t overlap_start = max(one.r.get_start(), two.r.get_start()); const size_t overlap_end = min(one.r.get_end(), two.r.get_end()); - const size_t one_left = pos_str ? - one.r.get_start() : max(overlap_end, one.r.get_start()); + const size_t one_left = + pos_str ? one.r.get_start() : max(overlap_end, one.r.get_start()); const size_t one_right = pos_str ? min(overlap_start, one.r.get_end()) : one.r.get_end(); - const size_t two_left = pos_str ? - max(overlap_end, two.r.get_start()) : two.r.get_start(); - const size_t two_right = pos_str ? - two.r.get_end() : min(overlap_start, two.r.get_end()); + const size_t two_left = + pos_str ? max(overlap_end, two.r.get_start()) : two.r.get_start(); + const size_t two_right = + pos_str ? two.r.get_end() : min(overlap_start, two.r.get_end()); len = pos_str ? 
(two_right - one_left) : (one_right - two_left); @@ -107,9 +106,11 @@ merge_mates(const size_t suffix_len, const size_t range, // use the mate with the most info to fill in the overlap if (info_one >= info_two) - fill_overlap(pos_str, one, overlap_start, overlap_end, lim_one, seq, scr); + fill_overlap(pos_str, one, overlap_start, overlap_end, lim_one, seq, + scr); else - fill_overlap(pos_str, two, overlap_start, overlap_end, lim_one, seq, scr); + fill_overlap(pos_str, two, overlap_start, overlap_end, lim_one, seq, + scr); } } @@ -125,8 +126,7 @@ merge_mates(const size_t suffix_len, const size_t range, } inline static bool -same_read(const size_t suffix_len, - const MappedRead &a, const MappedRead &b) { +same_read(const size_t suffix_len, const MappedRead &a, const MappedRead &b) { const string sa(a.r.get_name()); const string sb(b.r.get_name()); return std::equal(sa.begin(), sa.end() - suffix_len, sb.begin()); @@ -149,9 +149,7 @@ get_read_name(const SAMRecord &aln, const size_t suffix_len) { int main(int argc, const char **argv) { - try { - string outfile; string mapper = "general"; size_t max_frag_len = 1000; @@ -164,17 +162,15 @@ main(int argc, const char **argv) { "Convert the SAM/BAM output from " "to MethPipe mapped read format", "sam/bam_file"); - opt_parse.add_opt("output", 'o', "Name of output file", - false, outfile); + opt_parse.add_opt("output", 'o', "Name of output file", false, outfile); // opt_parse.add_opt("mapper", 'm', // "Original mapper: bismark, bs_seeker or general", // true, mapper); opt_parse.add_opt("suff", 's', "read name suffix length (default: 1)", false, suffix_len); - opt_parse.add_opt("max-frag", 'L', "maximum allowed insert size", - false, max_frag_len); - opt_parse.add_opt("verbose", 'v', "print more information", - false, VERBOSE); + opt_parse.add_opt("max-frag", 'L', "maximum allowed insert size", false, + max_frag_len); + opt_parse.add_opt("verbose", 'v', "print more information", false, VERBOSE); vector leftover_args; 
opt_parse.parse(argc, argv, leftover_args); @@ -199,12 +195,13 @@ main(int argc, const char **argv) { /****************** END COMMAND LINE OPTIONS *****************/ std::ofstream of; - if (!outfile.empty()) of.open(outfile.c_str()); + if (!outfile.empty()) + of.open(outfile.c_str()); std::ostream out(outfile.empty() ? cout.rdbuf() : of.rdbuf()); if (VERBOSE) cerr << "[input file: " << mapped_reads_file << "]" << endl - << "[output file: " - << (outfile.empty() ? "stdout" : outfile) << "]" << endl; + << "[output file: " << (outfile.empty() ? "stdout" : outfile) << "]" + << endl; SAMReader_deprecated sam_reader(mapped_reads_file, mapper); unordered_map dangling_mates; @@ -223,15 +220,16 @@ main(int argc, const char **argv) { MappedRead merged; int len = 0; - merge_mates(suffix_len, max_frag_len, - dangling_mates[read_name].mr, aln.mr, merged, len); + merge_mates(suffix_len, max_frag_len, dangling_mates[read_name].mr, + aln.mr, merged, len); if (len <= static_cast(max_frag_len)) out << merged << endl; else if (len > 0) out << dangling_mates[read_name].mr << endl << aln.mr << endl; dangling_mates.erase(read_name); } - else dangling_mates[read_name] = aln; + else + dangling_mates[read_name] = aln; // flush dangling_mates if (dangling_mates.size() > max_dangling) { @@ -239,12 +237,14 @@ main(int argc, const char **argv) { for (auto &&mates : dangling_mates) if (mates.second.mr.r.get_chrom() < aln.mr.r.get_chrom() || (mates.second.mr.r.get_chrom() == aln.mr.r.get_chrom() && - mates.second.mr.r.get_end() + max_frag_len < aln.mr.r.get_start())) { + mates.second.mr.r.get_end() + max_frag_len < + aln.mr.r.get_start())) { if (!mates.second.is_Trich) revcomp(mates.second.mr); out << mates.second.mr << endl; } - else tmp[mates.first] = mates.second; + else + tmp[mates.first] = mates.second; swap(tmp, dangling_mates); } }