diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index 0ff1201..7227755 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -250,7 +250,7 @@ jobs:
           cmake -S . -B ./build ${GRENEDALF_BUILD_STATIC}
 
       # -------------------------------------------------------
-      #     Build & Test
+      #     Build
       # -------------------------------------------------------
 
       - name: Build
@@ -280,6 +280,8 @@ jobs:
       #     Upload Binaries
       # -------------------------------------------------------
 
+      # We upload the static binaries for the most important systems.
+      # These are the ones that we want to publish with release versions.
       - name: Upload Binaries
         if: (  matrix.os == 'ubuntu-22.04' && matrix.compiler == 'gcc-13' ) || matrix.compiler == 'apple'
         uses: actions/upload-artifact@v4
@@ -287,3 +289,42 @@ jobs:
           name: binary-${{ matrix.os }}
           path: |
             bin/grenedalf
+
+      # -------------------------------------------------------
+      #     Test
+      # -------------------------------------------------------
+
+      # For the same versions that we use for the binaries above,
+      # we also run the test case, comparing grenedalf against our independent implementation.
+      # This covers Ubuntu and two Mac versions, so hopefully good enough.
+      - name: Python Setup
+        if: (  matrix.os == 'ubuntu-22.04' && matrix.compiler == 'gcc-13' ) || matrix.compiler == 'apple'
+        uses: actions/setup-python@v2
+        with:
+          python-version: 3.8
+
+      - name: Install Test Dependencies
+        if: (  matrix.os == 'ubuntu-22.04' && matrix.compiler == 'gcc-13' ) || matrix.compiler == 'apple'
+        run: |
+          # Install the dependencies for the tests
+          python -m venv venv
+          source venv/bin/activate
+          pip install -r test/pip.txt
+
+          # Verify the installation
+          pip list
+          python --version
+          pytest
+
+      - name: Run Test
+        if: (  matrix.os == 'ubuntu-22.04' && matrix.compiler == 'gcc-13' ) || matrix.compiler == 'apple'
+        run: |
+          ./test/run.sh
+
+      - name: Upload Tests
+        if: (  matrix.os == 'ubuntu-22.04' && matrix.compiler == 'gcc-13' ) || matrix.compiler == 'apple'
+        uses: actions/upload-artifact@v4
+        with:
+          name: test-${{ matrix.os }}
+          path: |
+            test
diff --git a/test/README.md b/test/README.md
new file mode 100644
index 0000000..a036e7c
--- /dev/null
+++ b/test/README.md
@@ -0,0 +1,11 @@
+# Test
+
+This directory contains test cases that are mostly meant for internal testing. However, feel free to have a look if you are interested.
+
+We here run grenedalf as well as our minimalistic independent implementation of the equations in Python, and compare the two to each other. As they both yield the same results, we do have some confidence that the equations are correctly implemented. The tests span a range of pool sizes, read depths, allele frequencies, and window sizes. However, this is a minimalistic test with clean data, i.e., we assume sufficient read depth and no missing data in the test. Hence, if you encounter any issues on more realistic data, please report the [issue](https://github.com/lczech/grenedalf/issues).
+
+The Python dependencies for running the tests are listed in `conda.yaml` and `pip.txt`, for the two package manages. Use these to install the packages needed to run the tests. We also run these tests in our [CI Tests](https://github.com/lczech/grenedalf/actions) in GitHub Actions.
+
+By default, we do not test PoPoolation here, as that's not the scope of this test. We however have implemented this for completeness as well; if you want to run the test for PoPoolation, find the commented line containing `run_popoolation` at the very end of the `execute_test.py` script, and un-comment it. Furthermore, download the source code of [PoPoolation](https://sourceforge.net/projects/popoolation/) and [PoPoolation2](https://sourceforge.net/projects/popoolation2/), and place both as sub-directories in `test/popoolation`, named `popoolation` and `popoolation2`, respectively.
+
+The test scripts are also published in the supporting grenedalf manuscript repository, see [here](https://github.com/lczech/grenedalf-paper/tree/master/eval-independent-test). That repository contains the scripts for all tests and benchmarks that we ran for the manuscript, in particular for assessing correctness and performance of grenedalf.
diff --git a/test/clean.sh b/test/clean.sh
new file mode 100755
index 0000000..f683327
--- /dev/null
+++ b/test/clean.sh
@@ -0,0 +1,21 @@
+#!/bin/bash
+
+# Remove the generated data files
+rm -r mpileup
+rm -r sync
+rm -r poolsizes
+
+# Remove the run files
+rm -r grenedalf/diversity
+rm -r grenedalf/fst
+rm -r grenedalf/logs
+rm -r popoolation/diversity
+rm -r popoolation/fst
+rm -r popoolation/logs
+
+# Remove the test result files
+rm -r test_results.tsv
+rm -r figures_*
+
+# Also remove unnecessary py stuff
+rm -r __pycache__
diff --git a/test/conda.yaml b/test/conda.yaml
new file mode 100644
index 0000000..8386f44
--- /dev/null
+++ b/test/conda.yaml
@@ -0,0 +1,15 @@
+name: grenedalf_test
+channels:
+  - defaults
+  - conda-forge
+dependencies:
+  - python=3.8
+  - matplotlib
+  - numpy
+  - pandas
+  - scipy
+  - seaborn
+  - tqdm
+  - pip
+  - pip:
+      - typing-extensions
diff --git a/test/create_files.py b/test/create_files.py
new file mode 100755
index 0000000..53ebfd6
--- /dev/null
+++ b/test/create_files.py
@@ -0,0 +1,122 @@
+#!/usr/bin/env python3
+
+import sys, os
+import pandas as pd
+import tqdm
+
+os.makedirs("mpileup", exist_ok=True)
+os.makedirs("sync", exist_ok=True)
+
+chromosome = "chr1"
+ref_allele = 'A'
+derived_allele = 'C'
+
+# ------------------------------------------------------------
+#     generate pileup
+# ------------------------------------------------------------
+
+def generate_mpileup(file_id, coverage, derived_count, window_size):
+    output_file = f"mpileup/{file_id}.pileup"
+    # print(f"Generating mpileup file: {output_file}")
+
+    with open(output_file, 'w') as f:
+        for pos in range(1, window_size + 1):
+            if pos == 1:
+                cov1 = coverage - derived_count
+                cov2 = derived_count
+            else:
+                cov1 = coverage
+                cov2 = 0
+            bases = ref_allele * cov1 + derived_allele * cov2
+            qualities = 'I' * coverage
+            f.write(f"{chromosome}\t{pos}\t{ref_allele}\t{coverage}\t{bases}\t{qualities}\n")
+
+def generate_mpileup_sample(coverage, derived_count, window_size):
+    file_id = f"mpileup-cov_{coverage}-der_{derived_count}-win_{window_size}"
+    generate_mpileup(file_id, coverage, derived_count, window_size)
+
+# ------------------------------------------------------------
+#     generate sync
+# ------------------------------------------------------------
+
+def write_sync_sample( filehandle, coverage, derived_count ):
+    a_cnt = coverage - derived_count
+    t_cnt = derived_count
+    filehandle.write(f"\t{a_cnt}:{t_cnt}:0:0:0:0")
+
+def generate_sync(file_id, coverage_1, coverage_2, derived_count_1, derived_count_2, window_size):
+    output_file = f"sync/{file_id}.sync"
+    # print(f"Generating sync file: {output_file}")
+
+    with open(output_file, 'w') as f:
+        for pos in range(1, window_size + 1):
+            f.write(f"{chromosome}\t{pos}\t{ref_allele}")
+            if pos == 1:
+                write_sync_sample( f, coverage_1, derived_count_1 )
+                write_sync_sample( f, coverage_2, derived_count_2 )
+            else:
+                write_sync_sample( f, coverage_1, 0 )
+                write_sync_sample( f, coverage_2, 0 )
+            f.write("\n")
+
+def generate_sync_sample(coverage_1, coverage_2, derived_count_1, derived_count_2, window_size):
+    file_id = f"sync-cov1_{coverage_1}-cov2_{coverage_2}-der1_{derived_count_1}-der2_{derived_count_2}-win_{window_size}"
+    generate_sync(file_id, coverage_1, coverage_2, derived_count_1, derived_count_2, window_size)
+
+# ------------------------------------------------------------
+#     read table
+# ------------------------------------------------------------
+
+def create_from_df(df):
+    # Iterate over each row
+    for _, row in df.iterrows():
+        generate_mpileup_sample(
+            int(row['coverage_1']),
+            int(row['derived_count_1']),
+            int(row['window_size'])
+        )
+        generate_mpileup_sample(
+            int(row['coverage_2']),
+            int(row['derived_count_2']),
+            int(row['window_size'])
+        )
+        generate_sync_sample(
+            int(row['coverage_1']),
+            int(row['coverage_2']),
+            int(row['derived_count_1']),
+            int(row['derived_count_2']),
+            int(row['window_size'])
+        )
+
+def create_from_param_space(param_space):
+    print("Creating files")
+    for params in tqdm.tqdm(param_space):
+        n1, n2, c1, c2, k1, k2, w = params
+
+        generate_mpileup_sample(
+            int(c1),
+            int(k1),
+            int(w)
+        )
+        generate_mpileup_sample(
+            int(c2),
+            int(k2),
+            int(w)
+        )
+        generate_sync_sample(
+            int(c1),
+            int(c2),
+            int(k1),
+            int(k2),
+            int(w)
+        )
+
+def main():
+    # Read the tab-delimited table using pandas
+    infile = "independent_check_statistics.tsv"
+    df = pd.read_csv(infile, delimiter='\t')
+    # print(df)
+    create_from_df(df)
+
+if __name__ == '__main__':
+    main()
diff --git a/test/evaluate.py b/test/evaluate.py
new file mode 100755
index 0000000..0c8d33f
--- /dev/null
+++ b/test/evaluate.py
@@ -0,0 +1,241 @@
+#!/usr/bin/env python3
+
+# libraries
+import matplotlib
+matplotlib.use('TkAgg')
+
+import matplotlib.pyplot as plt
+import matplotlib.mlab as mlab
+import numpy as np
+import pandas as pd
+import seaborn as sns
+import os, sys
+import scipy as sp
+from matplotlib.ticker import FuncFormatter
+
+# ------------------------------------------------------------
+#     Settings
+# ------------------------------------------------------------
+
+# global names for our ins and outs
+out_dir_png = "figures_png"
+out_dir_pdf = "figures_pdf"
+out_dir_svg = "figures_svg"
+
+infile = "test_results.tsv"
+df = pd.read_csv(infile, sep='\t')
+
+# ------------------------------------------------------------
+#     Plotting
+# ------------------------------------------------------------
+
+plotnum = 1
+def plot_corr(
+    title, xlabel, ylabel, x, y, with_kernel = True
+    # markers='o', dashes=(0, ""), palette='k', **kwargs
+):
+    global plotnum
+    global out_dir_png
+    global out_dir_pdf
+    global out_dir_svg
+
+    # Get proper name and print it
+    testcase = title + '-' + xlabel + '-' + ylabel
+    testcase = testcase.lower().replace( " ", "-" )
+    testcase = ''.join(c for c in testcase if c.isalnum() or c == '-')
+    testcase = testcase.replace("--", "-")
+    # print("Plot: " + testcase)
+
+    # print(x)
+    # print(y)
+    if len(x) != len(y):
+        print("Warning: x has " + str(len(x)) + " rows, y has " + str(len(y)) + " rows")
+
+    # Remake data as one frame, because of reasons
+    # (being: we need to drop na, but in both at the same time)
+    df = pd.DataFrame(data={'x': x, 'y': y})
+    df = df[df['x'].notna()]
+    df = df[df['y'].notna()]
+    # print (df)
+
+    # Create dirs for results
+    if not os.path.exists(out_dir_png):
+        os.makedirs(out_dir_png)
+    if not os.path.exists(out_dir_pdf):
+        os.makedirs(out_dir_pdf)
+    if not os.path.exists(out_dir_svg):
+        os.makedirs(out_dir_svg)
+
+    # Compute the pearson correlation between both, and the mean squared error
+    pearson_r, pearson_p = sp.stats.pearsonr(x=df['x'], y=df['y'])
+    # mse = ((df['x'] - df['y'])**2).mean()
+    mae = (np.absolute(df['x'] - df['y'])).mean()
+
+    # Let's make a fancy density plot to be able to better see the dot density
+    # Also, we subset for the kernel computation, as it takes waaaay to long otherwise...
+    if with_kernel:
+        max_df_size = 25000
+        if len(df.index) > max_df_size:
+            print("  subsetting")
+            df = df.sample(n=max_df_size)
+        values = np.vstack([df['x'], df['y']])
+        colormap = plt.cm.viridis_r
+        print("  kernel")
+        kernel = sp.stats.gaussian_kde(values)(values)
+
+        # Make the plot
+        print("  plot", len(values[0]))
+        plt.figure( plotnum, figsize=(8,8) )
+        ax = sns.scatterplot(
+            x=df["x"], y=df["y"],
+            c=kernel,
+            cmap=colormap,
+            linewidth=0
+        )
+        # sns.kdeplot(
+        #      x=df["x"], y=df["y"], fill=True, cmap=colormap
+        # )
+    else:
+        # Make the plot
+        plt.figure( plotnum, figsize=(8,8) )
+        ax = sns.scatterplot(
+            x=df["x"], y=df["y"],
+            linewidth=0
+        )
+
+    # Make proper
+    ax.set_box_aspect(1)
+    ax.set_aspect('equal')
+
+    # Draw a line of x=y
+    x0, x1 = ax.get_xlim()
+    y0, y1 = ax.get_ylim()
+    lims = [max(x0, y0), min(x1, y1)]
+    ax.plot( lims, lims, 'lightgray', zorder=-1)
+    # ax.plot( lims, lims, 'k', alpha=0.2 )
+    # plt.plot([0, 1], [0, 1])
+
+    # annotate the pearson correlation coefficient text to 2 decimal places
+    plt.text(.05, .95, 'r={:.2f}'.format(pearson_r), transform=ax.transAxes)
+    # plt.text(.05, .92, 's={:.6f}'.format(mse), transform=ax.transAxes)
+    plt.text(.05, .92, 'MAE={:.4f}'.format(mae), transform=ax.transAxes)
+
+    # Nameing the plot and the axis.
+    ax.set_title( title )
+    ax.set_xlabel( xlabel )
+    ax.set_ylabel( ylabel )
+
+    # Save to files
+    plt.savefig( out_dir_png + "/" + testcase + ".png", format='png', bbox_inches="tight" )
+    # plt.savefig( out_dir_pdf + "/" + testcase + ".pdf", format='pdf', bbox_inches="tight" )
+    # plt.savefig( out_dir_svg + "/" + testcase + ".svg", format='svg', bbox_inches="tight" )
+    plt.close( plotnum )
+    plotnum += 1
+
+# ------------------------------------------------------------
+#     Evaluation
+# ------------------------------------------------------------
+
+# We run an evaluation that tests correlation and mean absolute error.
+# If either of them is out of the range of what we consider good,
+# we set the success flag to 1 (false), indicating failure in the return value of the script.
+success = 0
+def eval_corr( title, xlabel, ylabel, x, y ):
+    # print(title, xlabel, ylabel)
+
+    # Prepare the data frame. Similar to the plotting above.
+    df = pd.DataFrame(data={'x': x, 'y': y})
+    df = df[df['x'].notna()]
+    df = df[df['y'].notna()]
+
+    # Compute the pearson correlation between both, and the mean squared error.
+    # Again, same as above in the plotting. But well, we can live with that code duplication.
+    pearson_r, pearson_p = sp.stats.pearsonr(x=df['x'], y=df['y'])
+    mae = (np.absolute(df['x'] - df['y'])).mean()
+
+    # If we fail, we just set the flag. We do not abort yet,
+    # as we want all tests to run, so that we get all results.
+    # Makes it easier for debugging.
+    if pearson_r < 0.99 or mae > 0.001:
+        print("FAIL", title, xlabel, ylabel)
+        print("pearson_r == " + str(pearson_r))
+        print("mae == " + str(mae))
+        success = 1
+
+    # Lastly also create a plot for evaluation purposes/
+    plot_corr( title, xlabel, ylabel, x, y, with_kernel = False )
+
+# ------------------------------------------------------------
+#     grenedalf vs independent
+# ------------------------------------------------------------
+
+# Theta Pi
+eval_corr( "theta_pi_1", "grenedalf", "independent", df["grenedalf.theta_pi_1"], df["indep.theta_pi_1"] )
+eval_corr( "theta_pi_2", "grenedalf", "independent", df["grenedalf.theta_pi_2"], df["indep.theta_pi_2"] )
+
+# Theta W
+eval_corr( "theta_w_1", "grenedalf", "independent", df["grenedalf.theta_w_1"], df["indep.theta_w_1"] )
+eval_corr( "theta_w_2", "grenedalf", "independent", df["grenedalf.theta_w_2"], df["indep.theta_w_2"] )
+
+# Tajima's D
+eval_corr( "tajimas_d_1", "grenedalf", "independent", df["grenedalf.tajimas_d_1"], df["indep.tajimas_d_1"] )
+eval_corr( "tajimas_d_2", "grenedalf", "independent", df["grenedalf.tajimas_d_2"], df["indep.tajimas_d_2"] )
+
+# Fst
+eval_corr( "fst_nei", "grenedalf", "independent", df["grenedalf.fst_nei"], df["indep.fst_nei"] )
+eval_corr( "fst_hudson", "grenedalf", "independent", df["grenedalf.fst_hudson"], df["indep.fst_hudson"] )
+eval_corr( "fst_karlsson", "grenedalf", "independent", df["grenedalf.fst_karlsson"], df["indep.fst_karlsson"] )
+eval_corr( "fst_kofler", "grenedalf", "independent", df["grenedalf.fst_kofler"], df["indep.fst_kofler"] )
+
+# ------------------------------------------------------------
+#     grenedalf vs PoPoolation
+# ------------------------------------------------------------
+
+# By default, we do not run popoolation, to save time, and because that's not our job here.
+# Hence, we check here if the columns are filled. If not, we are done here.
+if len(df["popoolation.theta_pi_1"].value_counts()) == 0:
+    sys.exit( success )
+
+# Furthermore, in the tests below we do not run the evaluation function,
+# as we do not want to fail for PoPoolation reasons, and expect slight differences
+# in the numerical values. In particular Tajima's D is known to be bugged in PoPoolation.
+# So instead, we just plot the results here.
+
+# Theta Pi
+plot_corr( "theta_pi_1", "grenedalf", "popoolation", df["grenedalf.theta_pi_1"], df["popoolation.theta_pi_1"], with_kernel = False )
+plot_corr( "theta_pi_2", "grenedalf", "popoolation", df["grenedalf.theta_pi_2"], df["popoolation.theta_pi_2"], with_kernel = False )
+
+# Theta W
+plot_corr( "theta_w_1", "grenedalf", "popoolation", df["grenedalf.theta_w_1"], df["popoolation.theta_w_1"], with_kernel = False )
+plot_corr( "theta_w_2", "grenedalf", "popoolation", df["grenedalf.theta_w_2"], df["popoolation.theta_w_2"], with_kernel = False )
+
+# Tajima's D
+plot_corr( "tajimas_d_1", "grenedalf", "popoolation", df["grenedalf.tajimas_d_1"], df["popoolation.tajimas_d_1"], with_kernel = False )
+plot_corr( "tajimas_d_2", "grenedalf", "popoolation", df["grenedalf.tajimas_d_2"], df["popoolation.tajimas_d_2"], with_kernel = False )
+
+# Fst
+plot_corr( "fst_karlsson", "grenedalf", "popoolation", df["grenedalf.fst_karlsson"], df["popoolation.fst_karlsson"], with_kernel = False )
+plot_corr( "fst_kofler", "grenedalf", "popoolation", df["grenedalf.fst_kofler"], df["popoolation.fst_kofler"], with_kernel = False )
+
+# ------------------------------------------------------------
+#     independent vs PoPoolation
+# ------------------------------------------------------------
+
+# Theta Pi
+plot_corr( "theta_pi_1", "independent", "popoolation", df["indep.theta_pi_1"], df["popoolation.theta_pi_1"], with_kernel = False )
+plot_corr( "theta_pi_2", "independent", "popoolation", df["indep.theta_pi_2"], df["popoolation.theta_pi_2"], with_kernel = False )
+
+# Theta W
+plot_corr( "theta_w_1", "independent", "popoolation", df["indep.theta_w_1"], df["popoolation.theta_w_1"], with_kernel = False )
+plot_corr( "theta_w_2", "independent", "popoolation", df["indep.theta_w_2"], df["popoolation.theta_w_2"], with_kernel = False )
+
+# Tajima's D
+plot_corr( "tajimas_d_1", "independent", "popoolation", df["indep.tajimas_d_1"], df["popoolation.tajimas_d_1"], with_kernel = False )
+plot_corr( "tajimas_d_2", "independent", "popoolation", df["indep.tajimas_d_2"], df["popoolation.tajimas_d_2"], with_kernel = False )
+
+# Fst
+plot_corr( "fst_karlsson", "independent", "popoolation", df["indep.fst_karlsson"], df["popoolation.fst_karlsson"], with_kernel = False )
+plot_corr( "fst_kofler", "independent", "popoolation", df["indep.fst_kofler"], df["popoolation.fst_kofler"], with_kernel = False )
+
+# Final report of the success of this evaluation
+sys.exit( success )
diff --git a/test/execute_tests.py b/test/execute_tests.py
new file mode 100755
index 0000000..8e3fefa
--- /dev/null
+++ b/test/execute_tests.py
@@ -0,0 +1,397 @@
+#!/usr/bin/env python3
+
+# Import packages
+import subprocess
+import sys, os
+import glob
+from multiprocessing import Pool
+from functools import partial
+import datetime
+import pandas as pd
+import numpy as np
+import tqdm
+import warnings
+
+# Import the other scripts in this directory
+import independent_check
+import create_files
+
+# ------------------------------------------------------------
+#     Parameter Space
+# ------------------------------------------------------------
+
+# File to write to
+outtable = "test_results.tsv"
+
+# We here set up the param space, following the notation of independent_check.py
+
+# For two samples (1, 2):
+# n: pool sizes
+# c: coverages
+# f: derived frequencies
+# w: window sizes
+params_n1 = [10, 100]
+params_n2 = [5, 50]
+params_c1 = [10, 100]
+params_c2 = [5, 50]
+params_f1 = [0.1, 0.25, 0.5, 0.75, 0.9]
+params_f2 = [0.2, 0.4, 0.6, 0.8]
+params_w  = [1, 10, 100]
+
+param_space = []
+for n1 in params_n1:
+    for n2 in params_n2:
+        for c1 in params_c1:
+            for c2 in params_c2:
+                for f1 in params_f1:
+                    for f2 in params_f2:
+                        for w in params_w:
+                            # k: derived allele count
+                            k1 = round(f1*c1)
+                            k2 = round(f2*c2)
+                            param_space.append((n1, n2, c1, c2, k1, k2, w))
+
+# ------------------------------------------------------------
+#     Run Scipts
+# ------------------------------------------------------------
+
+# Run function, similar to the common/benchmarks.py function,
+# but without all the printing and measuring, which we do not need here.
+def run_script( script, args ):
+    # Run a test script.
+    # The function expects the script to be run, and a dict with key value pairs for the args.
+    # We run bash scripts containing the tests, and they parse these args again.
+
+    # Prepare the command to be run.
+    # We cannot use normal `time` on Ubuntu, as it is a wrapper that does not have the -v option.
+    # Need to run the underlying program instead.
+    script=os.path.realpath(script)
+    script_path = os.path.dirname(os.path.realpath(script))
+    script_name = os.path.basename(os.path.realpath(script))
+    script_args = " ".join([ key + "=" + str(val) for key, val in args.items() ])
+    command = "/usr/bin/time -v " + script + " " + script_args + " 2>&1"
+
+    # Start the process, capturing everything, and terminating on failure
+    try:
+        result = subprocess.run(
+            command, cwd=script_path,
+            capture_output=True, shell=True, text=True
+        )
+    except Exception as e:
+        print("FAILED")
+        print(e)
+    if result.returncode != 0:
+        print("FAILED")
+        print(result.stdout)
+        print(result.stderr)
+
+# ------------------------------------------------------------
+#     Independent Implementation
+# ------------------------------------------------------------
+
+def run_independent_implementation( params, df ):
+    """
+    params: the set of params to use, for simplicity as a tuple
+    df: the dataframe to which to write results to,
+        expected to have all columns already, and writing to the last row
+        (assuemd to be present and empty at the cells that we write)
+    """
+
+    # Set up the parameters as needed
+    # n: pool size
+    # c: coverage
+    # k: derived allele count
+    # w: window size
+    n1, n2, c1, c2, k1, k2, w = params
+    c1s = [c1] * w
+    c2s = [c2] * w
+    k1s = [k1] + [0] * (w-1)
+    k2s = [k2] + [0] * (w-1)
+    row_index = len(df) - 1
+
+    # Go through all statistics, compute them, and set them in the df columns of the last row
+    df.loc[row_index, 'indep.theta_pi_1']   = independent_check.theta_pi(n1, c1s, k1s, 2)
+    df.loc[row_index, 'indep.theta_pi_2']   = independent_check.theta_pi(n2, c2s, k2s, 2)
+    df.loc[row_index, 'indep.theta_w_1']    = independent_check.theta_w(n1, c1s, k1s, 2)
+    df.loc[row_index, 'indep.theta_w_2']    = independent_check.theta_w(n2, c2s, k2s, 2)
+    df.loc[row_index, 'indep.achaz_var_1']  = independent_check.achaz_var_d(n1, c1s, k1s, 2)
+    df.loc[row_index, 'indep.achaz_var_2']  = independent_check.achaz_var_d(n2, c2s, k2s, 2)
+    with warnings.catch_warnings():
+        # Some of the computations divide by zero, which spams our output...
+        warnings.simplefilter("ignore")
+        df.loc[row_index, 'indep.tajimas_d_1']  = independent_check.tajimas_d(n1, c1s, k1s, 2)
+        df.loc[row_index, 'indep.tajimas_d_2']  = independent_check.tajimas_d(n2, c2s, k2s, 2)
+    df.loc[row_index, 'indep.fst_nei']      = independent_check.fst_nei(n1, c1s, k1s, n2, c2s, k2s)
+    df.loc[row_index, 'indep.fst_hudson']   = independent_check.fst_hudson(n1, c1s, k1s, n2, c2s, k2s)
+    df.loc[row_index, 'indep.fst_karlsson'] = independent_check.fst_karlsson(n1, c1s, k1s, n2, c2s, k2s)
+    df.loc[row_index, 'indep.fst_kofler']   = independent_check.fst_kofler(n1, c1s, k1s, n2, c2s, k2s)
+
+# ------------------------------------------------------------
+#     grenedalf
+# ------------------------------------------------------------
+
+def get_grenedalf_result(file, column):
+    res = pd.read_csv(file, delimiter=',')
+    return float(res[column].iloc[0])
+
+def run_grenedalf_diversity_sample( n, c, k, w ):
+    file_id = f"mpileup-cov_{c}-der_{k}-win_{w}"
+    out_id = file_id + f"-pool_{n}"
+
+    args = {}
+    args["fileid"] = f"../mpileup/{file_id}.pileup"
+    args["poolsize"] = n
+    args["windowsize"] = w
+    args["outid"] = out_id
+
+    run_script("grenedalf/diversity.sh", args)
+    return out_id
+
+def run_grenedalf_diversity( n, c, k, w, df, sample ):
+    # n: pool size
+    # c: coverage
+    # k: derived allele count
+    # w: window size
+    row_index = len(df) - 1
+
+    # Run for all measures once. no need to repeat
+    out_id = run_grenedalf_diversity_sample( n, c, k, w )
+    file_id = f"mpileup-cov_{c}-der_{k}-win_{w}"
+
+    # Names in the table and in grenedalf are different
+    measure_name_map = {
+        "theta_pi":  file_id + ".1.theta_pi",
+        "theta_w":   file_id + ".1.theta_watterson",
+        "tajimas_d": file_id + ".1.tajimas_d"
+    }
+
+    for measure in [ "theta_pi", "theta_w", "tajimas_d" ]:
+        df_col = 'grenedalf.' + measure + '_' + str(sample)
+        tab_col = measure_name_map[measure]
+        val = get_grenedalf_result(f"grenedalf/diversity/diversity-{out_id}.csv", tab_col)
+        df.loc[row_index, df_col] = val
+
+def run_grenedalf_fst_sample( n1, n2, c1, c2, k1, k2, w, method ):
+    file_id = f"sync-cov1_{c1}-cov2_{c2}-der1_{k1}-der2_{k2}-win_{w}"
+    out_id = file_id + f"-pool1_{n1}-pool2_{n2}-method_{method}"
+
+    # We need to make a pool size file, and specify the two sample names
+    os.makedirs("poolsizes", exist_ok=True)
+    with open(f"poolsizes/{out_id}.pool", "w") as f:
+        f.write( f"{file_id}.1\t{n1}\n")
+        f.write( f"{file_id}.2\t{n2}\n")
+
+    args = {}
+    args["fileid"] = f"../sync/{file_id}.sync"
+    # args["poolsizes"] = '"' + str(n1) + ',' + str(n2) + '"'
+    args["poolsizes"] = f"../poolsizes/{out_id}.pool"
+    args["windowsize"] = w
+    args["method"] = method
+    args["outid"] = out_id
+
+    run_script("grenedalf/fst.sh", args)
+    return out_id
+
+def run_grenedalf_fst( n1, n2, c1, c2, k1, k2, w, df ):
+    # n: pool size
+    # c: coverage
+    # k: derived allele count
+    # w: window size
+    row_index = len(df) - 1
+    file_id = f"sync-cov1_{c1}-cov2_{c2}-der1_{k1}-der2_{k2}-win_{w}"
+
+    # Names in the table and in grenedalf are different
+    method_name_map = {
+        "unbiased-nei": "fst_nei",
+        "unbiased-hudson": "fst_hudson",
+        "karlsson": "fst_karlsson",
+        "kofler": "fst_kofler"
+    }
+
+    # Need to run for each method independently
+    for method in ["unbiased-nei","unbiased-hudson","kofler","karlsson"]:
+        df_col = 'grenedalf.' + method_name_map[method]
+        out_id = run_grenedalf_fst_sample( n1, n2, c1, c2, k1, k2, w, method )
+        tab_col = f"{file_id}.1:{file_id}.2.fst"
+        df.loc[row_index, df_col] = get_grenedalf_result(f"grenedalf/fst/fst-{out_id}.csv", tab_col)
+
+def run_grenedalf( params, df ):
+    # n: pool size
+    # c: coverage
+    # k: derived allele count
+    # w: window size
+    n1, n2, c1, c2, k1, k2, w = params
+    row_index = len(df) - 1
+
+    # Go through all statistics, compute them, and set them in the df columns of the last row
+    run_grenedalf_diversity( n1, c1, k1, w, df, 1 )
+    run_grenedalf_diversity( n2, c2, k2, w, df, 2 )
+    run_grenedalf_fst( n1, n2, c1, c2, k1, k2, w, df )
+
+# ------------------------------------------------------------
+#     PoPoolation
+# ------------------------------------------------------------
+
+def get_popoolation_result(file, trim_fst_pair=False):
+    with open(file) as f:
+        line = f.readline()
+    try:
+        # PoPoolation stores the resulting value in the last column.
+        # Go get it. If it's FST, we also need to remove the sample names.
+        last = line.split('\t')[-1]
+        if trim_fst_pair:
+            last = last.split('=')[-1]
+        val = float(last)
+    except Exception as e:
+        val = float("nan")
+    return val
+
+
+
+def run_popoolation_diversity_sample( n, c, k, w, measure ):
+    file_id = f"mpileup-cov_{c}-der_{k}-win_{w}"
+    out_id = file_id + f"-pool_{n}-measure_{measure}"
+
+    args = {}
+    args["fileid"] = f"../mpileup/{file_id}.pileup"
+    args["poolsize"] = n
+    args["windowsize"] = w
+    args["measure"] = measure
+    args["outid"] = out_id
+
+    run_script("popoolation/diversity.sh", args)
+    return out_id
+
+def run_popoolation_diversity( n, c, k, w, df, sample ):
+    # n: pool size
+    # c: coverage
+    # k: derived allele count
+    # w: window size
+    row_index = len(df) - 1
+
+    # Names in the table and in popoolation are different
+    measure_name_map = {
+        "theta_pi":  "pi",
+        "theta_w":   "theta",
+        "tajimas_d": "d"
+    }
+
+    # Need to run for each measure independently
+    for measure in [ "theta_pi", "theta_w", "tajimas_d" ]:
+        df_col = 'popoolation.' + measure + '_' + str(sample)
+        out_id = run_popoolation_diversity_sample( n, c, k, w, measure_name_map[measure] )
+        val = get_popoolation_result(f"popoolation/diversity/{out_id}")
+        df.loc[row_index, df_col] = val
+
+def run_popoolation_fst_sample( n1, n2, c1, c2, k1, k2, w, method ):
+    file_id = f"sync-cov1_{c1}-cov2_{c2}-der1_{k1}-der2_{k2}-win_{w}"
+    out_id = file_id + f"-pool1_{n1}-pool2_{n2}-method_{method}"
+
+    args = {}
+    args["fileid"] = f"../sync/{file_id}.sync"
+    args["poolsizes"] = '"' + str(n1) + ':' + str(n2) + '"'
+    args["windowsize"] = w
+    args["method"] = method
+    args["outid"] = out_id
+
+    run_script("popoolation/fst.sh", args)
+    return out_id
+
+def run_popoolation_fst( n1, n2, c1, c2, k1, k2, w, df ):
+    # n: pool size
+    # c: coverage
+    # k: derived allele count
+    # w: window size
+    row_index = len(df) - 1
+
+    # Names in the table and in popoolation are different
+    method_name_map = {
+        "karlsson": "fst_karlsson",
+        "kofler": "fst_kofler"
+    }
+
+    # Need to run for each method independently
+    for method in ["kofler","karlsson"]:
+        df_col = 'popoolation.' + method_name_map[method]
+        out_id = run_popoolation_fst_sample( n1, n2, c1, c2, k1, k2, w, method )
+        df.loc[row_index, df_col] = get_popoolation_result(f"popoolation/fst/{out_id}.fst", trim_fst_pair=True)
+
+def run_popoolation( params, df ):
+    # n: pool size
+    # c: coverage
+    # k: derived allele count
+    # w: window size
+    n1, n2, c1, c2, k1, k2, w = params
+    row_index = len(df) - 1
+
+    # Go through all statistics, compute them, and set them in the df columns of the last row
+    run_popoolation_diversity( n1, c1, k1, w, df, 1 )
+    run_popoolation_diversity( n2, c2, k2, w, df, 2 )
+    run_popoolation_fst( n1, n2, c1, c2, k1, k2, w, df )
+
+# ------------------------------------------------------------
+#     Main
+# ------------------------------------------------------------
+
+# Run everything but the bagel
+def main():
+    # Create the files that we need to run grenedalf and popoolation
+    create_files.create_from_param_space( param_space )
+
+    # Columns of the resulting dataframe that we are filling
+    fixed_cols = [
+        'pool_size_1', 'pool_size_2', 'coverage_1', 'coverage_2',
+        'derived_count_1', 'derived_count_2', 'window_size'
+    ]
+    indep_cols = [
+        'indep.theta_pi_1', 'indep.theta_pi_2', 'indep.theta_w_1', 'indep.theta_w_2',
+        'indep.achaz_var_1', 'indep.achaz_var_2', 'indep.tajimas_d_1', 'indep.tajimas_d_2',
+        'indep.fst_nei', 'indep.fst_hudson', 'indep.fst_karlsson', 'indep.fst_kofler'
+    ]
+    grenedalf_cols = [
+        'grenedalf.theta_pi_1', 'grenedalf.theta_pi_2',
+        'grenedalf.theta_w_1', 'grenedalf.theta_w_2',
+        'grenedalf.tajimas_d_1', 'grenedalf.tajimas_d_2',
+        'grenedalf.fst_nei', 'grenedalf.fst_hudson',
+        'grenedalf.fst_karlsson', 'grenedalf.fst_kofler'
+    ]
+    popoolation_cols = [
+        'popoolation.theta_pi_1', 'popoolation.theta_pi_2',
+        'popoolation.theta_w_1', 'popoolation.theta_w_2',
+        'popoolation.tajimas_d_1', 'popoolation.tajimas_d_2',
+        'popoolation.fst_karlsson', 'popoolation.fst_kofler'
+    ]
+
+    # Create the dataframe for all results
+    df = pd.DataFrame(columns=[fixed_cols + indep_cols + grenedalf_cols + popoolation_cols])
+
+    # Run all tests of the whole param space
+    print("Analyzing files")
+    for params in tqdm.tqdm(param_space):
+        # Set up the parameters as needed
+        n1, n2, c1, c2, k1, k2, w = params
+        # print("At", params)
+
+        # Add an empty row to the df, and get its index
+        df.loc[len(df)] = [np.nan] * len(df.columns)
+        row_index = len(df) - 1
+
+        # Add the param values to the cells of that row
+        df.loc[row_index, 'pool_size_1']     = n1
+        df.loc[row_index, 'pool_size_2']     = n2
+        df.loc[row_index, 'coverage_1']      = c1
+        df.loc[row_index, 'coverage_2']      = c2
+        df.loc[row_index, 'derived_count_1'] = k1
+        df.loc[row_index, 'derived_count_2'] = k2
+        df.loc[row_index, 'window_size']     = w
+
+        # Run all implementations
+        run_independent_implementation( params, df )
+        run_grenedalf( params, df )
+        # run_popoolation( params, df )
+
+    # Store the whole dataframe
+    df.to_csv(outtable, index=False, sep='\t')
+
+if __name__ == '__main__':
+    main()
diff --git a/test/grenedalf/diversity.sh b/test/grenedalf/diversity.sh
new file mode 100755
index 0000000..6786c7e
--- /dev/null
+++ b/test/grenedalf/diversity.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+
+# Parse the args. We use the key to set a variable named that way.
+for arg in "$@"; do
+    key=${arg%%=*}
+    val=${arg#*=}
+    eval "$key"='$val'
+done
+
+# Set the args that we need here
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+GRENEDALF="${SCRIPT_DIR}/../../bin/grenedalf"
+
+mkdir -p diversity
+mkdir -p logs
+
+echo "Start `date`"
+START=$(date +%s.%N)
+
+${GRENEDALF} diversity \
+    --pileup-path ${fileid} \
+    --window-type interval \
+    --window-interval-width ${windowsize} \
+    --filter-sample-min-count 2 \
+    --filter-sample-min-read-depth 2 \
+    --pileup-min-base-qual 0 \
+    --pool-sizes ${poolsize} \
+    --window-average-policy "window-length" \
+    --tajima-d-denominator-policy "provided-min-read-depth" \
+    --out-dir "diversity" \
+    --file-suffix "-${outid}" \
+    --na-entry nan \
+    --no-extra-columns \
+    --allow-file-overwriting \
+    > "logs/diversity-${outid}.log" 2>&1
+
+END=$(date +%s.%N)
+DIFF=$(echo "$END - $START" | bc)
+
+echo "End `date`"
+echo "Internal time: ${DIFF}"
diff --git a/test/grenedalf/fst.sh b/test/grenedalf/fst.sh
new file mode 100755
index 0000000..dc5a781
--- /dev/null
+++ b/test/grenedalf/fst.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+
+# Parse the args. We use the key to set a variable named that way.
+for arg in "$@"; do
+    key=${arg%%=*}
+    val=${arg#*=}
+    eval "$key"='$val'
+done
+
+# Set the args that we need here
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+GRENEDALF="${SCRIPT_DIR}/../../bin/grenedalf"
+
+mkdir -p fst
+mkdir -p logs
+
+echo "Start `date`"
+START=$(date +%s.%N)
+
+${GRENEDALF} fst \
+    --sync-path ${fileid} \
+    --window-type interval \
+    --window-interval-width ${windowsize} \
+    --pool-sizes ${poolsizes} \
+    --method ${method} \
+    --window-average-policy "window-length" \
+    --no-extra-columns \
+    --out-dir "fst" \
+    --file-suffix "-${outid}" \
+    --na-entry nan \
+    --allow-file-overwriting \
+    > "logs/fst-${outid}.log" 2>&1
+
+END=$(date +%s.%N)
+DIFF=$(echo "$END - $START" | bc)
+
+echo "End `date`"
+echo "Internal time: ${DIFF}"
diff --git a/test/independent_check.py b/test/independent_check.py
new file mode 100755
index 0000000..4aed6a1
--- /dev/null
+++ b/test/independent_check.py
@@ -0,0 +1,551 @@
+"""
+Independent python implementation of popgen estimators
+
+All equation numbers reference Version 2023-04-12 of
+the "Pool-Sequencing corrections for population genetics statistics"
+document.
+
+This script is intended as an independent reimplementation of those statistics
+by Jeffrey P. Spence (jspence@stanford.edu). As such the code is concise and
+tries to follow the notation in the equations document. It is not intended to
+be beautiful or easy to read or efficient.
+"""
+
+from typing import List
+import tqdm
+from pandas import DataFrame
+from numpy import sqrt
+from scipy.stats import binom
+
+# Speed up the repetitive part
+from functools import lru_cache
+
+@lru_cache(maxsize=None)
+def harmonic(n: float, power: float) -> float:
+    """Returns the nth harmonic number of type power"""
+    int_n = round(n)
+    return sum([1/x**power for x in range(1, int_n+1)])
+
+
+@lru_cache(maxsize=None)
+def binom_pmf(r, n, p):
+    return binom.pmf(r, n, p)
+
+
+@lru_cache(maxsize=None)
+def pi_within(
+    n1: int, c1: int, k1: int, n2: int, c2: int, k2: int
+) -> float:
+    """
+    Equation (32)
+
+    n1: number of individuals in pool 1
+    c1: number of reads in pool 1 (coverage)
+    k1: number of reads with "1" allele in pool 1
+    n2: number of individuals in pool 2
+    c2: number of reads in pool 2 (coverage)
+    k2: number of reads with "1" allele in pool 2
+    """
+    f11 = k1/c1
+    f10 = 1 - k1/c1
+    pi_one = n1 / (n1 - 1) * c1 / (c1 - 1) * (1 - f11**2 - f10**2)
+
+    f21 = k2/c2
+    f20 = 1 - k2/c2
+    pi_two = n2 / (n2 - 1) * c2 / (c2 - 1) * (1 - f21**2 - f20**2)
+
+    return (pi_one + pi_two) / 2.
+
+
+@lru_cache(maxsize=None)
+def pi_between(
+    n1: int, c1: int, k1: int, n2: int, c2: int, k2: int
+) -> float:
+    """
+    Equation (33)
+
+    n1: number of individuals in pool 1
+    c1: number of reads in pool 1 (coverage)
+    k1: number of reads with "1" allele in pool 1
+    n2: number of individuals in pool 2
+    c2: number of reads in pool 2 (coverage)
+    k2: number of reads with "1" allele in pool 2
+    """
+    f11 = k1/c1
+    f10 = 1 - k1/c1
+    f21 = k2/c2
+    f20 = 1 - k2/c2
+    return 1 - f11*f21 - f10*f20
+
+
+@lru_cache(maxsize=None)
+def pi_total(
+    n1: int, c1: int, k1: int, n2: int, c2: int, k2: int
+) -> float:
+    """
+    Equation (34)
+
+    n1: number of individuals in pool 1
+    c1: number of reads in pool 1 (coverage)
+    k1: number of reads with "1" allele in pool 1
+    n2: number of individuals in pool 2
+    c2: number of reads in pool 2 (coverage)
+    k2: number of reads with "1" allele in pool 2
+    """
+    pi_w = pi_within(n1, c1, k1, n2, c2, k2)
+    pi_b = pi_between(n1, c1, k1, n2, c2, k2)
+    return (pi_w + pi_b) / 2.
+
+
+@lru_cache(maxsize=None)
+def theta_pi_denom( n, c_ell, b ):
+    den = 0.
+    for m in range(b, c_ell - b + 1):
+        pre_factor = 2 * m * (c_ell - m) / c_ell / (c_ell - 1)
+        for k_val in range(1, n):
+            den += pre_factor * binom_pmf(m, c_ell, k_val/n) / k_val
+    return den
+
+def theta_pi(
+    n: int,
+    c: List[int],
+    k: List[int],
+    b: int
+) -> float:
+    """
+    Equation (13)
+
+    n: number of individuals in the pool
+    c: list of coverages at each position
+    k: number of derived alleles at each position
+    b: minimum number of minor alleles to consider
+    """
+    assert len(c) == len(k)
+    to_return = 0.
+    for c_ell, k_ell in zip(c, k):
+        if k_ell < b or (c_ell - k_ell) < b:
+            continue
+        f1 = k_ell / c_ell
+        f0 = 1 - f1
+        num = c_ell / (c_ell - 1) * (1 - f1**2 - f0**2)
+        den = theta_pi_denom( n, c_ell, b )
+        to_return += num / den
+    return to_return / len(c)
+
+
+@lru_cache(maxsize=None)
+def theta_w_denom( n, c_ell, b ):
+    den = 0.
+    for m in range(b, c_ell - b + 1):
+        for k_val in range(1, n):
+            den += binom_pmf(m, c_ell, k_val/n) / k_val
+    return den
+
+def theta_w(
+    n: int,
+    c: List[int],
+    k: List[int],
+    b: int
+) -> float:
+    """
+    Equation (16)
+
+    n: number of individuals in the pool
+    c: list of coverages at each position
+    k: number of derived alleles at each position
+    b: minimum number of minor alleles to consider
+    """
+    assert len(c) == len(k)
+    to_return = 0.
+    for c_ell, k_ell in zip(c, k):
+        if k_ell < b or (c_ell - k_ell) < b:
+            continue
+        to_return += 1.0 / theta_w_denom( n, c_ell, b )
+
+        # den = 0.
+        # for m in range(b, c_ell - b + 1):
+        #     for k_val in range(1, n):
+        #         den += binom_pmf(m, c_ell, k_val/n) / k_val
+        # to_return += 1/den
+
+    # Up until the equations document version 2023-08-02, we had an algebraic
+    # mistake in the equation here, where we had an additional harmonic a_n
+    # in the final estimator. It was just a mistake in the document, but made
+    # it into the code here (line below). We fixed this now, but keep this line
+    # here in order to back-check later in case that's needed.
+    # See also the `test-watterson` directory here for the simulation test
+    # that we did to make sure that this was just an algebraic mistake.
+    a1 = 1.0 # harmonic(n-1, 1)
+    return to_return / a1 / len(c)
+
+
+@lru_cache(maxsize=None)
+def f_star(n: int) -> float:
+    """Equation (20)"""
+    a1 = harmonic(n-1, 1)
+    return (n-3) / (a1 * (n-1) - n)
+
+
+def n_tilde(n: int, c: int) -> float:
+    """Equation (25)"""
+    return n * (1 - ((n-1)/n)**c)
+
+
+@lru_cache(maxsize=None)
+def alpha_star(n: int) -> float:
+    """Equation (21)"""
+    f = f_star(n)
+    a1 = harmonic(n-1, 1)
+    term_1 = f**2 * (a1 - n / (n-1))
+    term_2 = f * (a1*4*(n+1)/(n-1)**2 - 2*(n+3)/(n-1))
+    term_3 = -a1*8*(n+1)/n/(n-1)**2
+    term_4 = (n**2 + n + 60) / (3 * n * (n-1))
+    return term_1 + term_2 + term_3 + term_4
+
+
+@lru_cache(maxsize=None)
+def beta_star(n: int) -> float:
+    """Equation (22)"""
+    f = f_star(n)
+    a2 = harmonic(n-1, 2)
+    a1 = harmonic(n-1, 1)
+    term_1 = f**2 * (a2 - (2*n-1)/(n-1)**2)
+    term_2 = f * (a1*8/(n-1)
+                  - a1*4/n/(n-1)
+                  - (n**3 + 12*n**2 - 35*n + 18) / n / (n-1)**2)
+    term_3 = -a1*16/n/(n-1)
+    term_4 = a1*8/n**2/(n-1)
+    term_5 = 2 * (n**4 + 110*n**2 - 255*n + 126) / (9 * n**2 * (n-1)**2)
+    return term_1 + term_2 + term_3 + term_4 + term_5
+
+
+def achaz_var_d(
+    n: int,
+    c: List[int],
+    k: List[int],
+    b: int
+) -> float:
+    """
+    Equation (26)
+
+    n: number of individuals in the pool
+    c: list of coverages at each position
+    k: number of derived alleles at each position
+    b: minimum number of minor alleles to consider
+    """
+    assert b == 2
+    assert len(c) == len(k)
+    theta_hat_w = theta_w(n, c, k, b)
+    effective_n = n_tilde(n, b)
+    a_star = alpha_star(effective_n)
+    b_star = beta_star(effective_n)
+    return a_star / len(c) * theta_hat_w + b_star * theta_hat_w**2
+
+
+def tajimas_d(
+    n: int,
+    c: List[int],
+    k: List[int],
+    b: int
+) -> float:
+    """
+    Equation (26)
+
+    n: number of individuals in the pool
+    c: list of coverages at each position
+    k: number of derived alleles at each position
+    b: minimum number of minor alleles to consider
+    """
+    d_pool = theta_pi(n, c, k, b) - theta_w(n, c, k, b)
+    var_pool = achaz_var_d(n, c, k, b)
+    return d_pool / sqrt(var_pool)
+
+
+def fst_nei(
+    n1: int,
+    c1: List[int],
+    k1: List[int],
+    n2: int,
+    c2: List[int],
+    k2: List[int]
+) -> float:
+    """
+    Equation (37)
+
+    n1: number of individuals in pool 1
+    c1: list of coverages at each position in the window in pool 1
+    k1: list of number of derived alleles at each position in pool 1
+    n2: number of individuals in pool 2
+    c2: list of coverages at each position in the window in pool 2
+    k2: list of number of derived alleles at each position in pool 2
+    """
+    assert len(c1) == len(k1)
+    assert len(c1) == len(c2)
+    assert len(c1) == len(k2)
+    pi_ws = [pi_within(n1, c1ell, k1ell, n2, c2ell, k2ell)
+             for c1ell, k1ell, c2ell, k2ell in zip(c1, k1, c2, k2)]
+    pi_ts = [pi_total(n1, c1ell, k1ell, n2, c2ell, k2ell)
+             for c1ell, k1ell, c2ell, k2ell in zip(c1, k1, c2, k2)]
+    return 1 - sum(pi_ws) / sum(pi_ts)
+
+
+def fst_hudson(
+    n1: int,
+    c1: List[int],
+    k1: List[int],
+    n2: int,
+    c2: List[int],
+    k2: List[int]
+) -> float:
+    """
+    Equation (38)
+
+    n1: number of individuals in pool 1
+    c1: list of coverages at each position in the window in pool 1
+    k1: list of number of derived alleles at each position in pool 1
+    n2: number of individuals in pool 2
+    c2: list of coverages at each position in the window in pool 2
+    k2: list of number of derived alleles at each position in pool 2
+    """
+    assert len(c1) == len(k1)
+    assert len(c1) == len(c2)
+    assert len(c1) == len(k2)
+    pi_ws = [pi_within(n1, c1ell, k1ell, n2, c2ell, k2ell)
+             for c1ell, k1ell, c2ell, k2ell in zip(c1, k1, c2, k2)]
+    pi_bs = [pi_between(n1, c1ell, k1ell, n2, c2ell, k2ell)
+             for c1ell, k1ell, c2ell, k2ell in zip(c1, k1, c2, k2)]
+    return 1 - sum(pi_ws) / sum(pi_bs)
+
+
+def karlsson_h(n: int, c: int, k: int) -> float:
+    """
+    Unlabeled equation between (50) and (51)
+
+    n: number of individuals in pool (intentionally unused)
+    c: coverage
+    k: number of derived alleles
+    """
+    u = k/c
+    v = 1 - k/c
+    return c / (c-1) * u * v
+
+
+def karlsson_n(
+    n1: int, c1: int, k1: int, n2: int, c2: int, k2: int
+) -> float:
+    """
+    Equation (49)
+
+    n1: number of individuals in pool 1
+    c1: list of coverages at each position in the window in pool 1
+    k1: list of number of derived alleles at each position in pool 1
+    n2: number of individuals in pool 2
+    c2: list of coverages at each position in the window in pool 2
+    k2: list of number of derived alleles at each position in pool 2
+    """
+    u1 = k1/c1
+    u2 = k2/c2
+    h1 = karlsson_h(n1, c1, k1)
+    h2 = karlsson_h(n2, c2, k2)
+    return (u1 - u2)**2 - (h1/c1 + h2/c2)
+
+
+def karlsson_d(
+    n1: int, c1: int, k1: int, n2: int, c2: int, k2: int
+) -> float:
+    """
+    Equation (50)
+
+    n1: number of individuals in pool 1
+    c1: list of coverages at each position in the window in pool 1
+    k1: list of number of derived alleles at each position in pool 1
+    n2: number of individuals in pool 2
+    c2: list of coverages at each position in the window in pool 2
+    k2: list of number of derived alleles at each position in pool 2
+    """
+    h1 = karlsson_h(n1, c1, k1)
+    h2 = karlsson_h(n2, c2, k2)
+    return karlsson_n(n1, c1, k1, n2, c2, k2) + h1 + h2
+
+
+def fst_karlsson(
+    n1: int,
+    c1: List[int],
+    k1: List[int],
+    n2: int,
+    c2: List[int],
+    k2: List[int]
+) -> float:
+    """
+    Equation (51)
+
+    n1: number of individuals in pool 1
+    c1: list of coverages at each position in the window in pool 1
+    k1: list of number of derived alleles at each position in pool 1
+    n2: number of individuals in pool 2
+    c2: list of coverages at each position in the window in pool 2
+    k2: list of number of derived alleles at each position in pool 2
+    """
+    assert len(c1) == len(k1)
+    assert len(c1) == len(c2)
+    assert len(c1) == len(k2)
+    ns = [karlsson_n(n1, c1ell, k1ell, n2, c2ell, k2ell)
+          for c1ell, k1ell, c2ell, k2ell in zip(c1, k1, c2, k2)]
+    ds = [karlsson_d(n1, c1ell, k1ell, n2, c2ell, k2ell)
+          for c1ell, k1ell, c2ell, k2ell in zip(c1, k1, c2, k2)]
+    return sum(ns) / sum(ds)
+
+
+def pi_kofler(n: int, c: int, k: float) -> float:
+    """
+    Equations (39), (40), and (41)
+
+    n: number of individuals in pool (intentionally unused)
+    c: coverage
+    k: number of derived alleles
+    """
+    return c / (c - 1) * (1 - (k/c)**2 - (1 - k/c)**2)
+
+
+def fst_kofler(
+    n1: int,
+    c1: List[int],
+    k1: List[int],
+    n2: int,
+    c2: List[int],
+    k2: List[int]
+) -> float:
+    """
+    Equations (42), (43), (44), and (45)
+
+    n1: number of individuals in pool 1
+    c1: list of coverages at each position in the window in pool 1
+    k1: list of number of derived alleles at each position in pool 1
+    n2: number of individuals in pool 2
+    c2: list of coverages at each position in the window in pool 2
+    k2: list of number of derived alleles at each position in pool 2
+    """
+    pi_w1 = n1 / (n1 - 1) * sum(
+        [pi_kofler(n1, c1ell, k1ell) for c1ell, k1ell in zip(c1, k1)]
+    )
+    pi_w2 = n2 / (n2 - 1) * sum(
+        [pi_kofler(n1, c2ell, k2ell) for c2ell, k2ell in zip(c2, k2)]
+    )
+    min_n = min([n1, n2])
+    min_c = [min([c1ell, c2ell]) for c1ell, c2ell in zip(c1, c2)]
+    fs = [(k1ell/c1ell + k2ell/c2ell) / 2
+          for k1ell, c1ell, k2ell, c2ell in zip(k1, c1, k2, c2)]
+    pi_t = min_n / (min_n - 1) * sum(
+        [pi_kofler(min_n, c_ell, c_ell*f) for c_ell, f in zip(min_c, fs)]
+    )
+
+    return (pi_t - (pi_w1 + pi_w2)/2) / pi_t
+
+
+def main():
+    n1_list = []
+    n2_list = []
+    c1_list = []
+    c2_list = []
+    k1_list = []
+    k2_list = []
+    w_list = []
+    theta_pi_one_list = []
+    theta_pi_two_list = []
+    theta_w_one_list = []
+    theta_w_two_list = []
+    achaz_var_d_one_list = []
+    achaz_var_d_two_list = []
+    tajimas_d_one_list = []
+    tajimas_d_two_list = []
+    fst_nei_list = []
+    fst_hudson_list = []
+    fst_karlsson_list = []
+    fst_kofler_list = []
+
+    run_vars = []
+    for n1 in [10, 100]:
+        for n2 in [5, 50]:
+            for c1 in [10, 100]:
+                for c2 in [5, 50]:
+                    for f1 in [0.1, 0.25, 0.5, 0.75, 0.9]:
+                        for f2 in [0.2, 0.4, 0.6, 0.8]:
+                            for w in [1, 10, 100]:
+                                k1 = round(f1*c1)
+                                k2 = round(f2*c2)
+                                run_vars.append((n1, n2, c1, c2, k1, k2, w))
+
+    for run_var in tqdm.tqdm(run_vars):
+        n1, n2, c1, c2, k1, k2, w = run_var
+        n1_list.append(n1)
+        n2_list.append(n2)
+        c1_list.append(c1)
+        c2_list.append(c2)
+        k1_list.append(k1)
+        k2_list.append(k2)
+        w_list.append(w)
+        c1s = [c1] * w
+        c2s = [c2] * w
+        k1s = [k1] + [0] * (w-1)
+        k2s = [k2] + [0] * (w-1)
+        theta_pi_one_list.append(
+            theta_pi(n1, c1s, k1s, 2)
+        )
+        theta_pi_two_list.append(
+            theta_pi(n2, c2s, k2s, 2)
+        )
+        theta_w_one_list.append(
+            theta_w(n1, c1s, k1s, 2)
+        )
+        theta_w_two_list.append(
+            theta_w(n2, c2s, k2s, 2)
+        )
+        achaz_var_d_one_list.append(
+            achaz_var_d(n1, c1s, k1s, 2)
+        )
+        achaz_var_d_two_list.append(
+            achaz_var_d(n2, c2s, k2s, 2)
+        )
+        tajimas_d_one_list.append(
+            tajimas_d(n1, c1s, k1s, 2)
+        )
+        tajimas_d_two_list.append(
+            tajimas_d(n2, c2s, k2s, 2)
+        )
+        fst_nei_list.append(
+            fst_nei(n1, c1s, k1s, n2, c2s, k2s)
+        )
+        fst_hudson_list.append(
+            fst_hudson(n1, c1s, k1s, n2, c2s, k2s)
+        )
+        fst_karlsson_list.append(
+            fst_karlsson(n1, c1s, k1s, n2, c2s, k2s)
+        )
+        fst_kofler_list.append(
+            fst_kofler(n1, c1s, k1s, n2, c2s, k2s)
+        )
+    to_save = DataFrame()
+    to_save['pool_size_1'] = n1_list
+    to_save['pool_size_2'] = n2_list
+    to_save['coverage_1'] = c1_list
+    to_save['coverage_2'] = c2_list
+    to_save['derived_count_1'] = k1_list
+    to_save['derived_count_2'] = k2_list
+    to_save['window_size'] = w_list
+    to_save['theta_pi_1'] = theta_pi_one_list
+    to_save['theta_pi_2'] = theta_pi_two_list
+    to_save['theta_w_1'] = theta_w_one_list
+    to_save['theta_w_2'] = theta_w_two_list
+    to_save['achaz_var_1'] = achaz_var_d_one_list
+    to_save['achaz_var_2'] = achaz_var_d_two_list
+    to_save['tajimas_d_1'] = tajimas_d_one_list
+    to_save['tajimas_d_2'] = tajimas_d_two_list
+    to_save['fst_nei'] = fst_nei_list
+    to_save['fst_hudson'] = fst_hudson_list
+    to_save['fst_karlsson'] = fst_karlsson_list
+    to_save['fst_kofler'] = fst_kofler_list
+    to_save.to_csv('independent_check_statistics.tsv', index=False, sep='\t')
+
+
+if __name__ == '__main__':
+    main()
diff --git a/test/pip.txt b/test/pip.txt
new file mode 100644
index 0000000..ed29c05
--- /dev/null
+++ b/test/pip.txt
@@ -0,0 +1,7 @@
+matplotlib
+numpy
+pandas
+scipy
+seaborn
+tqdm
+typing-extensions
diff --git a/test/popoolation/diversity.sh b/test/popoolation/diversity.sh
new file mode 100755
index 0000000..dfcdae1
--- /dev/null
+++ b/test/popoolation/diversity.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+
+# Parse the args. We use the key to set a variable named that way.
+for arg in "$@"; do
+    key=${arg%%=*}
+    val=${arg#*=}
+    eval "$key"='$val'
+done
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+POPOOL="${SCRIPT_DIR}/popoolation"
+
+mkdir -p diversity
+mkdir -p logs
+# rm ${MEASURE}/*
+
+START=$(date +%s.%N)
+echo "Start `date`"
+
+perl ${POPOOL}/Variance-sliding.pl \
+    --input ${fileid} \
+    --output "diversity/${outid}" \
+    --measure ${measure} \
+    --window-size ${windowsize} \
+    --step-size ${windowsize} \
+    --fastq-type sanger \
+    --min-qual 0 \
+    --pool-size ${poolsize} \
+    --min-count 2 \
+    --min-coverage 4 \
+    --max-coverage 500 \
+    --min-covered-fraction 0.0 \
+    --no-discard-deletions \
+    > logs/${outid}.log 2>&1
+
+END=$(date +%s.%N)
+DIFF=$(echo "$END - $START" | bc)
+
+echo "End `date`"
+echo "Internal time: ${DIFF}"
diff --git a/test/popoolation/fst.sh b/test/popoolation/fst.sh
new file mode 100755
index 0000000..e6ece36
--- /dev/null
+++ b/test/popoolation/fst.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+
+# Parse the args. We use the key to set a variable named that way.
+for arg in "$@"; do
+    key=${arg%%=*}
+    val=${arg#*=}
+    eval "$key"='$val'
+done
+
+METHOD=$method
+if [[ "$METHOD" == "karlsson" ]]; then
+    METHODSTR="--karlsson-fst"
+else
+    METHODSTR=""
+fi
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+POPOOL="${SCRIPT_DIR}/popoolation2"
+
+mkdir -p fst
+mkdir -p logs
+# rm fst/*
+
+echo "Start `date`"
+START=$(date +%s.%N)
+
+perl ${POPOOL}/fst-sliding.pl \
+    --input ${fileid} \
+    ${METHODSTR} \
+    --output "fst/${outid}.fst" \
+    --window-size ${windowsize} \
+    --step-size ${windowsize} \
+    --pool-size ${poolsizes} \
+    --min-count 2 \
+    --min-coverage 4 \
+    --max-coverage 100 \
+    --min-covered-fraction 0 \
+    > logs/fst-${outid}.log 2>&1
+
+END=$(date +%s.%N)
+DIFF=$(echo "$END - $START" | bc)
+
+echo "End `date`"
+echo "Internal time: ${DIFF}"
diff --git a/test/run.sh b/test/run.sh
new file mode 100755
index 0000000..394e566
--- /dev/null
+++ b/test/run.sh
@@ -0,0 +1,17 @@
+#!/bin/bash
+
+SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
+
+echo "Running execute_tests.py"
+${SCRIPT_DIR}/execute_tests.py
+if [ $? -ne 0 ]; then
+    echo "FAIL"
+    return 1
+fi
+
+echo "Running evaluate.py"
+${SCRIPT_DIR}/evaluate.py
+if [ $? -ne 0 ]; then
+    echo "FAIL"
+    return 1
+fi