diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
new file mode 100644
index 0000000..1c10fd3
--- /dev/null
+++ b/.devcontainer/devcontainer.json
@@ -0,0 +1,33 @@
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the
+// README at: https://github.com/devcontainers/templates/tree/main/src/docker-existing-dockerfile
+{
+  "name": "Anglerfish",
+  "build": {
+    // Sets the run context to one level up instead of the .devcontainer folder.
+    "context": "..",
+    // Update the 'dockerFile' property if you aren't using the standard 'Dockerfile' filename.
+    "dockerfile": "../Dockerfile",
+    // Use devcontainer target
+    "target": "devcontainer",
+  },
+  "initializeCommand": "echo 'Workaround, see github https://github.com/microsoft/vscode-remote-release/issues/9302#issuecomment-1854476541'",
+  "features": {},
+  "customizations": {
+    "vscode": {
+      "extensions": [
+        "esbenp.prettier-vscode",
+        "wholroyd.jinja",
+        "ms-python.python",
+        "charliermarsh.ruff@2024.2.0",
+        "ms-azuretools.vscode-docker",
+      ],
+    },
+  },
+  // Use 'forwardPorts' to make a list of ports inside the container available locally.
+  // "forwardPorts": [],
+  // "postCreateCommand": "pip3 install -e .",
+  // Configure tool-specific properties.
+  // "customizations": {},
+  // Uncomment to connect as an existing user other than the container default. More info: https://aka.ms/dev-containers-non-root.
+  // "remoteUser": "devcontainer"
+}
diff --git a/.github/workflows/anglerfish.yml b/.github/workflows/anglerfish.yml
index 0fed1f1..0bef153 100644
--- a/.github/workflows/anglerfish.yml
+++ b/.github/workflows/anglerfish.yml
@@ -19,6 +19,7 @@ jobs:
           python=${{ matrix.python-version }} pip
           environment-file: environment.yml
+          environment-name: anglerfish-dev

       # Install Anglerfish
       - shell: bash -l {0}
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
index 09aaac5..4162f50 100644
--- a/.pre-commit-config.yaml
+++ b/.pre-commit-config.yaml
@@ -1,7 +1,7 @@
 # .pre-commit-config.yaml
 repos:
   - repo: https://github.com/astral-sh/ruff-pre-commit
-    rev: v0.1.6
+    rev: v0.1.13
    hooks:
      - id: ruff
      - id: ruff-format
diff --git a/Dockerfile b/Dockerfile
index 445b676..53aad9f 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -1,16 +1,59 @@
-FROM mambaorg/micromamba
+FROM mambaorg/micromamba as base
 LABEL author="Remi-Andre Olsen" \
     description="Anglerfish development version" \
     maintainer="remi-andre.olsen@scilifelab.se"
 USER root
+
+# Check for arm64 architecture to install minimap2
+ARG TARGETARCH
+ENV MINIMAP_VERSION=2.26
+RUN if [ "$TARGETARCH" = "arm64" ]; then \
+    # Install compilation tools for minimap2
+    apt-get update;\
+    apt-get install -y curl build-essential libz-dev;\
+    # Download minimap2
+    curl -L https://github.com/lh3/minimap2/archive/refs/tags/v${MINIMAP_VERSION}.tar.gz | tar -zxvf - ;\
+    # Compile minimap2
+    cd minimap2-${MINIMAP_VERSION};\
+    make arm_neon=1 aarch64=1; \
+    # Add to path
+    mv minimap2 /usr/local/bin/;\
+    fi
+
 COPY --chown=$MAMBA_USER:$MAMBA_USER environment.yml /
-RUN micromamba env create -n anglerfish && micromamba install -y -n anglerfish -f /environment.yml && micromamba clean --all --yes
-ENV PATH /opt/conda/envs/anglerfish/bin:$PATH
+COPY --chown=$MAMBA_USER:$MAMBA_USER requirements.txt /
+
+# Remove minimap2 from environment.yml for arm64
+RUN if [ "$TARGETARCH" = "arm64" ]; then \
+    grep -v 'minimap2' /environment.yml > /environment.tmp.yml ;\
+    else \
+    cp /environment.yml /environment.tmp.yml ;\
+    fi ;\
+    chown $MAMBA_USER:$MAMBA_USER /environment.tmp.yml

 # Add source files to the container
 ADD . /usr/src/anglerfish
 WORKDIR /usr/src/anglerfish
-RUN eval "$(micromamba shell hook --shell bash)" && micromamba activate anglerfish && python -m pip install .[dev]
+
+# Activate the environment
+ARG MAMBA_DOCKERFILE_ACTIVATE=1
+RUN micromamba install -y -n base -f /environment.tmp.yml && micromamba clean --all --yes
+
+#####
+# Devcontainer
+#####
+FROM base as devcontainer
+
+# Useful tools for devcontainer
+RUN apt-get update;\
+    apt-get install -y git vim
+RUN eval "$(micromamba shell hook --shell bash)" && python -m pip install -e .[dev]
+
+#####
+# Main
+#####
+FROM base as main
 USER $MAMBA_USER
+RUN eval "$(micromamba shell hook --shell bash)" && python -m pip install .[dev]
\ No newline at end of file
diff --git a/README.md b/README.md
index 7de2e87..be02c73 100644
--- a/README.md
+++ b/README.md
@@ -33,12 +33,25 @@ conda install -c bioconda anglerfish
 pip install --upgrade --force-reinstall git+https://github.com/remiolsen/anglerfish.git
 ```

+### For Arm64 processors (e.g. Apple M2)
+
+Anglerfish depends on minimap2, which needs to be [compiled from source](https://github.com/lh3/minimap2?tab=readme-ov-file#installation) for Arm64 processors.
+When minimap2 is compiled and available on `$PATH`, anglerfish can be installed with pip:
+
+```shell
+pip install bio-anglerfish
+```
+
+Additionally, if Docker is your cup of tea, the Dockerfile supplied in this repository should also work on both arm64 and x86 processors.
+
 ## Source development

 1. [Install miniconda](https://docs.conda.io/en/latest/miniconda.html).
 2. Set up repo clone with editable install

+For x86 processors (e.g. Intel/AMD):
+
 ```
 git clone https://github.com/remiolsen/anglerfish.git
 cd anglerfish
@@ -49,6 +62,18 @@ conda activate anglerfish-dev
 pip install -e ".[dev]"
 ```

+For Arm64 processors (e.g. Apple M1/M2), first [compile and install minimap2 manually](https://github.com/lh3/minimap2?tab=readme-ov-file#installation), then:
+
+```
+git clone https://github.com/remiolsen/anglerfish.git
+cd anglerfish
+# Create the anglerfish conda environment (but remove minimap2)
+conda env create -f <(grep -v minimap2 environment.yml)
+# Install anglerfish
+conda activate anglerfish-dev
+pip install -e ".[dev]"
+```
+
 3. (Optional) Install pre-commit to prevent committing code that will fail linting

 ```
@@ -146,6 +171,16 @@ In folder `anglerfish_????_??_??_?????/`

 - `anglerfish_stats.txt` Barcode statistics from anglerfish run
 - `anglerfish_stats.json` Machine readable anglerfish statistics

+## Anglerfish Explore (Experimental)
+
+`anglerfish explore` is a command that aims to explore a sequencing pool without a given samplesheet and give hints on which adaptor types are present, which index lengths are used and whether there are any UMIs within the index sequence. The Anglerfish explore command is still under heavy development but can be triggered by running:
+
+```shell
+python anglerfish/explore/cli.py
+```
+
+inside the anglerfish directory.
+
 ## Credits

 The Anglerfish code was written by [@remiolsen](https://github.com/remiolsen) but it would not exist without the contributions of [@FranBonath](https://github.com/FranBonath), [@taborsak](https://github.com/taborsak), [@ssjunnebo](https://github.com/ssjunnebo) and Carl Rubin.
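The next file, `anglerfish/demux/adaptor.py`, factors the old samplesheet-bound `Adaptor` class out into a reusable `Adaptor`/`AdaptorPart` pair. A minimal usage sketch, based only on the code that follows (the actual output depends on what the packaged `config/adaptors.yaml` contains):

```python
# Sketch of the new adaptor API introduced in anglerfish/demux/adaptor.py.
from anglerfish.demux.adaptor import load_adaptors

# load_adaptors() parses the packaged config/adaptors.yaml into Adaptor
# objects; load_adaptors(raw=True) would return the plain YAML dict instead.
for adaptor in load_adaptors():
    # has_index() is True when the sequence contains the "-NNN-" index delimiter
    print(adaptor.name, adaptor.i7.has_index(), adaptor.i5.has_index())
    # insert_Ns=False strips the delimiter entirely, which is what
    # `anglerfish explore` writes out as its minimap2 reference
    print(adaptor.get_fastastring(insert_Ns=False))
```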
diff --git a/anglerfish/demux/adaptor.py b/anglerfish/demux/adaptor.py
new file mode 100644
index 0000000..8db7b66
--- /dev/null
+++ b/anglerfish/demux/adaptor.py
@@ -0,0 +1,69 @@
+import importlib.resources
+import os
+
+import yaml
+
+
+class Adaptor:
+    def __init__(self, adaptors, delim, adaptor, i7_index=None, i5_index=None):
+        self.i7 = AdaptorPart(adaptors[adaptor]["i7"], adaptor, delim, i7_index)
+        self.i5 = AdaptorPart(adaptors[adaptor]["i5"], adaptor, delim, i5_index)
+        self.name = f"{adaptor}"
+        self.delim = delim
+
+    def get_i7_mask(self, insert_Ns=True):
+        return self.i7.get_mask(insert_Ns)
+
+    def get_i5_mask(self, insert_Ns=True):
+        return self.i5.get_mask(insert_Ns)
+
+    def get_fastastring(self, insert_Ns=True):
+        fasta_i5 = f">{self.name}_i5\n{self.get_i5_mask(insert_Ns)}\n"
+        fasta_i7 = f">{self.name}_i7\n{self.get_i7_mask(insert_Ns)}\n"
+        return fasta_i5 + fasta_i7
+
+
+class AdaptorPart:
+    def __init__(self, sequence, name, delim, index):
+        self.sequence = sequence
+        self.name = name
+        self.delim = delim
+        self.index = index
+
+    def has_index(self):
+        return self.sequence.find(self.delim) > -1
+
+    def len_before_index(self):
+        return self.sequence.find(self.delim)
+
+    def len_after_index(self):
+        return len(self.sequence) - self.sequence.find(self.delim) - len(self.delim)
+
+    def get_mask(self, insert_Ns):
+        if self.has_index():
+            if not insert_Ns:
+                return self.sequence.replace(self.delim, "")
+            else:
+                return self.sequence.replace(self.delim, "N" * len(self.index))
+        else:
+            return self.sequence
+
+
+# Fetch all adaptors
+def load_adaptors(raw=False):
+    p = importlib.resources.files("anglerfish.config").joinpath("adaptors.yaml")
+    assert isinstance(p, os.PathLike)
+
+    adaptors_raw = []
+    with open(p) as f:
+        adaptors_raw = yaml.safe_load(f)
+
+    if raw:
+        return adaptors_raw
+    adaptors = []
+    for adaptor in adaptors_raw:
+        adaptors.append(
+            Adaptor(adaptors_raw, "-NNN-", adaptor, i7_index=None, i5_index=None)
+        )
+
+    return adaptors
diff --git a/anglerfish/demux/demux.py b/anglerfish/demux/demux.py
index 444bb01..db43cac 100644
--- a/anglerfish/demux/demux.py
+++ b/anglerfish/demux/demux.py
@@ -24,7 +24,7 @@ def parse_cs(cs_string, index, max_distance):
     return nts, lev.distance(index.lower(), nts)


-def run_minimap2(fastq_in, indexfile, output_paf, threads):
+def run_minimap2(fastq_in, indexfile, output_paf, threads, minimap_b=1):
     """
     Runs Minimap2
     """
@@ -36,9 +36,10 @@ def run_minimap2(fastq_in, indexfile, output_paf, threads):
         "10",
         "-w",
         "5",
-        "-B1",
-        "-A6",
-        "--dual=no",
+        "-A",
+        "6",
+        "-B",
+        str(minimap_b),
         "-c",
         "-t",
         str(threads),
@@ -46,14 +47,18 @@
         fastq_in,
     ]

-    with open(output_paf, "ab") as ofile:
-        subprocess.run(cmd, stdout=ofile, check=True)
+    run_log = f"{output_paf}.log"
+    with open(output_paf, "ab") as ofile, open(run_log, "ab") as log_file:
+        p1 = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=log_file)
+        subprocess.run("sort", stdin=p1.stdout, stdout=ofile, check=True)


-def parse_paf_lines(paf, min_qual=10):
+def parse_paf_lines(paf, min_qual=10, complex_identifier=False):
     """
     Read and parse one paf alignment lines.
     Returns a dict with the import values for later use
+    If complex_identifier is True (default False), the keys will be of the form
+    "{read}_{i5_or_i7}_{strand_str}"
     """
     entries = {}
     with open(paf) as paf:
@@ -62,17 +67,28 @@
             try:
                 # TODO: objectify this
                 entry = {
+                    "read": aln[0],
                     "adapter": aln[5],
                     "rlen": int(aln[1]),  # read length
                     "rstart": int(aln[2]),  # start alignment on read
                     "rend": int(aln[3]),  # end alignment on read
                     "strand": aln[4],
+                    "cg": aln[-2],  # cigar string
                     "cs": aln[-1],  # cs string
                     "q": int(aln[11]),  # Q score
                     "iseq": None,
                     "sample": None,
                 }
-                read = aln[0]
+                read = entry["read"]
+                if complex_identifier:
+                    i5_or_i7 = entry["adapter"].split("_")[-1]
+                    if entry["strand"] == "+":
+                        strand_str = "positive"
+                    else:
+                        strand_str = "negative"
+                    ix = f"{read}_{i5_or_i7}_{strand_str}"
+                else:
+                    ix = read
             except IndexError:
                 log.debug(f"Could not find all paf columns: {read}")
                 continue
@@ -81,10 +97,10 @@
                 log.debug(f"Low quality alignment: {read}")
                 continue

-            if read in entries.keys():
-                entries[read].append(entry)
+            if ix in entries.keys():
+                entries[ix].append(entry)
             else:
-                entries[read] = [entry]
+                entries[ix] = [entry]

     return entries
@@ -166,14 +182,14 @@ def cluster_matches(
         fi7 = ""
         for _, adaptor, _ in sample_adaptor:
             try:
-                i5_seq = adaptor.i5_index
+                i5_seq = adaptor.i5.index
                 if i5_reversed and i5_seq is not None:
                     i5_seq = str(Seq(i5_seq).reverse_complement())
                 fi5, d1 = parse_cs(i5["cs"], i5_seq, max_distance)
             except AttributeError:
                 d1 = 0  # presumably it's single index, so no i5

-            i7_seq = adaptor.i7_index
+            i7_seq = adaptor.i7.index
             if i7_reversed and i7_seq is not None:
                 i7_seq = str(Seq(i7_seq).reverse_complement())
             fi7, d2 = parse_cs(i7["cs"], i7_seq, max_distance)
@@ -189,8 +205,8 @@
             continue
         if dists[index_min] > max_distance:
             # Find only full length i7(+i5) adaptor combos. Basically a list of "known unknowns"
-            if len(fi7) + len(fi5) == len(adaptor.i7_index or "") + len(
-                adaptor.i5_index or ""
+            if len(fi7) + len(fi5) == len(adaptor.i7.index or "") + len(
+                adaptor.i5.index or ""
             ):
                 fi75 = "+".join([i for i in [fi7, fi5] if not i == ""])
                 unmatched_bed.append([read, start_insert, end_insert, fi75, "999", "."])
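The `complex_identifier` flag added above only changes how alignments are keyed, which is what lets `anglerfish explore` keep one best hit per read, adaptor end and strand. A rough illustration (the PAF path is a hypothetical file produced by `run_minimap2` above):

```python
# Illustration of the two keying modes of parse_paf_lines();
# "alignments.paf" is a placeholder path, not a file shipped with the repo.
from anglerfish.demux.demux import parse_paf_lines

# Default: one list of hits per read name, e.g. entries["read1"]
simple = parse_paf_lines("alignments.paf")

# complex_identifier=True: keys become "{read}_{i5_or_i7}_{strand_str}",
# e.g. entries["read1_i7_positive"], so hits for the i5 and i7 ends and for
# each strand of the same read no longer collapse into one list.
complex_keys = parse_paf_lines("alignments.paf", complex_identifier=True)
```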
diff --git a/anglerfish/demux/report.py b/anglerfish/demux/report.py
index 456ff3b..46923e0 100644
--- a/anglerfish/demux/report.py
+++ b/anglerfish/demux/report.py
@@ -97,8 +97,8 @@ def write_dataframe(self, outdir, samplesheet):
             sen_dict = asdict(sentry)
             if sen_dict["sample_name"] == s_dict["sample_name"]:
                 s_dict["adaptor_name"] = sen_dict["adaptor"].name
-                s_dict["i7_index"] = sen_dict["adaptor"].i7_index
-                s_dict["i5_index"] = sen_dict["adaptor"].i5_index
+                s_dict["i7_index"] = sen_dict["adaptor"].i7.index
+                s_dict["i5_index"] = sen_dict["adaptor"].i5.index
         out_list.append(s_dict)
     for key, unmatch in self.unmatched_stats.items():
         for unmatch_sample in unmatch:
diff --git a/anglerfish/demux/samplesheet.py b/anglerfish/demux/samplesheet.py
index ed0c2d6..b62c53f 100644
--- a/anglerfish/demux/samplesheet.py
+++ b/anglerfish/demux/samplesheet.py
@@ -1,19 +1,15 @@
 import csv
 import glob
-import importlib.resources
-import os
 import re
 from dataclasses import dataclass
 from itertools import combinations

 import Levenshtein as lev
-import yaml

-p = importlib.resources.files("anglerfish.config").joinpath("adaptors.yaml")
-assert isinstance(p, os.PathLike)
-with open(p) as stream:
-    adaptors = yaml.safe_load(stream)
+from anglerfish.demux.adaptor import Adaptor, load_adaptors

 delim = "-NNN-"
+adaptors = load_adaptors(raw=True)


 @dataclass
@@ -24,32 +20,6 @@ class SampleSheetEntry:
     ont_barcode: str


-class Adaptor:
-    def __init__(self, adaptor, i7_index=None, i5_index=None):
-        self.i5 = adaptors[adaptor]["i5"]
-        self.i7 = adaptors[adaptor]["i7"]
-        self.i5_index = i5_index
-        self.i7_index = i7_index
-        self.name = f"{adaptor}_len{len(i7_index)}"
-
-        if delim in self.i5 and i5_index is None:
-            raise UserWarning("Adaptor has i5 but no sequence was specified")
-        if delim in self.i7 and i7_index is None:
-            raise UserWarning("Adaptor has i7 but no sequence was specified")
-
-    def get_i5_mask(self):
-        if delim in self.i5:
-            return self.i5.replace(delim, "N" * len(self.i5_index))
-        else:
-            return self.i5
-
-    def get_i7_mask(self):
-        if delim in self.i7:
-            return self.i7.replace(delim, "N" * len(self.i7_index))
-        else:
-            return self.i7
-
-
 class SampleSheet:
     def __init__(self, input_csv, ont_bc):
         # Read samplesheet in format:
@@ -83,7 +53,7 @@ def __init__(self, input_csv, ont_bc):
                 sample_name = row["sample_name"]
                 test_globs[row["fastq_path"]] = glob.glob(row["fastq_path"])

-                bc_re = re.compile("\/(barcode\d\d|unclassified)\/")
+                bc_re = re.compile(r"\/(barcode\d\d|unclassified)\/")
                 ont_barcode = None
                 if ont_bc:
                     ob = re.findall(bc_re, row["fastq_path"])
@@ -94,7 +64,7 @@ def __init__(self, input_csv, ont_bc):

                 ss_entry = SampleSheetEntry(
                     sample_name,
-                    Adaptor(row["adaptors"], i7, i5),
+                    Adaptor(adaptors, delim, row["adaptors"], i7, i5),
                     row["fastq_path"],
                     ont_barcode,
                 )
@@ -135,10 +105,10 @@ def minimum_bc_distance(self):
         for ont_barcode, adaptors in ss_by_bc.items():
             testset[ont_barcode] = []
             for adaptor in adaptors:
-                if adaptor.i5_index is not None:
-                    testset[ont_barcode].append(adaptor.i5_index + adaptor.i7_index)
+                if adaptor.i5.has_index():
+                    testset[ont_barcode].append(adaptor.i5.index + adaptor.i7.index)
                 else:
-                    testset[ont_barcode].append(adaptor.i7_index)
+                    testset[ont_barcode].append(adaptor.i7.index)

         fq_distances = []
         for ont_barcode, adaptors in testset.items():
@@ -148,7 +118,9 @@ def minimum_bc_distance(self):
                 else:
                     for a, b in [i for i in combinations(adaptors, 2)]:
                         dist = lev.distance(a, b)
-                        assert dist > 0, f"""There is one or more identical barcodes in the input samplesheet.
+                        assert (
+                            dist > 0
+                        ), f"""There are one or more identical barcodes in the input samplesheet.
                         First one found: {a}. If these exist in different ONT barcodes, please specify the --ont_barcodes flag."""
                         distances.append(dist)
             fq_distances.append(min(distances))
diff --git a/anglerfish/explore/__init__.py b/anglerfish/explore/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/anglerfish/explore/cli.py b/anglerfish/explore/cli.py
new file mode 100644
index 0000000..1cceab5
--- /dev/null
+++ b/anglerfish/explore/cli.py
@@ -0,0 +1,100 @@
+import click
+
+from anglerfish.explore.explore import run_explore
+
+
+@click.command()
+@click.option("-f", "--fastq", required=True, help="Fastq file to align")
+@click.option("-o", "--outdir", required=True, help="Output directory")
+@click.option(
+    "-t",
+    "--threads",
+    default=4,
+    type=int,
+    help="Number of threads specified to minimap2",
+)
+@click.option(
+    "-e",
+    "--use-existing",
+    is_flag=True,
+    help="Use existing alignments if found in the specified output directory.",
+)
+@click.option(
+    "-g",
+    "--good_hit_threshold",
+    default=0.9,
+    type=float,
+    help="Fraction of adaptor bases immediately before and immediately after index insert required to match perfectly for a hit to be considered a good hit (default=0.9).",
+)
+@click.option(
+    "-i",
+    "--insert_thres_low",
+    default=4,
+    type=int,
+    help="Lower threshold for index(+UMI) insert length, with value included (default=4).",
+)
+@click.option(
+    "-j",
+    "--insert_thres_high",
+    default=30,
+    type=int,
+    help="Upper threshold for index(+UMI) insert length, with value included (default=30).",
+)
+@click.option(
+    "-B",
+    "--minimap_b",
+    default=4,
+    type=int,
+    help="Minimap2 -B parameter, mismatch penalty (default=4).",
+)
+@click.option(
+    "-m",
+    "--min_hits_per_adaptor",
+    default=50,
+    type=int,
+    help="Minimum number of good hits for an adaptor to be included in the analysis (default=50).",
+)
+@click.option(
+    "-u",
+    "--umi_threshold",
+    default=11,
+    type=float,
+    help="Minimum number of bases in insert to perform entropy calculation (default=11).",
+)
+@click.option(
+    "-k",
+    "--kmer_length",
+    default=2,
+    type=int,
+    help="Length of k-mers to use for entropy calculation (default=2).",
+)
+def main(
+    fastq,
+    outdir,
+    threads,
+    use_existing,
+    good_hit_threshold,
+    insert_thres_low,
+    insert_thres_high,
+    minimap_b,
+    min_hits_per_adaptor,
+    umi_threshold,
+    kmer_length,
+):
+    run_explore(
+        fastq,
+        outdir,
+        threads,
+        use_existing,
+        good_hit_threshold,
+        insert_thres_low,
+        insert_thres_high,
+        minimap_b,
+        min_hits_per_adaptor,
+        umi_threshold,
+        kmer_length,
+    )
+
+
+if __name__ == "__main__":
+    main()
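For reference, this is roughly the `run_explore` call the CLI above wires up, with the option defaults spelled out. A sketch only; the input and output paths are placeholders:

```python
# Direct call equivalent to `python anglerfish/explore/cli.py` with defaults.
from anglerfish.explore.explore import run_explore

run_explore(
    fastq="pool.fastq.gz",   # hypothetical input pool
    outdir="explore_out",    # created if it does not already exist
    threads=4,
    use_existing=False,      # True reuses .paf files found in outdir
    good_hit_threshold=0.9,
    insert_thres_low=4,
    insert_thres_high=30,
    minimap_b=4,
    min_hits_per_adaptor=50,
    umi_threshold=11,
    kmer_length=2,
)
```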
diff --git a/anglerfish/explore/entropy.py b/anglerfish/explore/entropy.py
new file mode 100644
index 0000000..9a6bea9
--- /dev/null
+++ b/anglerfish/explore/entropy.py
@@ -0,0 +1,63 @@
+import itertools
+from collections import deque
+
+import numpy as np
+
+
+# Rolling window implementation
+def _window(seq, n=3):
+    win = deque(maxlen=n)
+    for char in seq:
+        win.append(char)
+        if len(win) >= n:
+            yield tuple(win)
+
+
+def _entropy(pk, qk):
+    """
+    Kullback-Leibler divergence (relative entropy)
+    Reference:
+    https://docs.scipy.org/doc/scipy/reference/generated/scipy.stats.entropy.html
+    """
+    # To avoid zeroes in pk, add a tiny fraction of observed values
+    pk = pk + 1e-2
+
+    if abs(np.sum(pk) - 1) > 1e-5:
+        pk = pk / np.sum(pk)
+    return np.sum(pk * np.log(pk / qk))
+
+
+# Each sequence gives a matrix of position and k-mer occurrence.
+def _seq_to_count_matrix(dna_seq, kmer_positions, max=50, k=2):
+    """max is the length of the prefix that will be considered"""
+    if len(dna_seq) < max:
+        max = len(dna_seq)
+
+    matrix = np.zeros((4**k, max - k + 1))
+    for i, word in enumerate(_window(dna_seq[:max], n=k)):
+        matrix[kmer_positions[word], i] += 1
+    return matrix
+
+
+def _count_for_seqs(seqs, kmer_positions, insert_length, k=2):
+    seqs = [seq for seq in seqs if len(seq) == insert_length]
+    matrices = [_seq_to_count_matrix(seq, kmer_positions, max=50, k=k) for seq in seqs]
+    stacked = np.stack(matrices)
+    sum_a = np.sum(stacked, axis=0)
+    even_dist = np.ones(4**k) / (4**k)
+    entropies = [_entropy(a, even_dist) for a in sum_a.T]
+    return entropies
+
+
+def _extract_inserts_from_df(df):
+    mim_re_cs = r"^cs:Z::[1-9][0-9]*\+([a,c,t,g]*):[1-9][0-9]*$"
+    return df.cs.str.extract(mim_re_cs, expand=False).str.upper().tolist()
+
+
+def calculate_relative_entropy(df_good_hits, kmer_length, insert_length):
+    all_kmers = itertools.product("ACTG", repeat=kmer_length)
+    kmer_positions = dict(
+        (kmer, position) for kmer, position in zip(all_kmers, range(4**kmer_length))
+    )
+    seqs = _extract_inserts_from_df(df_good_hits)
+    entropies = _count_for_seqs(seqs, kmer_positions, insert_length, k=kmer_length)
+    return entropies
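To make the entropy helper concrete, here is a toy run of `calculate_relative_entropy`. The cs tags are fabricated, but they follow the "match+insert:match" shape the regex expects; real values come out of minimap2 via `parse_paf_lines`:

```python
# Toy demonstration of calculate_relative_entropy() on two invented hits.
import pandas as pd

from anglerfish.explore.entropy import calculate_relative_entropy

df_good_hits = pd.DataFrame(
    {
        "cs": [
            "cs:Z::12+acgtacgtac:11",  # 10 bp insert
            "cs:Z::13+acgtcgatcg:10",  # 10 bp insert
        ]
    }
)

# One KL divergence per 2-mer position across the 10 bp inserts, measured
# against a uniform distribution; values near zero hint at random bases
# (UMI-like), larger values at a fixed index sequence.
print(calculate_relative_entropy(df_good_hits, kmer_length=2, insert_length=10))
```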
diff --git a/anglerfish/explore/explore.py b/anglerfish/explore/explore.py
new file mode 100644
index 0000000..1fd9cf8
--- /dev/null
+++ b/anglerfish/explore/explore.py
@@ -0,0 +1,200 @@
+import logging
+import os
+import uuid
+
+import pandas as pd
+
+from anglerfish.demux.adaptor import load_adaptors
+from anglerfish.demux.demux import parse_paf_lines, run_minimap2
+from anglerfish.explore.entropy import calculate_relative_entropy
+
+logging.basicConfig(level=logging.INFO)
+log = logging.getLogger("explore")
+
+
+def run_explore(
+    fastq,
+    outdir,
+    threads,
+    use_existing,
+    good_hit_threshold,
+    insert_thres_low,
+    insert_thres_high,
+    minimap_b,
+    min_hits_per_adaptor,
+    umi_threshold,
+    kmer_length,
+):
+    # Set up a run directory
+    run_uuid = str(uuid.uuid4())
+    try:
+        os.mkdir(outdir)
+    except FileExistsError:
+        log.info(f"Output directory {outdir} already exists")
+        if not use_existing:
+            log.error(
+                f"Output directory {outdir} already exists, please use --use-existing to continue"
+            )
+            exit(1)
+        else:
+            pass
+
+    log.info("Running anglerfish explore")
+    log.info(f"Run uuid {run_uuid}")
+
+    adaptors = load_adaptors()
+    alignments = []
+
+    # Map all reads against all adaptors
+    for adaptor in adaptors:
+        adaptor_name = adaptor.name
+
+        # Align
+        aln_path = os.path.join(outdir, f"{adaptor_name}.paf")
+        alignments.append((adaptor, aln_path))
+        if os.path.exists(aln_path) and use_existing:
+            log.info(f"Skipping {adaptor_name} as alignment already exists")
+            continue
+        adaptor_path = os.path.join(outdir, f"{adaptor_name}.fasta")
+        with open(adaptor_path, "w") as f:
+            f.write(adaptor.get_fastastring(insert_Ns=False))
+
+        log.info(f"Aligning {adaptor_name}")
+        run_minimap2(fastq, adaptor_path, aln_path, threads, minimap_b)
+
+    # Parse alignments
+    entries = {}
+    adaptors_included = []
+    for adaptor, aln_path in alignments:
+        log.info(f"Parsing {adaptor.name}")
+        aln_dict_with_lists = parse_paf_lines(aln_path, complex_identifier=True)
+
+        # Choose only the highest scoring alignment for each combination of read, adaptor end and strand
+        aln_dict = dict(
+            [(k, max(v, key=lambda x: x["q"])) for k, v in aln_dict_with_lists.items()]
+        )
+
+        df = pd.DataFrame.from_dict(aln_dict, orient="index")
+        nr_good_hits = {}
+        for adaptor_end_name, adaptor_end in zip(
+            ["i5", "i7"], [adaptor.i5, adaptor.i7]
+        ):
+            if adaptor_end.has_index():
+                # Match Insert Match = mim
+                # The cs string filter is quite strict, requiring 10+ perfect match before insert and 10+ perfect match after insert
+                # The cg string allows for mismatches within the matching strings but no insertions or deletions
+                # All cg string matches are also cs string matches (subset) but not vice versa
+                mim_re_cs = r"^cs:Z::[1-9][0-9]*\+([a,c,t,g]*):[1-9][0-9]*$"
+                mim_re_cg = r"^cg:Z:([0-9]*)M([0-9]*)I([0-9]*)M$"
+                df_mim = df[df.cs.str.match(mim_re_cs)]
+
+                # Extract the match lengths
+                match_col_df = df_mim.cg.str.extract(mim_re_cg).rename(
+                    {0: "match_1_len", 1: "insert_len", 2: "match_2_len"}, axis=1
+                )
+                match_col_df = match_col_df.astype(
+                    {
+                        "match_1_len": "int32",
+                        "insert_len": "int32",
+                        "match_2_len": "int32",
+                    }
+                )
+
+                df_mim.loc[match_col_df.index, match_col_df.columns] = match_col_df
+
+                # Alignment thresholds
+                before_thres = round(
+                    adaptor_end.len_before_index() * good_hit_threshold
+                )
+                after_thres = round(adaptor_end.len_after_index() * good_hit_threshold)
+
+                requirements = (
+                    (df_mim["match_1_len"] >= (before_thres))
+                    & (df_mim["insert_len"] >= insert_thres_low)
+                    & (df_mim["insert_len"] <= insert_thres_high)
+                    & (df_mim["match_2_len"] >= (after_thres))
+                )
+                df_good_hits = df_mim[requirements]
+
+                median_insert_length = df_good_hits["insert_len"].median()
+                insert_lengths = df_good_hits["insert_len"].value_counts()
+            else:
+                m_re_cs = r"^cs:Z::([1-9][0-9]*)$"
+                df_good_hits = df[df.cs.str.match(m_re_cs)]
+                match_col_df = df_good_hits.cs.str.extract(m_re_cs).rename(
+                    {0: "match_1_len"}, axis=1
+                )
+                match_col_df = match_col_df.astype({"match_1_len": "int32"})
+
+                df_good_hits.loc[
+                    match_col_df.index, match_col_df.columns
+                ] = match_col_df
+
+                thres = round(
+                    (adaptor_end.len_before_index() + adaptor_end.len_after_index())
+                    * good_hit_threshold
+                )
+                df_good_hits = df_good_hits[df_good_hits["match_1_len"] >= thres]
+
+                median_insert_length = None
+                insert_lengths = None
+
+            # Filter multiple hits per read and adaptor end
+            df_good_hits = df_good_hits.sort_values(
+                by=["read", "match_1_len"], ascending=False
+            ).drop_duplicates(subset=["read", "adapter"], keep="first")
+
+            if adaptor.name not in entries.keys():
+                entries[adaptor.name] = {}
+            entries[adaptor.name][adaptor_end_name] = df_good_hits
+
+            nr_good_hits[adaptor_end_name] = len(df_good_hits)
+            log.info(
+                f"{adaptor.name}:{adaptor_end_name} had {len(df_good_hits)} good hits."
+            )
+
+        if min(nr_good_hits.values()) >= min_hits_per_adaptor:
+            log.info(f"Adaptor {adaptor.name} is included in the analysis")
+            adaptors_included.append(adaptor)
+        else:
+            log.info(f"Adaptor {adaptor.name} is excluded from the analysis")
+
+    # Print insert length info for adaptor types included in the analysis
+    for adaptor in adaptors_included:
+        for adaptor_end_name, adaptor_end in zip(
+            ["i5", "i7"], [adaptor.i5, adaptor.i7]
+        ):
+            df_good_hits = entries[adaptor.name][adaptor_end_name]
+            if adaptor_end.has_index():
+                median_insert_length = df_good_hits["insert_len"].median()
+                if median_insert_length > umi_threshold:
+                    # Calculate entropies here
+                    entropies = calculate_relative_entropy(
+                        df_good_hits, kmer_length, median_insert_length
+                    )
+                    entropy_file = os.path.join(
+                        outdir, f"{adaptor.name}_{adaptor_end_name}.entropy.csv"
+                    )
+                    pd.Series(entropies).to_csv(entropy_file, float_format="%.2f")
+                    log.info(
+                        f"{adaptor.name}:{adaptor_end_name} relative entropy for k={kmer_length} over the index saved to {entropy_file}"
+                    )
+                insert_lengths = df_good_hits["insert_len"].value_counts()
+                log.info(
+                    f"{adaptor.name}:{adaptor_end_name} had {len(df_good_hits)} good hits with median insert length {median_insert_length}"
+                )
+                histogram_file = os.path.join(
+                    outdir, f"{adaptor.name}_{adaptor_end_name}.hist.csv"
+                )
+                insert_lengths[sorted(insert_lengths.index)].to_csv(histogram_file)
+                log.info(
+                    f"{adaptor.name}:{adaptor_end_name} insert length histogram saved to {histogram_file}"
+                )
+            else:
+                median_insert_length = None
+                insert_lengths = None
+                log.info(
+                    f"{adaptor.name}:{adaptor_end_name} had {len(df_good_hits)} good hits (no insert length since no index)"
+                )
diff --git a/environment.yml b/environment.yml
index 26fd280..ec2d96d 100644
--- a/environment.yml
+++ b/environment.yml
@@ -4,10 +4,9 @@ channels:
   - conda-forge
   - bioconda
 dependencies:
-  - conda-forge::biopython=1.79
-  - conda-forge::python-levenshtein=0.23.0
-  - conda-forge::numpy>=1.22.0
-  - bioconda::minimap2=2.20
-  - conda-forge::pyyaml=6.0
+  - bioconda::minimap2=2.26
+  - pip
+  - pip:
+      - -r requirements.txt
   - conda-forge::pre-commit
   - conda-forge::prettier
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..db39464
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,6 @@
+biopython==1.83
+click==8.1.7
+levenshtein==0.23.0
+numpy==1.26.3
+pandas==2.1.4
+pyyaml==6.0.1
\ No newline at end of file
diff --git a/setup.py b/setup.py
index bba0426..0b50d3f 100644
--- a/setup.py
+++ b/setup.py
@@ -32,12 +32,8 @@
     python_requires=">=3.10",
     packages=find_packages(),
    package_data={"": ["config/adaptors.yaml"]},
-    install_requires=[
-        "python-levenshtein==0.23.0",
-        "biopython==1.79",
-        "numpy>=1.22.0",
-        "pyyaml==6.0",
-    ],
+    # Read requirements.txt
+    install_requires=[x.strip() for x in open("requirements.txt").readlines()],
     extras_require={"dev": ["ruff", "mypy", "editorconfig-checker"]},
     entry_points={
         "console_scripts": [