From c2e28f93bd3616fb28e57bde9d08d48500e95172 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 13:43:44 +0100 Subject: [PATCH 01/15] add ci files --- .github/workflows/lint-code.yml | 135 ++++++++++++++++++++++++++++++++ .pre-commit-config.yaml | 19 +++++ .vscode/settings.json | 8 ++ pyproject.toml | 21 ++++- 4 files changed, 182 insertions(+), 1 deletion(-) create mode 100644 .github/workflows/lint-code.yml create mode 100644 .pre-commit-config.yaml create mode 100644 .vscode/settings.json diff --git a/.github/workflows/lint-code.yml b/.github/workflows/lint-code.yml new file mode 100644 index 0000000..c73256e --- /dev/null +++ b/.github/workflows/lint-code.yml @@ -0,0 +1,135 @@ +name: lint-code +on: [push, pull_request] + +# Cancel if a newer run is started +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + # Use ruff to check for code style violations + ruff-check: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + - name: ruff --> Check for style violations + # Configured in pyproject.toml + run: ruff check . + + # Use ruff to check code formatting + ruff-format: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install ruff + - name: ruff --> Check code formatting + run: ruff format --check . 
+ + # Use mypy for static type checking + mypy-check: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + - name: Install dependencies + run: | + python -m pip install --upgrade pip + pip install mypy + # Start by installing type stubs + - name: mypy --> Install stubs + run: echo -e "y" | mypy --install-types **/*.py || exit 0 + - name: mypy --> Static type checking + # Configured in pyprojet.toml + run: mypy **/*.py + + # Use pipreqs to check for missing dependencies + pipreqs-check: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Set up Python + uses: actions/setup-python@v4 + with: + python-version: "3.10" + + - name: Install pipreqs + run: pip install pipreqs + + - name: Install requirements + run: pip install -r requirements.txt + + - name: Run pipreqs + run: pipreqs --savepath pipreqs.txt + + - name: Compare requirements + run: | + # Extract and sort package names + awk '{print $1}' $1 | sort -u > "$1".compare + awk -F'==' '{print $1}' $2 | sort -u > "$2".compare + + # Compare package lists + if cmp -s "$1".compare "$2".compare + then + echo "Requirements are the same" + exit 0 + else + echo "Requirements are different" + exit 1 + fi + + # Use Prettier to check various file formats + prettier: + runs-on: ubuntu-latest + steps: + - name: Checkout repository + uses: actions/checkout@v4 + - name: Setup node + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install Prettier + run: npm install -g prettier + + - name: Run Prettier --check + run: prettier --check . 
+ + # Use editorconfig to check all remaining file formats + editorconfig: + runs-on: ubuntu-latest + steps: + - name: Checkout repo + uses: actions/checkout@v4 + + - name: Setup node + uses: actions/setup-node@v4 + with: + node-version: "20" + + - name: Install editorconfig-checker + run: npm install -g editorconfig-checker + + - name: editorconfig --> Lint files + run: editorconfig-checker $(git ls-files | grep -v '.py\|.md\|.json\|.yml\|.yaml\|.html') diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..1c09ed2 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,19 @@ +# .pre-commit-config.yaml +repos: + - repo: https://github.com/astral-sh/ruff-pre-commit + rev: v0.1.6 + hooks: + - id: ruff + - id: ruff-format + - repo: https://github.com/pre-commit/mirrors-mypy + rev: "v1.7.1" + hooks: + - id: mypy + - repo: https://github.com/pre-commit/mirrors-prettier + rev: "v4.0.0-alpha.8" + hooks: + - id: prettier + - repo: https://github.com/editorconfig-checker/editorconfig-checker.python + rev: "2.7.2" + hooks: + - id: editorconfig-checker diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..6e4306d --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,8 @@ +{ + "editor.formatOnSave": true, + "editor.defaultFormatter": "esbenp.prettier-vscode", + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff" + }, + "prettier.configPath": "./pyproject.toml" +} diff --git a/pyproject.toml b/pyproject.toml index 7fd26b9..b3bc5ba 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,3 +1,22 @@ [build-system] requires = ["setuptools"] -build-backend = "setuptools.build_meta" \ No newline at end of file +build-backend = "setuptools.build_meta" + +[tool.ruff.lint] +select =[ + # Ruff default rules + # ------------------------------ + "E4", # pycodestyle Imports + "E7", # pycodestyle Statements + "E9", # pycodestyle Runtime + "F", # Pyflakes + + # Additional Comment + # 
------------------------------------------------------ + "I", # isort Best-practice sorting of imports + "UP", # pyupgrade Make sure syntax is up-to-date +] + +[tool.mypy] +ignore_missing_imports = true +follow_imports = 'skip' From 7634a3bd7b150f552f064e7089b4ab160b6f4564 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 13:46:11 +0100 Subject: [PATCH 02/15] non-invasive formatting --- .github/workflows/anglerfish.yml | 8 +- .github/workflows/pypi.yml | 2 +- README.md | 25 ++-- anglerfish/__main__.py | 4 +- anglerfish/anglerfish.py | 216 +++++++++++++++++++++++-------- anglerfish/demux/demux.py | 138 ++++++++++++-------- anglerfish/demux/report.py | 111 ++++++++++------ anglerfish/demux/samplesheet.py | 74 ++++++----- setup.py | 51 ++++---- 9 files changed, 405 insertions(+), 224 deletions(-) diff --git a/.github/workflows/anglerfish.yml b/.github/workflows/anglerfish.yml index 2077e95..0fed1f1 100644 --- a/.github/workflows/anglerfish.yml +++ b/.github/workflows/anglerfish.yml @@ -14,11 +14,11 @@ jobs: - uses: actions/checkout@v4 - uses: mamba-org/setup-micromamba@v1 with: - init-shell: bash - create-args: >- + init-shell: bash + create-args: >- python=${{ matrix.python-version }} pip - environment-file: environment.yml + environment-file: environment.yml # Install Anglerfish - shell: bash -l {0} @@ -29,7 +29,7 @@ jobs: # Run anglerfish --help - shell: bash -l {0} name: Test anglerfish - run: | + run: | anglerfish --help # Run anglerfish using test data diff --git a/.github/workflows/pypi.yml b/.github/workflows/pypi.yml index c1bd356..346e673 100644 --- a/.github/workflows/pypi.yml +++ b/.github/workflows/pypi.yml @@ -28,4 +28,4 @@ jobs: uses: pypa/gh-action-pypi-publish@master with: user: __token__ - password: ${{ secrets.PYPI_API_TOKEN }} \ No newline at end of file + password: ${{ secrets.PYPI_API_TOKEN }} diff --git a/README.md b/README.md index 123c4b2..2a4867d 100644 --- a/README.md +++ b/README.md @@ -1,10 +1,10 @@ # Anglerfish + 
[![Anglerfish CI Status](https://github.com/remiolsen/anglerfish/workflows/Anglerfish/badge.svg)](https://github.com/remiolsen/anglerfish/actions) [![PyPI](https://img.shields.io/pypi/v/bio-anglerfish)](https://pypi.python.org/pypi/bio-anglerfish/) [![Conda (channel only)](https://img.shields.io/conda/vn/bioconda/anglerfish)](https://anaconda.org/bioconda/anglerfish) [![Docker Container available](https://img.shields.io/docker/automated/remiolsen/anglerfish.svg)](https://hub.docker.com/r/remiolsen/anglerfish/) - ## Introduction Anglerfish is a tool designed to demultiplex Illumina libraries sequenced on Oxford Nanopore @@ -17,18 +17,18 @@ For more information on how this can be used, please see this [poster](docs/AGBT ### Requirements -* Python3 (3.7) +- Python3 (3.7) Python modules: -* biopython v. 1.70 -* python-levenshtein v. 0.12.0 -* numpy v. 1.19.2 -* pyyaml v. 6.0 +- biopython v. 1.70 +- python-levenshtein v. 0.12.0 +- numpy v. 1.19.2 +- pyyaml v. 6.0 Software: -* minimap2 v. 2.20 +- minimap2 v. 2.20 ### From PyPi @@ -65,8 +65,8 @@ pip install --upgrade --force-reinstall git+https://github.com/remiolsen/anglerf Anglerfish requires two files to run. - * A basecalled FASTQ file from for instance Guppy (`/path/to/ONTreads.fastq.gz`) - * A samplesheet containing the sample names and indices expected to be found in the sequencing run. (`/path/to/samples.csv`) +- A basecalled FASTQ file from for instance Guppy (`/path/to/ONTreads.fastq.gz`) +- A samplesheet containing the sample names and indices expected to be found in the sequencing run. 
(`/path/to/samples.csv`) Example of a samplesheet file: @@ -135,10 +135,9 @@ P54321_101,truseq,ATTACTCG,/path/to/barcode02/*.fastq.gz In folder `anglerfish_????_??_??_?????/` -* `*.fastq.gz` Demultiplexed reads (if any) -* `anglerfish_stats.txt` Barcode statistics from anglerfish run -* `anglerfish_stats.json` Machine readable anglerfish statistics - +- `*.fastq.gz` Demultiplexed reads (if any) +- `anglerfish_stats.txt` Barcode statistics from anglerfish run +- `anglerfish_stats.json` Machine readable anglerfish statistics ## Credits diff --git a/anglerfish/__main__.py b/anglerfish/__main__.py index b48c1cd..c11e37d 100644 --- a/anglerfish/__main__.py +++ b/anglerfish/__main__.py @@ -1,4 +1,4 @@ from . import anglerfish -if __name__ == '__main__': - anglerfish() \ No newline at end of file +if __name__ == "__main__": + anglerfish() diff --git a/anglerfish/anglerfish.py b/anglerfish/anglerfish.py index 436c8c6..551536f 100755 --- a/anglerfish/anglerfish.py +++ b/anglerfish/anglerfish.py @@ -9,16 +9,22 @@ from datetime import datetime as dt from itertools import groupby from collections import Counter -from .demux.demux import run_minimap2, parse_paf_lines, layout_matches, cluster_matches, write_demuxedfastq +from .demux.demux import ( + run_minimap2, + parse_paf_lines, + layout_matches, + cluster_matches, + write_demuxedfastq, +) from .demux.samplesheet import SampleSheet from .demux.report import Report, SampleStat, AlignmentStat import gzip + logging.basicConfig(level=logging.INFO) -log = logging.getLogger('anglerfish') +log = logging.getLogger("anglerfish") def run_demux(args): - run_uuid = str(uuid.uuid4()) os.mkdir(args.out_fastq) ss = SampleSheet(args.samplesheet, args.ont_barcodes) @@ -36,7 +42,9 @@ def run_demux(args): args.max_distance = 1 log.info(f"Using maximum edit distance of {args.max_distance}") if args.max_distance >= bc_dist: - log.error(f" Edit distance of barcodes in samplesheet are less than the minimum specified {args.max_distance}>={bc_dist}") 
+ log.error( + f" Edit distance of barcodes in samplesheet are less than the minimum specified {args.max_distance}>={bc_dist}" + ) exit() log.debug(f"Samplesheet bc_dist == {bc_dist}") @@ -45,22 +53,23 @@ def run_demux(args): adaptor_set = set(adaptors_t) adaptors_sorted = dict([(i, []) for i in adaptor_set]) for entry in ss: - adaptors_sorted[(entry.adaptor.name, entry.ont_barcode)].append((entry.sample_name, entry.adaptor, os.path.abspath(entry.fastq))) + adaptors_sorted[(entry.adaptor.name, entry.ont_barcode)].append( + (entry.sample_name, entry.adaptor, os.path.abspath(entry.fastq)) + ) out_fastqs = [] for key, sample in adaptors_sorted.items(): - adaptor_name, ont_barcode = key fastq_path = sample[0][2] # If there are multiple ONT barcodes, we need to add the ONT barcode to the adaptor name adaptor_bc_name = adaptor_name if ont_barcode: - adaptor_bc_name = adaptor_name+"_"+ont_barcode + adaptor_bc_name = adaptor_name + "_" + ont_barcode fastq_files = glob.glob(fastq_path) # Align aln_path = os.path.join(args.out_fastq, f"{adaptor_bc_name}.paf") - adaptor_path = os.path.join(args.out_fastq,f"{adaptor_name}.fasta") + adaptor_path = os.path.join(args.out_fastq, f"{adaptor_name}.fasta") with open(adaptor_path, "w") as f: f.write(ss.get_fastastring(adaptor_name)) for fq in fastq_files: @@ -69,70 +78,101 @@ def run_demux(args): # Easy line count in input fastq files num_fq = 0 for fq in fastq_files: - with gzip.open(fq, 'rb') as f: + with gzip.open(fq, "rb") as f: for i in f: - num_fq += 1 - num_fq = int(num_fq / 4) + num_fq += 1 + num_fq = int(num_fq / 4) paf_entries = parse_paf_lines(aln_path) # Make stats log.info(f" Searching for adaptor hits in {adaptor_bc_name}") - fragments, singletons, concats, unknowns = layout_matches(adaptor_name+"_i5",adaptor_name+"_i7",paf_entries) + fragments, singletons, concats, unknowns = layout_matches( + adaptor_name + "_i5", adaptor_name + "_i7", paf_entries + ) stats = AlignmentStat(adaptor_bc_name) 
stats.compute_pafstats(num_fq, fragments, singletons, concats, unknowns) report.add_alignment_stat(stats) # Demux - no_matches = []; matches = [] - flipped_i7 = False; flipped_i5 = False + no_matches = [] + matches = [] + flipped_i7 = False + flipped_i5 = False flips = { "i7": {"i7_reversed": True, "i5_reversed": False}, "i5": {"i7_reversed": False, "i5_reversed": True}, - "i7+i5": {"i7_reversed": True, "i5_reversed": True} + "i7+i5": {"i7_reversed": True, "i5_reversed": True}, } if args.force_rc is not None: - log.info(f" Force reverse complementing {args.force_rc} index for adaptor {adaptor_name}. Lenient mode is disabled") - no_matches, matches = cluster_matches(adaptors_sorted[key], fragments, args.max_distance, **flips[args.force_rc]) + log.info( + f" Force reverse complementing {args.force_rc} index for adaptor {adaptor_name}. Lenient mode is disabled" + ) + no_matches, matches = cluster_matches( + adaptors_sorted[key], + fragments, + args.max_distance, + **flips[args.force_rc], + ) flipped_i7, flipped_i5 = flips[args.force_rc].values() - elif args.lenient: # Try reverse complementing the I5 and/or i7 indices and choose the best match - no_matches, matches = cluster_matches(adaptors_sorted[key], fragments, args.max_distance) + elif args.lenient: # Try reverse complementing the I5 and/or i7 indices and choose the best match + no_matches, matches = cluster_matches( + adaptors_sorted[key], fragments, args.max_distance + ) flipped = {} for flip, rev in flips.items(): - rc_no_matches, rc_matches = cluster_matches(adaptors_sorted[key], fragments, args.max_distance, **rev) + rc_no_matches, rc_matches = cluster_matches( + adaptors_sorted[key], fragments, args.max_distance, **rev + ) flipped[flip] = (rc_matches, rc_no_matches, len(rc_matches)) best_flip = max(zip(flipped.values(), flipped.keys()))[1] # There are no barcode flips with unambiguously more matches, so we abort - if sorted([i[2] for i in flipped.values()])[-1] == sorted([i[2] for i in 
flipped.values()])[-2]: - log.info(f"Could not find any barcode reverse complements with unambiguously more matches") + if ( + sorted([i[2] for i in flipped.values()])[-1] + == sorted([i[2] for i in flipped.values()])[-2] + ): + log.info( + f"Could not find any barcode reverse complements with unambiguously more matches" + ) elif flipped[best_flip][2] > len(matches) * args.lenient_factor: - log.info(f" Reverse complementing {best_flip} index for adaptor {adaptor_name} found at least {args.lenient_factor} times more matches") + log.info( + f" Reverse complementing {best_flip} index for adaptor {adaptor_name} found at least {args.lenient_factor} times more matches" + ) matches, no_matches, _ = flipped[best_flip] flipped_i7, flipped_i5 = flips[best_flip].values() else: log.info(f" Using original index orientation for {adaptor_name}") else: - no_matches, matches = cluster_matches(adaptors_sorted[key], fragments, args.max_distance) - - for k, v in groupby(sorted(matches,key=lambda x: x[3]), key=lambda y: y[3]): + no_matches, matches = cluster_matches( + adaptors_sorted[key], fragments, args.max_distance + ) + for k, v in groupby(sorted(matches, key=lambda x: x[3]), key=lambda y: y[3]): # To avoid collisions in fastq filenames, we add the ONT barcode to the sample name fq_prefix = k if ont_barcode: - fq_prefix = ont_barcode+"-"+fq_prefix - fq_name = os.path.join(args.out_fastq, fq_prefix+".fastq.gz") + fq_prefix = ont_barcode + "-" + fq_prefix + fq_name = os.path.join(args.out_fastq, fq_prefix + ".fastq.gz") out_fastqs.append(fq_name) sample_dict = {i[0]: [i] for i in v} # Find read lengths rlens = np.array([]) - for l,w in sample_dict.items(): + for l, w in sample_dict.items(): for i in w: - rlens = np.append(rlens, i[2]-i[1]) - rmean = np.round(np.mean(rlens),2) - rstd = np.round(np.std(rlens),2) + rlens = np.append(rlens, i[2] - i[1]) + rmean = np.round(np.mean(rlens), 2) + rstd = np.round(np.std(rlens), 2) - sample_stat = SampleStat(k, len(sample_dict.keys()), rmean, 
rstd, flipped_i7, flipped_i5, ont_barcode) + sample_stat = SampleStat( + k, + len(sample_dict.keys()), + rmean, + rstd, + flipped_i7, + flipped_i5, + ont_barcode, + ) report.add_sample_stat(sample_stat) if not args.skip_demux: write_demuxedfastq(sample_dict, fastq_path, fq_name) @@ -141,11 +181,15 @@ def run_demux(args): nomatch_count = Counter([x[3] for x in no_matches]) if args.max_unknowns == None: args.max_unknowns = len([sample for sample in ss]) + 10 - report.add_unmatched_stat(nomatch_count.most_common(args.max_unknowns), ont_barcode, adaptor_name) + report.add_unmatched_stat( + nomatch_count.most_common(args.max_unknowns), ont_barcode, adaptor_name + ) # Check if there were samples in the samplesheet without adaptor alignments and add them to report for entry in ss: - if entry.sample_name not in [s.sample_name for s in [stat for stat in report.sample_stats]]: + if entry.sample_name not in [ + s.sample_name for s in [stat for stat in report.sample_stats] + ]: sample_stat = SampleStat(entry.sample_name, 0, 0, 0, False, ont_barcode) report.add_sample_stat(sample_stat) @@ -154,31 +198,99 @@ def run_demux(args): report.write_dataframe(args.out_fastq, ss) if args.skip_fastqc: - log.warning(" As of version 0.4.1, built in support for FastQC + MultiQC is removed. The '-f' flag is redundant.") + log.warning( + " As of version 0.4.1, built in support for FastQC + MultiQC is removed. The '-f' flag is redundant." 
+ ) + def anglerfish(): - parser = argparse.ArgumentParser(description='Tools to demux I7 and I5 barcodes when sequenced by single-molecules') - parser.add_argument('--samplesheet', '-s', required=True, help='CSV formatted list of samples and barcodes') - parser.add_argument('--out_fastq', '-o', default='.', help='Analysis output folder (default: Current dir)') - parser.add_argument('--threads', '-t', default=4, help='Number of threads to use (default: 4)') - parser.add_argument('--skip_demux', '-c', action='store_true', help='Only do BC counting and not demuxing') - parser.add_argument('--skip_fastqc', '-f', action='store_true', help=argparse.SUPPRESS) - parser.add_argument('--max-distance', '-m', type=int, help='Manually set maximum edit distance for BC matching, automatically set this is set to either 1 or 2') - parser.add_argument('--max-unknowns', '-u', type=int, help='Maximum number of unknown indices to show in the output (default: length of samplesheet + 10)') - parser.add_argument('--run_name', '-r', default='anglerfish', help='Name of the run (default: anglerfish)') - parser.add_argument('--lenient', '-l', action='store_true', help='Will try reverse complementing the I5 and/or I7 indices and choose the best match.') - parser.add_argument('--lenient_factor', '-x', default=4.0, type=float, help='If lenient is set, this is the minimum factor of additional matches required to reverse complement the index (default: 4.0)') - parser.add_argument('--force_rc', '-p', choices=['i7', 'i5', 'i7+i5'], help='Force reverse complementing the I5 and/or I7 indices. This will disregard lenient mode.') - parser.add_argument('--ont_barcodes', '-n', action='store_true', help='Will assume the samplesheet refers to a single ONT run prepped with a barcoding kit. 
And will treat each barcode separately') - parser.add_argument('--debug', '-d', action='store_true', help='Extra commandline output') - parser.add_argument('--version', '-v', action='version', help='Print version and quit', version=f'anglerfish {pkg_resources.get_distribution("bio-anglerfish").version}') + parser = argparse.ArgumentParser( + description="Tools to demux I7 and I5 barcodes when sequenced by single-molecules" + ) + parser.add_argument( + "--samplesheet", + "-s", + required=True, + help="CSV formatted list of samples and barcodes", + ) + parser.add_argument( + "--out_fastq", + "-o", + default=".", + help="Analysis output folder (default: Current dir)", + ) + parser.add_argument( + "--threads", "-t", default=4, help="Number of threads to use (default: 4)" + ) + parser.add_argument( + "--skip_demux", + "-c", + action="store_true", + help="Only do BC counting and not demuxing", + ) + parser.add_argument( + "--skip_fastqc", "-f", action="store_true", help=argparse.SUPPRESS + ) + parser.add_argument( + "--max-distance", + "-m", + type=int, + help="Manually set maximum edit distance for BC matching, automatically set this is set to either 1 or 2", + ) + parser.add_argument( + "--max-unknowns", + "-u", + type=int, + help="Maximum number of unknown indices to show in the output (default: length of samplesheet + 10)", + ) + parser.add_argument( + "--run_name", + "-r", + default="anglerfish", + help="Name of the run (default: anglerfish)", + ) + parser.add_argument( + "--lenient", + "-l", + action="store_true", + help="Will try reverse complementing the I5 and/or I7 indices and choose the best match.", + ) + parser.add_argument( + "--lenient_factor", + "-x", + default=4.0, + type=float, + help="If lenient is set, this is the minimum factor of additional matches required to reverse complement the index (default: 4.0)", + ) + parser.add_argument( + "--force_rc", + "-p", + choices=["i7", "i5", "i7+i5"], + help="Force reverse complementing the I5 and/or I7 indices. 
This will disregard lenient mode.", + ) + parser.add_argument( + "--ont_barcodes", + "-n", + action="store_true", + help="Will assume the samplesheet refers to a single ONT run prepped with a barcoding kit. And will treat each barcode separately", + ) + parser.add_argument( + "--debug", "-d", action="store_true", help="Extra commandline output" + ) + parser.add_argument( + "--version", + "-v", + action="version", + help="Print version and quit", + version=f'anglerfish {pkg_resources.get_distribution("bio-anglerfish").version}', + ) args = parser.parse_args() utcnow = dt.utcnow() runname = utcnow.strftime(f"{args.run_name}_%Y_%m_%d_%H%M%S") assert os.path.exists(args.out_fastq) assert os.path.exists(args.samplesheet) - args.out_fastq = os.path.join(os.path.abspath(args.out_fastq),runname) + args.out_fastq = os.path.join(os.path.abspath(args.out_fastq), runname) args.samplesheet = os.path.abspath(args.samplesheet) args.run_name = runname run_demux(args) diff --git a/anglerfish/demux/demux.py b/anglerfish/demux/demux.py index 7154287..b55bdac 100644 --- a/anglerfish/demux/demux.py +++ b/anglerfish/demux/demux.py @@ -6,8 +6,9 @@ import io from Bio.SeqIO.QualityIO import FastqGeneralIterator from Bio.Seq import Seq + logging.basicConfig(level=logging.INFO) -log = logging.getLogger('demux') +log = logging.getLogger("demux") def parse_cs(cs_string, index, max_distance): @@ -30,22 +31,24 @@ def run_minimap2(fastq_in, indexfile, output_paf, threads): "minimap2", "--cs", "-m8", - "-k", "10", - "-w", "5", + "-k", + "10", + "-w", + "5", "-B1", "-A6", "--dual=no", "-c", - "-t", str(threads), + "-t", + str(threads), indexfile, - fastq_in + fastq_in, ] with open(output_paf, "ab") as ofile: proc = subprocess.run(cmd, stdout=ofile, check=True) - def parse_paf_lines(paf, min_qual=10): """ Read and parse one paf alignment lines. 
@@ -57,16 +60,17 @@ def parse_paf_lines(paf, min_qual=10): aln = paf_line.split() try: # TODO: objectify this - entry = {"adapter": aln[5], - "rlen": int(aln[1]), # read length - "rstart": int(aln[2]), # start alignment on read - "rend": int(aln[3]), # end alignment on read - "strand": aln[4], - "cs": aln[-1], # cs string - "q": int(aln[11]), # Q score - "iseq": None, - "sample": None - } + entry = { + "adapter": aln[5], + "rlen": int(aln[1]), # read length + "rstart": int(aln[2]), # start alignment on read + "rend": int(aln[3]), # end alignment on read + "strand": aln[4], + "cs": aln[-1], # cs string + "q": int(aln[11]), # Q score + "iseq": None, + "sample": None, + } read = aln[0] except IndexError: log.debug(f"Could not find all paf columns: {read}") @@ -94,14 +98,20 @@ def layout_matches(i5_name, i7_name, paf_entries): - unknowns. Any other reads """ - fragments = {}; singletons = {}; concats = {}; unknowns = {} + fragments = {} + singletons = {} + concats = {} + unknowns = {} for read, entry_list in paf_entries.items(): sorted_entries = [] - for k in range(len(entry_list)-1): - entry_i = entry_list[k]; entry_j = entry_list[k+1] - if entry_i['adapter'] != entry_j['adapter'] and \ - (entry_i['adapter'] == i5_name and entry_j['adapter'] == i7_name) or \ - (entry_j['adapter'] == i5_name and entry_i['adapter'] == i7_name): + for k in range(len(entry_list) - 1): + entry_i = entry_list[k] + entry_j = entry_list[k + 1] + if ( + entry_i["adapter"] != entry_j["adapter"] + and (entry_i["adapter"] == i5_name and entry_j["adapter"] == i7_name) + or (entry_j["adapter"] == i5_name and entry_i["adapter"] == i7_name) + ): if entry_i in sorted_entries: sorted_entries.append(entry_j) else: @@ -109,27 +119,35 @@ def layout_matches(i5_name, i7_name, paf_entries): if len(entry_list) == 1: singletons[read] = entry_list elif len(sorted_entries) == 2: - fragments[read] = sorted(sorted_entries,key=lambda l:l['rstart']) + fragments[read] = sorted(sorted_entries, key=lambda l: 
l["rstart"]) elif len(sorted_entries) > 2: - concats[read] = sorted(sorted_entries,key=lambda l:l['rstart']) + concats[read] = sorted(sorted_entries, key=lambda l: l["rstart"]) else: unknowns[read] = entry_list - #TODO: add minimum insert size + # TODO: add minimum insert size return (fragments, singletons, concats, unknowns) -def cluster_matches(sample_adaptor, matches, max_distance, i7_reversed=False, i5_reversed=False): - +def cluster_matches( + sample_adaptor, matches, max_distance, i7_reversed=False, i5_reversed=False +): # Only illumina fragments - matched = {}; matched_bed = []; unmatched_bed = [] + matched = {} + matched_bed = [] + unmatched_bed = [] for read, alignments in matches.items(): - i5 = False i7 = False - if alignments[0]['adapter'][-2:] == 'i5' and alignments[1]['adapter'][-2:] == 'i7': + if ( + alignments[0]["adapter"][-2:] == "i5" + and alignments[1]["adapter"][-2:] == "i7" + ): i5 = alignments[0] i7 = alignments[1] - elif alignments[1]['adapter'][-2:] == 'i5' and alignments[0]['adapter'][-2:] == 'i7': + elif ( + alignments[1]["adapter"][-2:] == "i5" + and alignments[0]["adapter"][-2:] == "i7" + ): i5 = alignments[1] i7 = alignments[0] else: @@ -137,72 +155,80 @@ def cluster_matches(sample_adaptor, matches, max_distance, i7_reversed=False, i5 continue dists = [] - fi5 = ""; fi7 = "" + fi5 = "" + fi7 = "" for _, adaptor, _ in sample_adaptor: try: i5_seq = adaptor.i5_index if i5_reversed and i5_seq is not None: i5_seq = str(Seq(i5_seq).reverse_complement()) - fi5, d1 = parse_cs(i5['cs'], i5_seq, max_distance) + fi5, d1 = parse_cs(i5["cs"], i5_seq, max_distance) except AttributeError: - d1 = 0 # presumably it's single index, so no i5 + d1 = 0 # presumably it's single index, so no i5 i7_seq = adaptor.i7_index if i7_reversed and i7_seq is not None: i7_seq = str(Seq(i7_seq).reverse_complement()) - fi7, d2 = parse_cs(i7['cs'], i7_seq, max_distance) - dists.append(d1+d2) + fi7, d2 = parse_cs(i7["cs"], i7_seq, max_distance) + dists.append(d1 + d2) 
index_min = min(range(len(dists)), key=dists.__getitem__) # Test if two samples in the sheet is equidistant to the i5/i7 - if len([i for i, j in enumerate(dists) if j==dists[index_min]]) > 1: + if len([i for i, j in enumerate(dists) if j == dists[index_min]]) > 1: log.debug(" Ambiguous alignment, skipping") continue - start_insert = min(i5['rend'],i7['rend']) - end_insert = max(i7['rstart'],i5['rstart']) + start_insert = min(i5["rend"], i7["rend"]) + end_insert = max(i7["rstart"], i5["rstart"]) if end_insert - start_insert < 10: log.debug(" Erroneous / overlapping adaptor matches") continue if dists[index_min] > max_distance: log.debug(f" No match {fi7}-{fi5}") # Find only full length i7(+i5) adaptor combos. Basically a list of "known unknowns" - if len(fi7) + len(fi5) == len(adaptor.i7_index or "") + len(adaptor.i5_index or ""): + if len(fi7) + len(fi5) == len(adaptor.i7_index or "") + len( + adaptor.i5_index or "" + ): fi75 = "+".join([i for i in [fi7, fi5] if not i == ""]) unmatched_bed.append([read, start_insert, end_insert, fi75, "999", "."]) continue matched[read] = alignments log.debug(f" Matched {read} to {adaptor.i7_index}-{adaptor.i5_index}") - matched_bed.append([read, start_insert, end_insert, sample_adaptor[index_min][0], "999", "."]) + matched_bed.append( + [read, start_insert, end_insert, sample_adaptor[index_min][0], "999", "."] + ) return unmatched_bed, matched_bed - def write_demuxedfastq(beds, fastq_in, fastq_out): """ - Take a set of coordinates in bed format [[seq1, start, end, ..][seq2, ..]] - from over a set of fastq entries in the input files and do extraction. - TODO: Can be optimized using pigz or rewritten using python threading + Take a set of coordinates in bed format [[seq1, start, end, ..][seq2, ..]] + from over a set of fastq entries in the input files and do extraction. 
+ TODO: Can be optimized using pigz or rewritten using python threading """ gz_buf = 131072 fq_files = glob.glob(fastq_in) for fq in fq_files: - with subprocess.Popen(["gzip", "-c", "-d", fq], - stdout=subprocess.PIPE, bufsize=gz_buf) as fzi: + with subprocess.Popen( + ["gzip", "-c", "-d", fq], stdout=subprocess.PIPE, bufsize=gz_buf + ) as fzi: fi = io.TextIOWrapper(fzi.stdout, write_through=True) - with open(fastq_out, 'ab') as ofile: - with subprocess.Popen(["gzip", "-c", "-f"], - stdin=subprocess.PIPE, stdout=ofile, bufsize=gz_buf, close_fds=False) as oz: - + with open(fastq_out, "ab") as ofile: + with subprocess.Popen( + ["gzip", "-c", "-f"], + stdin=subprocess.PIPE, + stdout=ofile, + bufsize=gz_buf, + close_fds=False, + ) as oz: for title, seq, qual in FastqGeneralIterator(fi): new_title = title.split() if new_title[0] not in beds.keys(): continue outfqs = "" for bed in beds[new_title[0]]: - - new_title[0] += "_"+bed[3] + new_title[0] += "_" + bed[3] outfqs += "@{}\n".format(" ".join(new_title)) - outfqs += "{}\n".format(seq[bed[1]:bed[2]]) + outfqs += "{}\n".format(seq[bed[1] : bed[2]]) outfqs += "+\n" - outfqs += "{}\n".format(qual[bed[1]:bed[2]]) - oz.stdin.write(outfqs.encode('utf-8')) + outfqs += "{}\n".format(qual[bed[1] : bed[2]]) + oz.stdin.write(outfqs.encode("utf-8")) diff --git a/anglerfish/demux/report.py b/anglerfish/demux/report.py index 57d50b3..2ac3e89 100644 --- a/anglerfish/demux/report.py +++ b/anglerfish/demux/report.py @@ -3,8 +3,8 @@ from dataclasses import dataclass, asdict from typing import ClassVar -class Report(object): +class Report(object): unmatch_header = ["index", "num_reads", "ont_barcode"] def __init__(self, run_name, uuid, version): @@ -17,27 +17,33 @@ def __init__(self, run_name, uuid, version): def add_alignment_stat(self, aln_stat): self.aln_stats.append(aln_stat) + def add_sample_stat(self, sample_stat): self.sample_stats.append(sample_stat) + def add_unmatched_stat(self, unmatched_stat, ont_barcode, adaptor_name): 
self.unmatched_stats[(ont_barcode, adaptor_name)] = unmatched_stat def write_report(self, outdir): - with open(os.path.join(outdir,"anglerfish_stats.txt"), "w") as f: - f.write(f"Anglerfish v. {self.version} (run: {self.run_name}, {self.uuid})\n===================\n") + with open(os.path.join(outdir, "anglerfish_stats.txt"), "w") as f: + f.write( + f"Anglerfish v. {self.version} (run: {self.run_name}, {self.uuid})\n===================\n" + ) for astat in self.aln_stats: f.write(f"{astat.adaptor_name}:\n") - for i,j in astat.paf_stats.items(): + for i, j in astat.paf_stats.items(): f.write(f"{j[0]}\t{i} ({j[1]*100:.2f}%)\n") f.write("\n{}\n".format("\t".join(getattr(SampleStat, "header")))) for sample in self.sample_stats: - f.write(f"{sample.sample_name}\t{sample.num_reads}\t{sample.mean_read_len}\t{sample.std_read_len}\t{sample.i7_reversed}\t{sample.i5_reversed}\t{sample.ont_barcode}\n") - uhead = getattr(Report, 'unmatch_header') - f.write(f"\n{chr(9).join(uhead)}\n") # chr(9) = tab + f.write( + f"{sample.sample_name}\t{sample.num_reads}\t{sample.mean_read_len}\t{sample.std_read_len}\t{sample.i7_reversed}\t{sample.i5_reversed}\t{sample.ont_barcode}\n" + ) + uhead = getattr(Report, "unmatch_header") + f.write(f"\n{chr(9).join(uhead)}\n") # chr(9) = tab for key, unmatch in self.unmatched_stats.items(): for idx, mnum in unmatch: f.write("{}\t{}\t{}\n".format(idx, mnum, key[0])) - + def write_json(self, outdir): json_out = { "anglerfish_version": self.version, @@ -45,23 +51,35 @@ def write_json(self, outdir): "run_uuid": self.uuid, "paf_stats": [], "sample_stats": [], - "undetermined": [] + "undetermined": [], } for astat in self.aln_stats: json_out["paf_stats"].append(astat.paf_stats) for sample in self.sample_stats: - slist = [sample.sample_name, sample.num_reads, sample.mean_read_len, sample.std_read_len, sample.i7_reversed, sample.i5_reversed, sample.ont_barcode] - json_out["sample_stats"].append(dict(zip(getattr(SampleStat, "header"),slist))) + slist = [ + 
sample.sample_name, + sample.num_reads, + sample.mean_read_len, + sample.std_read_len, + sample.i7_reversed, + sample.i5_reversed, + sample.ont_barcode, + ] + json_out["sample_stats"].append( + dict(zip(getattr(SampleStat, "header"), slist)) + ) for key, unmatch in self.unmatched_stats.items(): for idx, mnum in unmatch: - json_out["undetermined"].append(dict(zip(getattr(Report, "unmatch_header"),[idx, mnum, key[0]]))) - with open(os.path.join(outdir,"anglerfish_stats.json"), "w") as f: - f.write(json.dumps(json_out,indent=2, sort_keys=True)) + json_out["undetermined"].append( + dict(zip(getattr(Report, "unmatch_header"), [idx, mnum, key[0]])) + ) + with open(os.path.join(outdir, "anglerfish_stats.json"), "w") as f: + f.write(json.dumps(json_out, indent=2, sort_keys=True)) - def write_dataframe(self,outdir,samplesheet): + def write_dataframe(self, outdir, samplesheet): """Write a dataframe of the stats to a csv file. - TODO: This needs be cleaned up and made more robust. Especially lock in / decouple from upstream the header names and order: - sample_name, num_reads, mean_read_len, std_read_len, i7_reversed, i5_reversed, ont_barcode, adaptor_name, i7_index, i5_index + TODO: This needs be cleaned up and made more robust. 
Especially lock in / decouple from upstream the header names and order: + sample_name, num_reads, mean_read_len, std_read_len, i7_reversed, i5_reversed, ont_barcode, adaptor_name, i7_index, i5_index """ out_list = [] for sample in self.sample_stats: @@ -85,7 +103,7 @@ def write_dataframe(self,outdir,samplesheet): un["i7_index"] = i7i5[0] un["i5_index"] = i7i5[1] out_list.append(un) - with open(os.path.join(outdir,"anglerfish_dataframe.csv"), "w") as f: + with open(os.path.join(outdir, "anglerfish_dataframe.csv"), "w") as f: out_header = out_list[0].keys() f.write(",".join(out_header)) f.write("\n") @@ -93,25 +111,39 @@ def write_dataframe(self,outdir,samplesheet): f.write(",".join([str(out[i]) for i in out_header])) f.write("\n") -class AlignmentStat(object): - def __init__(self, adaptor_name): - self.adaptor_name = adaptor_name - self.paf_stats = {} +class AlignmentStat(object): + def __init__(self, adaptor_name): + self.adaptor_name = adaptor_name + self.paf_stats = {} def compute_pafstats(self, num_fq, fragments, singletons, concats, unknowns): - total = len(fragments)+len(singletons)+len(concats)+len(unknowns) - self.paf_stats["input_reads"] = [num_fq , 1.0] - self.paf_stats["reads aligning to adaptor sequences"] = [total, total/float(num_fq)] - self.paf_stats["aligned reads matching both I7 and I5 adaptor"] = [len(fragments), len(fragments)/float(total)] - self.paf_stats["aligned reads matching only I7 or I5 adaptor"] = [len(singletons), len(singletons)/float(total)] - self.paf_stats["aligned reads matching multiple I7/I5 adaptor pairs"] = [len(concats), len(concats)/float(total)] - self.paf_stats["aligned reads with uncategorized alignments"] = [len(unknowns), len(unknowns)/float(total)] + total = len(fragments) + len(singletons) + len(concats) + len(unknowns) + self.paf_stats["input_reads"] = [num_fq, 1.0] + self.paf_stats["reads aligning to adaptor sequences"] = [ + total, + total / float(num_fq), + ] + self.paf_stats["aligned reads matching both I7 and I5 
adaptor"] = [ + len(fragments), + len(fragments) / float(total), + ] + self.paf_stats["aligned reads matching only I7 or I5 adaptor"] = [ + len(singletons), + len(singletons) / float(total), + ] + self.paf_stats["aligned reads matching multiple I7/I5 adaptor pairs"] = [ + len(concats), + len(concats) / float(total), + ] + self.paf_stats["aligned reads with uncategorized alignments"] = [ + len(unknowns), + len(unknowns) / float(total), + ] @dataclass class SampleStat: - sample_name: str num_reads: int mean_read_len: float @@ -119,13 +151,12 @@ class SampleStat: i7_reversed: bool i5_reversed: bool ont_barcode: str = None - header: ClassVar[list] = ["sample_name", - "#reads", # We specify this for historical reasons - "mean_read_len", - "std_read_len", - "i7_reversed", - "i5_reversed", - "ont_barcode"] - - - + header: ClassVar[list] = [ + "sample_name", + "#reads", # We specify this for historical reasons + "mean_read_len", + "std_read_len", + "i7_reversed", + "i5_reversed", + "ont_barcode", + ] diff --git a/anglerfish/demux/samplesheet.py b/anglerfish/demux/samplesheet.py index 6bb117d..12f6dae 100644 --- a/anglerfish/demux/samplesheet.py +++ b/anglerfish/demux/samplesheet.py @@ -13,9 +13,9 @@ adaptors = yaml.safe_load(stream) delim = "-NNN-" + @dataclass class SampleSheetEntry: - sample_name: str adaptor: object fastq: str @@ -23,9 +23,7 @@ class SampleSheetEntry: class Adaptor(object): - def __init__(self, adaptor, i7_index=None, i5_index=None): - self.i5 = adaptors[adaptor]["i5"] self.i7 = adaptors[adaptor]["i7"] self.i5_index = i5_index @@ -39,21 +37,19 @@ def __init__(self, adaptor, i7_index=None, i5_index=None): def get_i5_mask(self): if delim in self.i5: - return self.i5.replace(delim, "N"*len(self.i5_index)) + return self.i5.replace(delim, "N" * len(self.i5_index)) else: return self.i5 def get_i7_mask(self): if delim in self.i7: - return self.i7.replace(delim, "N"*len(self.i7_index)) + return self.i7.replace(delim, "N" * len(self.i7_index)) else: return 
self.i7 class SampleSheet(object): - def __init__(self, input_csv, ont_bc): - # Read samplesheet in format: # sample_name, adaptors, i7_index(-i5_index), fastq_path # If we are demuxing a run with ONT barcodes, we will have to assume fastq files are located in "barcode##" folders @@ -61,54 +57,70 @@ def __init__(self, input_csv, ont_bc): self.samplesheet = [] try: csvfile = open(input_csv, "r") - dialect = csv.Sniffer().sniff(csvfile.readline(), [',',';','\t']) + dialect = csv.Sniffer().sniff(csvfile.readline(), [",", ";", "\t"]) csvfile.seek(0) - data = csv.DictReader(csvfile, - fieldnames=['sample_name', 'adaptors', 'index', 'fastq_path'], dialect=dialect) + data = csv.DictReader( + csvfile, + fieldnames=["sample_name", "adaptors", "index", "fastq_path"], + dialect=dialect, + ) rn = 1 test_globs = {} for row in data: - if row['adaptors'] not in adaptors: - raise UserWarning(f"'{row['adaptors']}' not in the list of valid adaptors: {adaptors.keys()}") + if row["adaptors"] not in adaptors: + raise UserWarning( + f"'{row['adaptors']}' not in the list of valid adaptors: {adaptors.keys()}" + ) i7i5 = row["index"].split("-") - i7 = i7i5[0]; i5 = None + i7 = i7i5[0] + i5 = None if len(i7i5) > 1: i5 = i7i5[1] - sample_name = row['sample_name'] - test_globs[row['fastq_path']] = glob.glob(row['fastq_path']) + sample_name = row["sample_name"] + test_globs[row["fastq_path"]] = glob.glob(row["fastq_path"]) bc_re = re.compile("\/(barcode\d\d|unclassified)\/") ont_barcode = None if ont_bc: - ob = re.findall(bc_re, row['fastq_path']) - assert len(ob) > 0 and len(ob[0][-1]) > 0, "ONT barcode not found in fastq path. In ONT barcode mode (-n), fastq files must be located in barcode## folders" + ob = re.findall(bc_re, row["fastq_path"]) + assert ( + len(ob) > 0 and len(ob[0][-1]) > 0 + ), "ONT barcode not found in fastq path. 
In ONT barcode mode (-n), fastq files must be located in barcode## folders" ont_barcode = ob[0] - ss_entry = SampleSheetEntry(sample_name, Adaptor(row['adaptors'], i7, i5),row['fastq_path'], ont_barcode) + ss_entry = SampleSheetEntry( + sample_name, + Adaptor(row["adaptors"], i7, i5), + row["fastq_path"], + ont_barcode, + ) self.samplesheet.append(ss_entry) rn += 1 # Explanation: Don't mess around with the globs too much. Don't refer to the same file twice but using globs, # e.g, ./input.fastq and ./[i]nput.fastq - for a,b in combinations(test_globs.values(), 2): + for a, b in combinations(test_globs.values(), 2): if len(set(a) & set(b)) > 0: - raise UserWarning(f"Fastq paths are inconsistent. Please check samplesheet") + raise UserWarning( + f"Fastq paths are inconsistent. Please check samplesheet" + ) if not ont_bc and len(set([v[0] for v in test_globs.values()])) > 1: - raise UserWarning("""Found several different fastq files in samplesheet. Please carefully check any glob patterns. + raise UserWarning( + """Found several different fastq files in samplesheet. Please carefully check any glob patterns. If you are using ONT barcodes, please specify the --ont_barcodes flag. 
Or if you are trying to input several - sets of fastqs into anglerfish, please run anglerfish separately for each set.""") + sets of fastqs into anglerfish, please run anglerfish separately for each set.""" + ) except: raise finally: csvfile.close() - def minimum_bc_distance(self): - """ Compute the minimum edit distance between all barcodes in samplesheet, or within each ONT barcode group """ + """Compute the minimum edit distance between all barcodes in samplesheet, or within each ONT barcode group""" ss_by_bc = {} testset = {} @@ -122,18 +134,18 @@ def minimum_bc_distance(self): testset[ont_barcode] = [] for adaptor in adaptors: if adaptor.i5_index is not None: - testset[ont_barcode].append(adaptor.i5_index+adaptor.i7_index) + testset[ont_barcode].append(adaptor.i5_index + adaptor.i7_index) else: testset[ont_barcode].append(adaptor.i7_index) - fq_distances=[] + fq_distances = [] for ont_barcode, adaptors in testset.items(): distances = [] if len(adaptors) == 1: distances = [len(adaptors[0])] else: for a, b in [i for i in combinations(adaptors, 2)]: - dist = lev.distance(a,b) + dist = lev.distance(a, b) assert dist > 0, f"""There is one or more identical barcodes in the input samplesheet. First one found: {a}. 
If these exist in different ONT barcodes, please specify the --ont_barcodes flag.""" distances.append(dist) @@ -141,22 +153,22 @@ def minimum_bc_distance(self): return min(fq_distances) def get_fastastring(self, adaptor_name=None): - fastas = {} for entry in self.samplesheet: if entry.adaptor.name == adaptor_name or adaptor_name is None: - fastas[entry.adaptor.name+"_i7"] = entry.adaptor.get_i7_mask() - fastas[entry.adaptor.name+"_i5"] = entry.adaptor.get_i5_mask() + fastas[entry.adaptor.name + "_i7"] = entry.adaptor.get_i7_mask() + fastas[entry.adaptor.name + "_i5"] = entry.adaptor.get_i5_mask() assert len(fastas) > 0 outstr = "" for key, seq in fastas.items(): - outstr += ">{}\n{}\n".format(key,seq) + outstr += ">{}\n{}\n".format(key, seq) return outstr def __iter__(self): return iter(self.samplesheet) + def __next__(self): pass diff --git a/setup.py b/setup.py index 72b5591..2c8751a 100644 --- a/setup.py +++ b/setup.py @@ -12,29 +12,30 @@ """ from setuptools import setup, find_packages from pathlib import Path + this_directory = Path(__file__).parent long_description = (this_directory / "README.md").read_text() -version='0.6.0' +version = "0.6.0" setup( - name='bio-anglerfish', + name="bio-anglerfish", version=version, - description='Anglerfish, a tool to demultiplex Illumina libraries from ONT data', + description="Anglerfish, a tool to demultiplex Illumina libraries from ONT data", long_description=long_description, - long_description_content_type='text/markdown', - author='Remi-Andre Olsen', - author_email='remi-andre.olsen@scilifelab.se', - url='https://github.com/remiolsen/anglerfish', - license='MIT', + long_description_content_type="text/markdown", + author="Remi-Andre Olsen", + author_email="remi-andre.olsen@scilifelab.se", + url="https://github.com/remiolsen/anglerfish", + license="MIT", python_requires=">=3.7", - packages = find_packages(), - package_data = {"":["config/adaptors.yaml"]}, + packages=find_packages(), + package_data={"": 
["config/adaptors.yaml"]}, install_requires=[ - 'python-levenshtein==0.23.0', - 'biopython==1.79', - 'numpy==1.22.0', - 'pyyaml==6.0' + "python-levenshtein==0.23.0", + "biopython==1.79", + "numpy==1.22.0", + "pyyaml==6.0", ], entry_points={ "console_scripts": [ @@ -43,16 +44,16 @@ }, zip_safe=False, classifiers=[ - "Development Status :: 5 - Production/Stable", - "Environment :: Console", - "Intended Audience :: Developers", - "Intended Audience :: Healthcare Industry", - "Intended Audience :: Science/Research", - "License :: OSI Approved :: MIT License", - "Operating System :: POSIX :: Linux", - "Programming Language :: Python", + "Development Status :: 5 - Production/Stable", + "Environment :: Console", + "Intended Audience :: Developers", + "Intended Audience :: Healthcare Industry", + "Intended Audience :: Science/Research", + "License :: OSI Approved :: MIT License", + "Operating System :: POSIX :: Linux", + "Programming Language :: Python", "Topic :: Scientific/Engineering", - "Topic :: Scientific/Engineering :: Medical Science Apps.", - "Topic :: Scientific/Engineering :: Bio-Informatics" - ] + "Topic :: Scientific/Engineering :: Medical Science Apps.", + "Topic :: Scientific/Engineering :: Bio-Informatics", + ], ) From ab6f0acffccfc711357faea5b4c79101b3a63023 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 13:47:45 +0100 Subject: [PATCH 03/15] add file to supress blame and supress last commit --- .git-blame-ignore-revs | 2 ++ 1 file changed, 2 insertions(+) create mode 100644 .git-blame-ignore-revs diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs new file mode 100644 index 0000000..732882a --- /dev/null +++ b/.git-blame-ignore-revs @@ -0,0 +1,2 @@ +# 240110, non-invasive, repo-wide formatting with ruff and prettier +7634a3bd7b150f552f064e7089b4ab160b6f4564 From 1d308fb21d4c4d07f1d6a97d0e85d9495db80557 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 13:49:30 +0100 Subject: [PATCH 04/15] ruff check autofix --- 
anglerfish/anglerfish.py | 22 ++++++++++++---------- anglerfish/demux/demux.py | 15 ++++++++------- anglerfish/demux/report.py | 10 +++++----- anglerfish/demux/samplesheet.py | 18 +++++++++--------- setup.py | 3 ++- 5 files changed, 36 insertions(+), 32 deletions(-) diff --git a/anglerfish/anglerfish.py b/anglerfish/anglerfish.py index 551536f..fb92667 100755 --- a/anglerfish/anglerfish.py +++ b/anglerfish/anglerfish.py @@ -1,24 +1,26 @@ #!/usr/bin/env python import argparse -import logging import glob +import gzip +import logging import os -import pkg_resources -import numpy as np import uuid +from collections import Counter from datetime import datetime as dt from itertools import groupby -from collections import Counter + +import numpy as np +import pkg_resources + from .demux.demux import ( - run_minimap2, - parse_paf_lines, - layout_matches, cluster_matches, + layout_matches, + parse_paf_lines, + run_minimap2, write_demuxedfastq, ) +from .demux.report import AlignmentStat, Report, SampleStat from .demux.samplesheet import SampleSheet -from .demux.report import Report, SampleStat, AlignmentStat -import gzip logging.basicConfig(level=logging.INFO) log = logging.getLogger("anglerfish") @@ -132,7 +134,7 @@ def run_demux(args): == sorted([i[2] for i in flipped.values()])[-2] ): log.info( - f"Could not find any barcode reverse complements with unambiguously more matches" + "Could not find any barcode reverse complements with unambiguously more matches" ) elif flipped[best_flip][2] > len(matches) * args.lenient_factor: log.info( diff --git a/anglerfish/demux/demux.py b/anglerfish/demux/demux.py index b55bdac..840b8f0 100644 --- a/anglerfish/demux/demux.py +++ b/anglerfish/demux/demux.py @@ -1,11 +1,12 @@ import glob -import re +import io import logging -import Levenshtein as lev +import re import subprocess -import io -from Bio.SeqIO.QualityIO import FastqGeneralIterator + +import Levenshtein as lev from Bio.Seq import Seq +from Bio.SeqIO.QualityIO import 
FastqGeneralIterator logging.basicConfig(level=logging.INFO) log = logging.getLogger("demux") @@ -55,7 +56,7 @@ def parse_paf_lines(paf, min_qual=10): Returns a dict with the import values for later use """ entries = {} - with open(paf, "r") as paf: + with open(paf) as paf: for paf_line in paf: aln = paf_line.split() try: @@ -228,7 +229,7 @@ def write_demuxedfastq(beds, fastq_in, fastq_out): for bed in beds[new_title[0]]: new_title[0] += "_" + bed[3] outfqs += "@{}\n".format(" ".join(new_title)) - outfqs += "{}\n".format(seq[bed[1] : bed[2]]) + outfqs += f"{seq[bed[1] : bed[2]]}\n" outfqs += "+\n" - outfqs += "{}\n".format(qual[bed[1] : bed[2]]) + outfqs += f"{qual[bed[1] : bed[2]]}\n" oz.stdin.write(outfqs.encode("utf-8")) diff --git a/anglerfish/demux/report.py b/anglerfish/demux/report.py index 2ac3e89..52ff8a7 100644 --- a/anglerfish/demux/report.py +++ b/anglerfish/demux/report.py @@ -1,10 +1,10 @@ -import os import json -from dataclasses import dataclass, asdict +import os +from dataclasses import asdict, dataclass from typing import ClassVar -class Report(object): +class Report: unmatch_header = ["index", "num_reads", "ont_barcode"] def __init__(self, run_name, uuid, version): @@ -42,7 +42,7 @@ def write_report(self, outdir): f.write(f"\n{chr(9).join(uhead)}\n") # chr(9) = tab for key, unmatch in self.unmatched_stats.items(): for idx, mnum in unmatch: - f.write("{}\t{}\t{}\n".format(idx, mnum, key[0])) + f.write(f"{idx}\t{mnum}\t{key[0]}\n") def write_json(self, outdir): json_out = { @@ -112,7 +112,7 @@ def write_dataframe(self, outdir, samplesheet): f.write("\n") -class AlignmentStat(object): +class AlignmentStat: def __init__(self, adaptor_name): self.adaptor_name = adaptor_name self.paf_stats = {} diff --git a/anglerfish/demux/samplesheet.py b/anglerfish/demux/samplesheet.py index 12f6dae..35713a8 100644 --- a/anglerfish/demux/samplesheet.py +++ b/anglerfish/demux/samplesheet.py @@ -1,15 +1,15 @@ import csv -import Levenshtein as lev import glob +import 
importlib.resources import re from dataclasses import dataclass from itertools import combinations -import yaml -import importlib.resources +import Levenshtein as lev +import yaml p = importlib.resources.files("anglerfish.config").joinpath("adaptors.yaml") -with open(p, "r") as stream: +with open(p) as stream: adaptors = yaml.safe_load(stream) delim = "-NNN-" @@ -22,7 +22,7 @@ class SampleSheetEntry: ont_barcode: str -class Adaptor(object): +class Adaptor: def __init__(self, adaptor, i7_index=None, i5_index=None): self.i5 = adaptors[adaptor]["i5"] self.i7 = adaptors[adaptor]["i7"] @@ -48,7 +48,7 @@ def get_i7_mask(self): return self.i7 -class SampleSheet(object): +class SampleSheet: def __init__(self, input_csv, ont_bc): # Read samplesheet in format: # sample_name, adaptors, i7_index(-i5_index), fastq_path @@ -56,7 +56,7 @@ def __init__(self, input_csv, ont_bc): self.samplesheet = [] try: - csvfile = open(input_csv, "r") + csvfile = open(input_csv) dialect = csv.Sniffer().sniff(csvfile.readline(), [",", ";", "\t"]) csvfile.seek(0) data = csv.DictReader( @@ -104,7 +104,7 @@ def __init__(self, input_csv, ont_bc): for a, b in combinations(test_globs.values(), 2): if len(set(a) & set(b)) > 0: raise UserWarning( - f"Fastq paths are inconsistent. Please check samplesheet" + "Fastq paths are inconsistent. 
Please check samplesheet" ) if not ont_bc and len(set([v[0] for v in test_globs.values()])) > 1: @@ -163,7 +163,7 @@ def get_fastastring(self, adaptor_name=None): outstr = "" for key, seq in fastas.items(): - outstr += ">{}\n{}\n".format(key, seq) + outstr += f">{key}\n{seq}\n" return outstr diff --git a/setup.py b/setup.py index 2c8751a..6408fd5 100644 --- a/setup.py +++ b/setup.py @@ -10,9 +10,10 @@ conda install -c bioconda anglerfish """ -from setuptools import setup, find_packages from pathlib import Path +from setuptools import find_packages, setup + this_directory = Path(__file__).parent long_description = (this_directory / "README.md").read_text() From e7cd2a323ac25605ffa9ef9fe2eafbd2409cae0e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 13:50:12 +0100 Subject: [PATCH 05/15] supress blame of last commit --- .git-blame-ignore-revs | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.git-blame-ignore-revs b/.git-blame-ignore-revs index 732882a..9556a97 100644 --- a/.git-blame-ignore-revs +++ b/.git-blame-ignore-revs @@ -1,2 +1,5 @@ # 240110, non-invasive, repo-wide formatting with ruff and prettier 7634a3bd7b150f552f064e7089b4ab160b6f4564 + +# 240110, safe ruff-fixes +1d308fb21d4c4d07f1d6a97d0e85d9495db80557 \ No newline at end of file From 6a4357b716921c2bcc2ad7628cb387741dde752d Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 13:59:16 +0100 Subject: [PATCH 06/15] Apply unsafe fixes, allow ambiguous variable names --- anglerfish/anglerfish.py | 6 +++--- anglerfish/demux/demux.py | 2 +- pyproject.toml | 4 ++++ 3 files changed, 8 insertions(+), 4 deletions(-) diff --git a/anglerfish/anglerfish.py b/anglerfish/anglerfish.py index fb92667..c79051f 100755 --- a/anglerfish/anglerfish.py +++ b/anglerfish/anglerfish.py @@ -37,7 +37,7 @@ def run_demux(args): log.info(f" arguments {vars(args)}") log.info(f" run uuid {run_uuid}") bc_dist = ss.minimum_bc_distance() - if args.max_distance == None: + if args.max_distance is None: if 
bc_dist > 1: args.max_distance = 2 else: @@ -75,7 +75,7 @@ def run_demux(args): with open(adaptor_path, "w") as f: f.write(ss.get_fastastring(adaptor_name)) for fq in fastq_files: - retcode = run_minimap2(fq, adaptor_path, aln_path, args.threads) + run_minimap2(fq, adaptor_path, aln_path, args.threads) # Easy line count in input fastq files num_fq = 0 @@ -181,7 +181,7 @@ def run_demux(args): # Top unmatched indexes nomatch_count = Counter([x[3] for x in no_matches]) - if args.max_unknowns == None: + if args.max_unknowns is None: args.max_unknowns = len([sample for sample in ss]) + 10 report.add_unmatched_stat( nomatch_count.most_common(args.max_unknowns), ont_barcode, adaptor_name diff --git a/anglerfish/demux/demux.py b/anglerfish/demux/demux.py index 840b8f0..4d2ecb9 100644 --- a/anglerfish/demux/demux.py +++ b/anglerfish/demux/demux.py @@ -47,7 +47,7 @@ def run_minimap2(fastq_in, indexfile, output_paf, threads): ] with open(output_paf, "ab") as ofile: - proc = subprocess.run(cmd, stdout=ofile, check=True) + subprocess.run(cmd, stdout=ofile, check=True) def parse_paf_lines(paf, min_qual=10): diff --git a/pyproject.toml b/pyproject.toml index b3bc5ba..8ba7254 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -17,6 +17,10 @@ select =[ "UP", # pyupgrade Make sure syntax is up-to-date ] +ignore = [ + "E741", # ambiguous variable name +] + [tool.mypy] ignore_missing_imports = true follow_imports = 'skip' From 4aafe91c49709a9f11634e0d38345bbf77913824 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 14:12:30 +0100 Subject: [PATCH 07/15] manual fixes + update gha config --- .github/workflows/lint-code.yml | 36 --------------------------------- anglerfish/__main__.py | 2 +- anglerfish/demux/report.py | 2 +- anglerfish/demux/samplesheet.py | 2 ++ 4 files changed, 4 insertions(+), 38 deletions(-) diff --git a/.github/workflows/lint-code.yml b/.github/workflows/lint-code.yml index c73256e..572ff68 100644 --- a/.github/workflows/lint-code.yml +++ 
b/.github/workflows/lint-code.yml @@ -63,42 +63,6 @@ jobs: # Configured in pyprojet.toml run: mypy **/*.py - # Use pipreqs to check for missing dependencies - pipreqs-check: - runs-on: ubuntu-latest - steps: - - name: Checkout repository - uses: actions/checkout@v4 - - name: Set up Python - uses: actions/setup-python@v4 - with: - python-version: "3.10" - - - name: Install pipreqs - run: pip install pipreqs - - - name: Install requirements - run: pip install -r requirements.txt - - - name: Run pipreqs - run: pipreqs --savepath pipreqs.txt - - - name: Compare requirements - run: | - # Extract and sort package names - awk '{print $1}' $1 | sort -u > "$1".compare - awk -F'==' '{print $1}' $2 | sort -u > "$2".compare - - # Compare package lists - if cmp -s "$1".compare "$2".compare - then - echo "Requirements are the same" - exit 0 - else - echo "Requirements are different" - exit 1 - fi - # Use Prettier to check various file formats prettier: runs-on: ubuntu-latest diff --git a/anglerfish/__main__.py b/anglerfish/__main__.py index c11e37d..72a9288 100644 --- a/anglerfish/__main__.py +++ b/anglerfish/__main__.py @@ -1,4 +1,4 @@ -from . 
import anglerfish +from .anglerfish import anglerfish if __name__ == "__main__": anglerfish() diff --git a/anglerfish/demux/report.py b/anglerfish/demux/report.py index 52ff8a7..cb70cce 100644 --- a/anglerfish/demux/report.py +++ b/anglerfish/demux/report.py @@ -150,7 +150,7 @@ class SampleStat: std_read_len: float i7_reversed: bool i5_reversed: bool - ont_barcode: str = None + ont_barcode: str|None = None header: ClassVar[list] = [ "sample_name", "#reads", # We specify this for historical reasons diff --git a/anglerfish/demux/samplesheet.py b/anglerfish/demux/samplesheet.py index 35713a8..ed0c2d6 100644 --- a/anglerfish/demux/samplesheet.py +++ b/anglerfish/demux/samplesheet.py @@ -1,6 +1,7 @@ import csv import glob import importlib.resources +import os import re from dataclasses import dataclass from itertools import combinations @@ -9,6 +10,7 @@ import yaml p = importlib.resources.files("anglerfish.config").joinpath("adaptors.yaml") +assert isinstance(p, os.PathLike) with open(p) as stream: adaptors = yaml.safe_load(stream) delim = "-NNN-" From 6bd5cdc8907f9a8b9c723146c0c891e04573b307 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 14:14:16 +0100 Subject: [PATCH 08/15] update gitignore --- .gitignore | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.gitignore b/.gitignore index b2df197..3aeabc2 100755 --- a/.gitignore +++ b/.gitignore @@ -2,3 +2,5 @@ *~ *.egg-info .DS_Store +.benchmarks +.*_cache \ No newline at end of file From eed9f879a8398a0a5033be2afe96627c3e975a45 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 15:50:23 +0100 Subject: [PATCH 09/15] document dev requirements and tweak formatting --- anglerfish/demux/report.py | 2 +- requirements-dev-conda.txt | 2 ++ requirements-dev-pip.txt | 3 +++ 3 files changed, 6 insertions(+), 1 deletion(-) create mode 100644 requirements-dev-conda.txt create mode 100644 requirements-dev-pip.txt diff --git a/anglerfish/demux/report.py b/anglerfish/demux/report.py index cb70cce..03dba2c 
100644 --- a/anglerfish/demux/report.py +++ b/anglerfish/demux/report.py @@ -150,7 +150,7 @@ class SampleStat: std_read_len: float i7_reversed: bool i5_reversed: bool - ont_barcode: str|None = None + ont_barcode: str | None = None header: ClassVar[list] = [ "sample_name", "#reads", # We specify this for historical reasons diff --git a/requirements-dev-conda.txt b/requirements-dev-conda.txt new file mode 100644 index 0000000..1a04d6a --- /dev/null +++ b/requirements-dev-conda.txt @@ -0,0 +1,2 @@ +conda-forge::pre-commit +conda-forge::prettier diff --git a/requirements-dev-pip.txt b/requirements-dev-pip.txt new file mode 100644 index 0000000..faed137 --- /dev/null +++ b/requirements-dev-pip.txt @@ -0,0 +1,3 @@ +ruff +mypy +editorconfig-checker From 4606cf2c911d81ce517de4138514bc5dc2aeb794 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 16:06:46 +0100 Subject: [PATCH 10/15] supplement readme --- README.md | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/README.md b/README.md index 2a4867d..2d1304a 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,13 @@ conda env create -f environment.yml pip install -e . 
``` +### Developer tools + +``` +conda install --file requirements-dev-conda.txt +pip install -r requirements-dev-pip.txt +``` + ### Development version ``` From 4837509902166076b1c7b258132e0bc0339f5c0e Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 16:07:31 +0100 Subject: [PATCH 11/15] add pre-commit cache dir to gitignore --- .gitignore | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 3aeabc2..0771c71 100755 --- a/.gitignore +++ b/.gitignore @@ -3,4 +3,5 @@ *.egg-info .DS_Store .benchmarks -.*_cache \ No newline at end of file +.*_cache +node_modules From b950721cbf1ff82e7289d8a72789496435eb6bc5 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 16:25:38 +0100 Subject: [PATCH 12/15] Update readme, make .vscode settings optional for contributors by including it in .gitignore --- .gitignore | 1 + README.md | 32 +++++++++++++++++++++++++++----- 2 files changed, 28 insertions(+), 5 deletions(-) diff --git a/.gitignore b/.gitignore index 0771c71..86637f5 100755 --- a/.gitignore +++ b/.gitignore @@ -5,3 +5,4 @@ .benchmarks .*_cache node_modules +.vscode \ No newline at end of file diff --git a/README.md b/README.md index 2d1304a..9093cb1 100644 --- a/README.md +++ b/README.md @@ -42,9 +42,17 @@ pip install bio-anglerfish conda install -c bioconda anglerfish ``` -### Manually with Conda +### Install development version -First [install miniconda](https://docs.conda.io/en/latest/miniconda.html), then: +``` +pip install --upgrade --force-reinstall git+https://github.com/remiolsen/anglerfish.git +``` + +## Source development + +1. [Install miniconda](https://docs.conda.io/en/latest/miniconda.html). + +2. Set up repo clone with editable install ``` git clone https://github.com/remiolsen/anglerfish.git cd anglerfish # Create a the anglerfish conda environment conda env create -f environment.yml # Install anglerfish +conda activate anglerfish pip install -e .
``` -### Developer tools +3. Install developer tools ``` conda install --file requirements-dev-conda.txt pip install -r requirements-dev-pip.txt ``` -### Development version +4. (Optional) Install pre-commit to prevent committing code that will fail linting ``` -pip install --upgrade --force-reinstall git+https://github.com/remiolsen/anglerfish.git +pre-commit install +``` + +5. (Optional) Enable automatic formatting in VS Code by creating `.vscode/settings.json` with: + +``` +{ + "editor.formatOnSave": true, + "editor.defaultFormatter": "esbenp.prettier-vscode", + "[python]": { + "editor.defaultFormatter": "charliermarsh.ruff" + }, + "prettier.configPath": "./pyproject.toml" +} ``` ## Usage From 22d116d52c980288b653946d40c3ae8fc5e365d1 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 16:27:45 +0100 Subject: [PATCH 13/15] newline --- .gitignore | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitignore b/.gitignore index 86637f5..9c7393b 100755 --- a/.gitignore +++ b/.gitignore @@ -5,4 +5,4 @@ .benchmarks .*_cache node_modules -.vscode \ No newline at end of file +.vscode From 2547706d40c4b928322112b460a42613fb971b1f Mon Sep 17 00:00:00 2001 From: kedhammar Date: Wed, 10 Jan 2024 16:30:05 +0100 Subject: [PATCH 14/15] remove from index --- .vscode/settings.json | 8 -------- 1 file changed, 8 deletions(-) delete mode 100644 .vscode/settings.json diff --git a/.vscode/settings.json b/.vscode/settings.json deleted file mode 100644 index 6e4306d..0000000 --- a/.vscode/settings.json +++ /dev/null @@ -1,8 +0,0 @@ -{ - "editor.formatOnSave": true, - "editor.defaultFormatter": "esbenp.prettier-vscode", - "[python]": { - "editor.defaultFormatter": "charliermarsh.ruff" - }, - "prettier.configPath": "./pyproject.toml" -} From 9115460372dc461346607a0ef12ce4f210cfe9e5 Mon Sep 17 00:00:00 2001 From: kedhammar Date: Thu, 11 Jan 2024 11:53:25 +0100 Subject: [PATCH 15/15] ruff --- setup.py | 24 ++++++++++++------------ 1 file changed, 12 
insertions(+), 12 deletions(-) diff --git a/setup.py b/setup.py index d5a7814..c8636a1 100644 --- a/setup.py +++ b/setup.py @@ -17,26 +17,26 @@ this_directory = Path(__file__).parent long_description = (this_directory / "README.md").read_text() -version='0.6.1-dev' +version = "0.6.1-dev" setup( name="bio-anglerfish", version=version, description="Anglerfish, a tool to demultiplex Illumina libraries from ONT data", long_description=long_description, - long_description_content_type='text/markdown', - author='Remi-Andre Olsen', - author_email='remi-andre.olsen@scilifelab.se', - url='https://github.com/remiolsen/anglerfish', - license='MIT', + long_description_content_type="text/markdown", + author="Remi-Andre Olsen", + author_email="remi-andre.olsen@scilifelab.se", + url="https://github.com/remiolsen/anglerfish", + license="MIT", python_requires=">=3.10", - packages = find_packages(), - package_data = {"":["config/adaptors.yaml"]}, + packages=find_packages(), + package_data={"": ["config/adaptors.yaml"]}, install_requires=[ - 'python-levenshtein==0.23.0', - 'biopython==1.79', - 'numpy>=1.22.0', - 'pyyaml==6.0' + "python-levenshtein==0.23.0", + "biopython==1.79", + "numpy>=1.22.0", + "pyyaml==6.0", ], entry_points={ "console_scripts": [