Skip to content

Commit

Permalink
Replace IVA by Haploflow
Browse files Browse the repository at this point in the history
This change does not yet handle `merged_contigs_csv`: when it is supplied, an error is logged and the seed contigs are ignored.
  • Loading branch information
Donaim committed Nov 6, 2024
1 parent d3139ae commit e229317
Show file tree
Hide file tree
Showing 6 changed files with 58 additions and 103 deletions.
29 changes: 9 additions & 20 deletions .github/workflows/build-and-test.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,27 +15,16 @@ jobs:
- name: Run apt update
run: sudo apt-get update

- name: Install IVA assembler dependencies
- name: Install Haploflow
run: |
sudo apt-get install -qq zlib1g-dev libncurses5-dev libncursesw5-dev mummer ncbi-blast+
cd ~/bin
wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc
wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc_dump
# Server doesn't support HTTPS, so check for changed files.
echo "\
db1935884aec2d23d4d623ff85eb4eae8d7a946c9ee0c33ea1818215c40d3099 kmc
34a97db2dab5fdae0276d2589c940142813e9cd87ae10e5e2dd37ed3545b4436 kmc_dump" | sha256sum --check
chmod +x kmc kmc_dump
wget -q https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2
tar -xf samtools-1.3.1.tar.bz2 --no-same-owner --bzip2
cd samtools-1.3.1
./configure --prefix=$HOME
make
make install
cd ~
wget -q https://downloads.sourceforge.net/project/smalt/smalt-0.7.6-bin.tar.gz
tar -xzf smalt-0.7.6-bin.tar.gz
ln -s ~/smalt-0.7.6-bin/smalt_x86_64 ~/bin/smalt
sudo apt-get update
sudo apt-get install -y build-essential git ronn
cd /opt/
git clone https://github.com/hzi-bifo/Haploflow
cd Haploflow
git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50
sh build.sh
sudo ln -s /opt/Haploflow/build/haploflow ~/bin/haploflow
- name: Install Rust and merge-mates
run: |
Expand Down
37 changes: 9 additions & 28 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -54,34 +54,15 @@ RUN wget -q -O bowtie2.zip https://github.com/BenLangmead/bowtie2/releases/downl

ENV PATH $PATH:/opt/bowtie2

## Installing IVA dependencies
RUN apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev && \
cd /bin && \
wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc && \
wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc_dump && \
chmod +x kmc kmc_dump && \
cd /opt && \
wget -q https://sourceforge.net/projects/mummer/files/mummer/3.23/MUMmer3.23.tar.gz && \
tar -xzf MUMmer3.23.tar.gz --no-same-owner && \
cd MUMmer3.23 && \
make --quiet install && \
rm -r docs src ../MUMmer3.23.tar.gz && \
ln -s /opt/MUMmer3.23/nucmer \
/opt/MUMmer3.23/delta-filter \
/opt/MUMmer3.23/show-coords \
/bin && \
cd /opt && \
wget -q https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2 && \
tar -xf samtools-1.3.1.tar.bz2 --no-same-owner --bzip2 && \
cd samtools-1.3.1 && \
./configure --quiet --prefix=/ && \
make --quiet && \
make --quiet install && \
cd /opt && \
rm -rf samtools-1.3.1* && \
wget -q http://downloads.sourceforge.net/project/smalt/smalt-0.7.6-bin.tar.gz && \
tar -xzf smalt-0.7.6-bin.tar.gz --no-same-owner && \
ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt
## Install Haploflow
RUN apt-get update && \
apt-get install -y build-essential sudo git ronn cmake && \
cd /opt/ && \
git clone https://github.com/hzi-bifo/Haploflow && \
cd Haploflow && \
git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50 && \
yes | sh build.sh && \
ln -s /opt/Haploflow/build/haploflow /bin/haploflow

## Install dependencies for genetracks/drawsvg
RUN apt-get install -q -y libcairo2-dev
Expand Down
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@ Requests is distributed under the Apache 2.0 license.

Python 3 is distributed under the [Python 3 license][python].

Bowtie2, IVA, and Python-Levenshtein are distributed under the GNU General
Bowtie2, Haploflow, and Python-Levenshtein are distributed under the GNU General
Public License (GPL).

Matplotlib is distributed under the [Matplotlib license][matplotlib].
Expand Down
35 changes: 7 additions & 28 deletions Singularity
Original file line number Diff line number Diff line change
Expand Up @@ -62,34 +62,13 @@ From: python:3.11
ln -s /opt/bowtie2-2.2.8/ /opt/bowtie2
rm bowtie2.zip

echo ===== Installing IVA dependencies ===== >/dev/null
apt-get install -q -y zlib1g-dev libncurses5-dev libncursesw5-dev
cd /bin
wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc
wget -q http://sun.aei.polsl.pl/kmc/download-2.1.1/linux/kmc_dump
chmod +x kmc kmc_dump
cd /opt
wget -q https://sourceforge.net/projects/mummer/files/mummer/3.23/MUMmer3.23.tar.gz
tar -xzf MUMmer3.23.tar.gz --no-same-owner
cd MUMmer3.23
make --quiet install
rm -r docs src ../MUMmer3.23.tar.gz
ln -s /opt/MUMmer3.23/nucmer \
/opt/MUMmer3.23/delta-filter \
/opt/MUMmer3.23/show-coords \
/bin
cd /opt
wget -q https://github.com/samtools/samtools/releases/download/1.3.1/samtools-1.3.1.tar.bz2
tar -xf samtools-1.3.1.tar.bz2 --no-same-owner --bzip2
cd samtools-1.3.1
./configure --quiet --prefix=/
make --quiet
make --quiet install
cd /opt
rm -rf samtools-1.3.1*
wget -q http://downloads.sourceforge.net/project/smalt/smalt-0.7.6-bin.tar.gz
tar -xzf smalt-0.7.6-bin.tar.gz --no-same-owner
ln -s /opt/smalt-0.7.6-bin/smalt_x86_64 /bin/smalt
echo ===== Installing Haploflow ===== >/dev/null
apt-get install -q -y libboost-all-dev build-essential sudo git ronn cmake
cd /opt/
git clone https://github.com/hzi-bifo/Haploflow
cd Haploflow
git checkout 9a5a0ff6c3a0435e723e41f98fe82ec2ad19cf50
yes | sh build.sh

echo ===== Installing Python packages ===== >/dev/null
# Install dependencies for genetracks/drawsvg
Expand Down
53 changes: 31 additions & 22 deletions micall/core/denovo.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,20 +2,15 @@
import logging
import os
from typing import Optional, TextIO, cast, BinaryIO
from csv import DictReader
from datetime import datetime
from glob import glob
from shutil import rmtree, copyfileobj
from subprocess import PIPE, CalledProcessError, STDOUT
import subprocess
from tempfile import mkdtemp

from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord


IVA = "iva"
HAPLOFLOW = "haploflow"
logger = logging.getLogger(__name__)


Expand All @@ -40,6 +35,10 @@ def denovo(fastq1_path: str,
amplicon reads
"""

if merged_contigs_csv is not None:
# TODO: implement this.
logger.error("Haploflow implementation does not support contig extensions yet.")

old_tmp_dirs = glob(os.path.join(work_dir, 'assembly_*'))
for old_tmp_dir in old_tmp_dirs:
rmtree(old_tmp_dir, ignore_errors=True)
Expand All @@ -55,26 +54,36 @@ def denovo(fastq1_path: str,
'--interleave',
'-o', joined_path],
check=True)
iva_out_path = os.path.join(tmp_dir, 'iva_out')
contigs_fasta_path = os.path.join(iva_out_path, 'contigs.fasta')
iva_args = [IVA, '--fr', joined_path, '-t', '2']
if merged_contigs_csv is not None:
seeds_fasta_path = os.path.join(tmp_dir, 'seeds.fasta')
with open(seeds_fasta_path, 'w') as seeds_fasta:
SeqIO.write((SeqRecord(Seq(row['contig']), f'seed-{i}', '', '')
for i, row in enumerate(DictReader(merged_contigs_csv))),
seeds_fasta,
'fasta')
seeds_size = seeds_fasta.tell()
if seeds_size > 0:
iva_args.extend(['--contigs', seeds_fasta_path, '--make_new_seeds'])
iva_args.append(iva_out_path)

haplo_args = {'long': 0,
'filter': 500,
'thres': -1,
'strict': 5,
'error': 0.02,
'kmer': 41,
'merge': False,
'scaffold': False,
'patch': False,
'ref': None,
'RP': False,
}
assembly_out_path = os.path.join(tmp_dir, 'haplo_out')
contigs_fasta_path = os.path.join(assembly_out_path, 'contigs.fa')
haplo_cmd = [HAPLOFLOW,
'--read-file', joined_path,
'--out', assembly_out_path,
'--k', str(haplo_args['kmer']),
'--error-rate', str(haplo_args['error']),
'--strict', str(haplo_args['strict']),
'--filter', str(haplo_args['filter']),
'--thres', str(haplo_args['thres']),
'--long', str(haplo_args['long'])]
try:
subprocess.run(iva_args, check=True, stdout=PIPE, stderr=STDOUT)
subprocess.run(haplo_cmd, check=True, stdout=PIPE, stderr=STDOUT)
except CalledProcessError as ex:
output = ex.output and ex.output.decode('UTF8')
if output != 'Failed to make first seed. Cannot continue\n':
logger.warning('iva failed to assemble.', exc_info=True)
logger.warning('Haploflow failed to assemble.', exc_info=True)
logger.warning(output)
with open(contigs_fasta_path, 'a'):
pass
Expand Down
5 changes: 1 addition & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -64,10 +64,7 @@ dev = [
"gprof2dot==2024.6.6",
"codecov==2.1.13", # For reporting the code coverage.
]
denovo = [
# Requirements for running De-Novo pipeline (only problematic ones).
"iva @ git+https://github.com/cfe-lab/[email protected]",
]
denovo = []
watcher = [
# Requirements for running the MISEQ_MONITOR.py script
"kiveapi @ git+https://github.com/cfe-lab/[email protected]#egg=kiveapi&subdirectory=api",
Expand Down

0 comments on commit e229317

Please sign in to comment.