Skip to content

Commit

Permalink
Merge pull request #230 from rhysnewell/dev
Browse files Browse the repository at this point in the history
Dev
  • Loading branch information
rhysnewell authored Nov 21, 2024
2 parents 4fc6532 + 28c63a3 commit 3c9b501
Show file tree
Hide file tree
Showing 11 changed files with 68 additions and 68 deletions.
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -10,3 +10,5 @@ aviary_genome.egg-info

example/
test/data/.conda
test/data/wgsim.metaspades.assembly.fna.fai
config.yaml
2 changes: 1 addition & 1 deletion aviary/__init__.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
__version__ = "0.9.2"
__version__ = "0.10.0"


# CONSTANTS
Expand Down
9 changes: 5 additions & 4 deletions aviary/aviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -260,11 +260,13 @@ def main():

base_group.add_argument(
'--build',
help='Build conda environments and then exits. Equivalent to \"--snakemake-cmds \'--conda-create-envs-only True \' \"',
help='Build conda environments necessary to run the pipeline, and then exit. Equivalent to "--snakemake-cmds \'--conda-create-envs-only True \' ". Other inputs should be specified as if running normally so that the right set of conda environments is built.',
type=str2bool,
nargs='?',
const=True,
dest='build',
metavar='yes|no',
default='no',
)

base_group.add_argument(
Expand Down Expand Up @@ -790,13 +792,12 @@ def main():
assemble_group.add_argument(
'--coassemble', '--co-assemble', '--co_assemble',
help='Specifies whether or not, when given multiple input reads, to coassemble them. \n'
'If False, Aviary will use the first set of short reads and first set of long reads to perform assembly \n'
'If False (no), Aviary will use the first set of short reads and first set of long reads to perform assembly \n'
'All read files will still be used during the MAG recovery process for differential coverage.',
type=str2bool,
nargs='?',
const=True,
dest='coassemble',
default=False,
metavar='yes|no',
)

assemble_group.add_argument(
Expand Down
4 changes: 2 additions & 2 deletions aviary/modules/assembly/envs/final_assembly.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,5 +2,5 @@ channels:
- conda-forge
- bioconda
dependencies:
- unicycler = 0.4.8
- samtools = 1.11
- unicycler = 0.5.1
- samtools = 1.21
8 changes: 4 additions & 4 deletions aviary/modules/assembly/envs/spades.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -2,8 +2,8 @@ channels:
- conda-forge
- bioconda
dependencies:
- python <= 3.9
- spades = 3.15.4
- python = 3.12.0 # Keep things consistent to aid debugging.
- spades = 4.0.0
- megahit = 1.2.9
- pyyaml
- joblib
- pyyaml = 6.0.2
- joblib = 1.4.2
2 changes: 1 addition & 1 deletion aviary/modules/binning/binning.smk
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,7 @@ rule vamb:
config["max_threads"]
resources:
mem_mb = lambda wildcards, attempt: min(int(config["max_memory"])*1024, 128*1024*attempt),
runtime = lambda wildcards, attempt: 24*60*attempt,
runtime = lambda wildcards, attempt: 48*60*attempt,
gpus = 1 if config["request_gpu"] else 0
output:
"data/vamb_bins/done"
Expand Down
3 changes: 2 additions & 1 deletion aviary/modules/binning/scripts/das_tool.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,8 @@
binners = []
for (binner, extension) in unrefined_binners_to_use:
if binner not in snakemake.config['skip_binners']:
binners.append((f'{binner}_bins/', extension, f'data/{binner}_bins.tsv'))
extra = 'bins/' if binner == 'vamb' else ''
binners.append((f'{binner}_bins/'+extra, extension, f'data/{binner}_bins.tsv'))

for (binner, extension) in refined_binners_to_use:
if binner not in snakemake.config['skip_binners']:
Expand Down
13 changes: 11 additions & 2 deletions aviary/modules/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -233,7 +233,6 @@ def __init__(self,
self.pe2 = 'none'
self.short_percent_identity = 'none'


try:
self.kmer_sizes = args.kmer_sizes
self.use_megahit = args.use_megahit
Expand All @@ -246,7 +245,7 @@ def __init__(self,
except AttributeError:
self.kmer_sizes = ['auto']
self.use_megahit = False
self.coassemble = True
self.coassemble = False
self.min_cov_long = 20
self.min_cov_short = 3
self.exclude_contig_cov = 100
Expand Down Expand Up @@ -346,6 +345,16 @@ def make_config(self):

with open(template_conf_file) as template_config:
conf = yaml.load(template_config)

if self.assembly == 'none' or self.assembly is None:
# Check whether the user must state explicitly if the reads should be coassembled (required when multiple read sets are given).
if self.coassemble is None:
if (self.pe1 != 'none' and len(self.pe1) > 1) or \
(self.longreads != 'none' and len(self.longreads) > 1):
logging.error("Multiple readsets detected. Either specify '--coassemble' for coassembly of or '--coassemble no'.")
sys.exit(-1)
if self.coassemble is None:
self.coassemble = False # ensure that something is specified so that the config file is well formed

if self.assembly != "none" and self.assembly is not None:
self.assembly = list(dict.fromkeys([os.path.abspath(p) for p in self.assembly]))
Expand Down
53 changes: 0 additions & 53 deletions config.yaml

This file was deleted.

4 changes: 4 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
[build-system]
# XXX: If your project needs other packages to build properly, add them to this list.
requires = ["setuptools >= 64"]
build-backend = "setuptools.build_meta"
36 changes: 36 additions & 0 deletions test/test_integration.py
Original file line number Diff line number Diff line change
Expand Up @@ -156,6 +156,42 @@ def test_short_read_recovery_fast(self):

self.assertFalse(os.path.isfile(f"{output_dir}/aviary_out/data/final_contigs.fasta"))

def test_short_read_recovery_vamb(self):
    """Run a binning-only ``aviary recover`` with rosella, semibin and metabat skipped.

    The test assembly is first inflated: the original FASTA is copied and
    then 100 further copies are appended, each with the copy index added to
    the end of every header line. The run is expected to produce a
    ``bin_info.tsv`` with more than two lines and must not produce
    ``final_contigs.fasta``.

    NOTE(review): judging by the test name, the inflation exists so that
    VAMB has enough contigs to work with -- confirm.
    """
    output_dir = os.path.join("example", "test_short_read_recovery_vamb")
    self.setup_output_dir(output_dir)

    source_assembly = f"{data}/assembly.fasta"
    inflated_assembly = f"{output_dir}/assembly.fasta"

    # Create inflated assembly file. Each awk pass appends one more copy of
    # the source assembly, suffixing the header lines with the copy index.
    multiplier = 100
    inflate_steps = [f"cat {source_assembly} > {inflated_assembly}"]
    inflate_steps.extend(
        f"awk '/^>/ {{print $0 \"{i}\"}} !/^>/ {{print $0}}' {source_assembly} >> {inflated_assembly}"
        for i in range(multiplier)
    )
    # The steps are chained with '&&' so the first failure aborts the chain.
    subprocess.run(" && ".join(inflate_steps), shell=True, check=True)

    cmd = (
        f"aviary recover "
        f"--assembly {inflated_assembly} "
        f"-o {output_dir}/aviary_out "
        f"-1 {data}/wgsim.1.fq.gz "
        f"-2 {data}/wgsim.2.fq.gz "
        f"--binning-only "
        f"--skip-binners rosella semibin metabat "
        f"--skip-qc "
        f"--refinery-max-iterations 0 "
        f"--conda-prefix {path_to_conda} "
        f"-n 32 -t 32 "
    )
    subprocess.run(cmd, shell=True, check=True)

    bin_info_path = f"{output_dir}/aviary_out/bins/bin_info.tsv"
    self.assertTrue(
        os.path.isfile(bin_info_path),
        f"expected bin info file at {bin_info_path}",
    )
    with open(bin_info_path) as f:
        num_lines = sum(1 for _ in f)
    # assertGreater reports the actual line count when the check fails.
    # Presumably this means a header row plus at least two bins -- confirm
    # against the bin_info.tsv format.
    self.assertGreater(num_lines, 2)

    self.assertFalse(os.path.isfile(f"{output_dir}/aviary_out/data/final_contigs.fasta"))

@unittest.skip("Skipping test due to queue submission")
def test_short_read_recovery_queue_submission(self):
output_dir = os.path.join("example", "test_short_read_recovery_queue_submission")
self.setup_output_dir(output_dir)
Expand Down

0 comments on commit 3c9b501

Please sign in to comment.