Skip to content

Commit

Permalink
fix: batch submission no longer requires the user to write to a script first
Browse files Browse the repository at this point in the history
  • Loading branch information
rhysnewell committed Apr 8, 2024
1 parent 7195d25 commit bfebaf9
Show file tree
Hide file tree
Showing 3 changed files with 35 additions and 15 deletions.
4 changes: 2 additions & 2 deletions aviary/aviary.py
Original file line number Diff line number Diff line change
Expand Up @@ -1078,14 +1078,14 @@ def main():
aviary batch -f batch_file.tsv -t 32 -o batch_test
An example batch file can be found at:
An example batch file can be found at: https://rhysnewell.github.io/aviary/examples
''')

batch_options.add_argument(
'-f', '--batch_file', '--batch-file',
help='The tab or comma separated batch file containing the input samples to assemble and/or recover MAGs from. \n'
'An example batch file can be found at XXX. The heading line is required. \n'
'An example batch file can be found at https://rhysnewell.github.io/aviary/examples. The heading line is required. \n'
'The number of reads provided to each sample is flexible as is the type of assembly being performed (if any). \n'
'Multiple reads can be supplied by providing a comma-separated list (surrounded by double quotes \"\" if using a \n'
'comma separated batch file) within the specific read column.',
Expand Down
43 changes: 30 additions & 13 deletions aviary/modules/processor.py
Original file line number Diff line number Diff line change
Expand Up @@ -468,6 +468,8 @@ def run_workflow(self, cores=16, profile=None, cluster_retries=None,
resources=f"--resources mem_mb={int(self.max_memory)*1024} {self.resources}" if not dryrun else ""
)

logging.debug(f"Command: {cmd}")

if write_to_script is not None:
write_to_script.append(cmd)
continue
Expand Down Expand Up @@ -495,20 +497,31 @@ def process_batch(args, prefix):

logging.info(f"Reading batch file: {args.batch_file}")

header=0
header=None
separator=' '
with open(args.batch_file, mode='r') as check_batch:
for line in check_batch.readlines():
if "sample\tshort_reads_1\tshort_reads_2\tlong_reads\tlong_read_type\tassembly\tcoassemble" in line \
or "sample,short_reads_1,short_reads_2,long_reads,long_read_type,assembly,coassemble" in line \
or "sample short_reads_1 short_reads_2 long_reads long_read_type assembly coassemble" in line \
or "sample short_reads_1 short_reads_2 long_reads long_read_type assembly coassemble" in line:
header=1
logging.debug("Inferred header")
else:
logging.debug("No heading inferred.")
line = line.strip()
for sep in ['\t', ',', ' ']:
separated = line.split(sep)
print(separated)
if separated == ['sample', 'short_reads_1', 'short_reads_2', 'long_reads', 'long_read_type', 'assembly', 'coassemble']:
header=0
separator=sep
logging.debug("Inferred header")
break
elif len(separated) >= 7:
header=None
separator=sep
logging.debug("Inferred no header")
break
if header is None:
logging.debug("No header found")
break

batch = pd.read_csv(args.batch_file, sep=None, engine='python', skiprows=header)
if header is not None:
batch = pd.read_csv(args.batch_file, sep=separator, engine='python', header=header)
else:
batch = pd.read_csv(args.batch_file, sep=separator, engine='python', names=['sample', 'short_reads_1', 'short_reads_2', 'long_reads', 'long_read_type', 'assembly', 'coassemble'])
if len(batch.columns) != 7:
logging.critical(f"Batch file contains incorrect number of columns ({len(batch.columns)}). Should contain 7.")
logging.critical(f"Current columns: {batch.columns}")
Expand All @@ -525,11 +538,15 @@ def process_batch(args, prefix):

try:
script_file = args.write_script
write_to_script = []
except AttributeError:
script_file = None
write_to_script = None

write_to_script = None
if script_file is not None:
write_to_script = []

print(script_file)
print(write_to_script)
runs = []
args.interleaved = "none" # hacky solution to skip attribute error
args.coupled = "none"
Expand Down
3 changes: 3 additions & 0 deletions test/example_batch.tsv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
sample short_reads_1 short_reads_2 long_reads long_read_type assembly coassemble
sample_1 test/data/wgsim.1.fq.gz test/data/wgsim.2.fq.gz NA ont NA NA
sample_2 NA NA test/data/pbsim.fq.gz pacbio NA NA

0 comments on commit bfebaf9

Please sign in to comment.