From 7b589c011e41aca54a5ff66257b273b1f9c7ba99 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 9 Oct 2023 14:47:49 +0100 Subject: [PATCH 01/23] Adding pacbio barcode check --- subworkflows/local/pacbio_barcode_check.nf | 56 ++++++++++++++++++++++ 1 file changed, 56 insertions(+) create mode 100644 subworkflows/local/pacbio_barcode_check.nf diff --git a/subworkflows/local/pacbio_barcode_check.nf b/subworkflows/local/pacbio_barcode_check.nf new file mode 100644 index 00000000..48baf0f9 --- /dev/null +++ b/subworkflows/local/pacbio_barcode_check.nf @@ -0,0 +1,56 @@ +import { CHECK_BARCODE } from '../modules/local/check_barcode' +import { BLAST_MAKEBLASTDB } from '../modules/local/blast/makeblastdb' +import { BLAST_BLASTN } from '../modules/local/blast/blastn' +import { FILTER_BARCODE } from '../modules/local/filter_barcode' + +workflow PACBIO_BARCODE_CHECK () { + take: + reference_tuple + pacbio_tuple + barcode_file + barcode_multiplex + + main: + ch_versions = Channel.empty() + + // + // MODULE: + // + CHECK_BARCODE ( + pacbio_tuple + barcode_file, + barcode_multiplex + ) + ch_versions = ch_versions.mix(CHECK_BARCODE.out.versions) + + // + // MODULE: GENERATE BLAST DB ON ORGANELLAR GENOME + // + BLAST_MAKEBLASTDB ( + barcode_file + ) + ch_versions = ch_versions.mix(BLAST_MAKEBLASTDB.out.versions) + + // + // MODULE: RUN BLAST WITH GENOME AGAINST ORGANELLAR GENOME + // + BLAST_BLASTN ( + reference_tuple, + BLAST_MAKEBLASTDB.out.db + ) + ch_versions = ch_versions.mix(BLAST_BLASTN.out.versions) + + // + // LOGIC: FOR I IN CSV LIST RUN FILTER BLAST + // + // TODO: CLAFFIFY THIS BIT + + FILTER_BARCODE ( + reference_tuple, + //i in csv + ) + ch_versions = ch_versions.mix(FILTER_BARCODE.out.versions) + + emit: + filtered = FILTER_BARCODE.out.debarcoded +} From 9fc3a4ce3be34a1f1e4d0e1cf63d00616c0961f6 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 9 Oct 2023 14:48:07 +0100 Subject: [PATCH 02/23] Updating modules --- modules/local/check_barcode.nf | 4 ++-- modules/local/filter_barcode.nf | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/local/check_barcode.nf b/modules/local/check_barcode.nf index b02ce59f..675f9ccf 100644 --- a/modules/local/check_barcode.nf +++ b/modules/local/check_barcode.nf @@ -8,8 +8,8 @@ process CHECK_BARCODE { 'quay.io/biocontainers/pandas:1.5.2' }" input: - tuple val(meta) , path(barcodes) - tuple val(meta2) , path(pacbio_dir) + tuple val(meta) , path(pacbio_dir) + tuple val(meta2) , path(barcodes) tuple val(meta3) , path(multiplex_csv) output: diff --git a/modules/local/filter_barcode.nf b/modules/local/filter_barcode.nf index 0b2c90d6..fed8be93 100644 --- a/modules/local/filter_barcode.nf +++ b/modules/local/filter_barcode.nf @@ -8,7 +8,7 @@ process FILTER_BARCODE { 'quay.io/biocontainers/pandas:1.5.2' }" input: - tuple val(meta), path(fasta) + tuple val(meta) , path(fasta) tuple val(meta2), path(barcodes) output: From 98e51f902d4cdd8a47cfbd28337cfadd44eef8fb Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 9 Oct 2023 15:54:12 +0100 Subject: [PATCH 03/23] Adding note --- subworkflows/local/pacbio_barcode_check.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/subworkflows/local/pacbio_barcode_check.nf b/subworkflows/local/pacbio_barcode_check.nf index 48baf0f9..90e5d56d 100644 --- a/subworkflows/local/pacbio_barcode_check.nf +++ b/subworkflows/local/pacbio_barcode_check.nf @@ -14,7 +14,7 @@ workflow PACBIO_BARCODE_CHECK () { ch_versions = Channel.empty() // - // MODULE: + // MODULE: CHECK FOR KNOWN BARCODES IN SAMPLE DATA // CHECK_BARCODE ( pacbio_tuple From d89e707304397450c8959887c71b5f320c84751b Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Oct 2023 14:21:38 +0100 Subject: [PATCH 04/23] Update files --- assets/test.yaml | 3 ++- subworkflows/local/yaml_input.nf | 4 +++- workflows/ascc.nf | 13 ++++++++++++- 3 files changed, 17 insertions(+), 3 deletions(-) diff --git a/assets/test.yaml b/assets/test.yaml index 9419b091..da60f5f0 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,5 +1,6 @@ -assembly_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa +assembly_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/genomic.fna assembly_title: asccTinyTest +pacbio_barcodes: '' pacbio_multiplexing_barcode_names: something pacbio_reads_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/pacbio/ sci_name: "Plasmodium yoelii yoelii 17XNL" diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index b4891b09..cbd99234 100644 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -22,6 +22,7 @@ workflow YAML_INPUT { assembly_title: ( data.assembly_title ) pacbio_reads: ( data.pacbio_reads_path ) assembly_path: ( file(data.assembly_path) ) + pacbio_barcodes: ( data.pacbio_barcodes ) pacbio_multiplexing_barcode_names: ( data.pacbio_multiplexing_barcode_names) sci_name: ( data.sci_name ) taxid: ( data.taxid ) @@ -34,7 +35,7 @@ workflow YAML_INPUT { ncbi_taxonomy_path: ( data.ncbi_taxonomy_path ) ncbi_rankedlineage_path: ( data.ncbi_rankedlineage_path ) busco_lineages_folder: ( data.busco_lineages_folder ) - seqkit_values : ( data.seqkit ) + seqkit_values: ( data.seqkit ) } .set{ group } @@ -50,6 +51,7 @@ workflow YAML_INPUT { emit: pacbio_reads = group.pacbio_reads + pacbio_multiplex_codes = group.pacbio_multiplexing_barcode_names reference = group.assembly_path assembly_title = group.assembly_title taxid = group.taxid diff --git a/workflows/ascc.nf b/workflows/ascc.nf index f7b07734..83f17bfd 100644 --- a/workflows/ascc.nf +++ b/workflows/ascc.nf @@ -25,6 +25,7 @@ include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' include { EXTRACT_TIARA_HITS } from '../subworkflows/local/extract_tiara_hits' include { EXTRACT_NT_BLAST } from '../subworkflows/local/extract_nt_blast' include { RUN_FCSADAPTOR } from '../subworkflows/local/run_fcsadaptor' +include { PACBIO_BARCODE_CHECK } from '../subworkflows/local/pacbio_barcode_check' // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -115,7 +116,17 @@ workflow ASCC { RUN_FCSADAPTOR ( GENERATE_GENOME.out.reference_tuple ) - ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) + ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) + + // + // + // + PACBIO_BARCODE_CHECK ( + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.pacbio_tuple, + YAML_INPUT.out.pacbio_barcodes + YAML_INPUT.out.pacbio_multiplex_codes, + ) // // SUBWORKFLOW: COLLECT SOFTWARE VERSIONS From 9ed9bb54ca761aae7d7062ed9fe878ce8ee6622d Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Oct 2023 14:42:01 +0100 Subject: [PATCH 05/23] Updates --- subworkflows/local/pacbio_barcode_check.nf | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/pacbio_barcode_check.nf b/subworkflows/local/pacbio_barcode_check.nf index 90e5d56d..214584aa 100644 --- a/subworkflows/local/pacbio_barcode_check.nf +++ b/subworkflows/local/pacbio_barcode_check.nf @@ -13,12 +13,22 @@ workflow PACBIO_BARCODE_CHECK () { main: ch_versions = Channel.empty() + if (barcode_file.isEmpty("YES") == "YES") { + Channel + .fromPath("./assets/pacbio_adaptors.fa") + .set { barcodes } + } else { + Channel + .fromPath(barcode_file) + .set { barcodes } + } + // // MODULE: CHECK FOR KNOWN BARCODES IN SAMPLE DATA // CHECK_BARCODE ( pacbio_tuple - barcode_file, + barcodes, barcode_multiplex ) ch_versions = ch_versions.mix(CHECK_BARCODE.out.versions) @@ -27,7 +37,7 @@ workflow PACBIO_BARCODE_CHECK () { // MODULE: GENERATE BLAST DB ON ORGANELLAR GENOME // BLAST_MAKEBLASTDB ( - barcode_file + barcodes ) ch_versions = ch_versions.mix(BLAST_MAKEBLASTDB.out.versions) From 1dc91c75b83dcad79e058fae5357603c5640a6e7 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Tue, 10 Oct 2023 14:56:41 +0100 Subject: [PATCH 06/23] Correction to channel from dev --- subworkflows/local/pacbio_barcode_check.nf | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/subworkflows/local/pacbio_barcode_check.nf b/subworkflows/local/pacbio_barcode_check.nf index 214584aa..614dde9f 100644 --- a/subworkflows/local/pacbio_barcode_check.nf +++ b/subworkflows/local/pacbio_barcode_check.nf @@ -12,17 +12,31 @@ workflow PACBIO_BARCODE_CHECK () { main: ch_versions = Channel.empty() + barcodes = Channel.empty() if (barcode_file.isEmpty("YES") == "YES") { Channel .fromPath("./assets/pacbio_adaptors.fa") + .map { it -> + tuple( [id: "pacbio_barcodes"], + it + ) + } .set { barcodes } } else { Channel .fromPath(barcode_file) + .map { it -> + tuple( [id: "pacbio_barcodes"], + it + ) + } .set { barcodes } } + + barcodes.view() + // // MODULE: CHECK FOR KNOWN BARCODES IN SAMPLE DATA // From fde4531e95e4c3d4fc32a9186a3c4eaf4aa4bbfc Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Oct 2023 16:22:03 +0100 Subject: [PATCH 07/23] Completing Pacbio Check --- assets/test.yaml | 4 +- bin/pacbio_barcode_check.py | 54 ++++++------ conf/modules.config | 4 + modules/local/check_barcode.nf | 23 +++--- modules/local/filter_barcode.nf | 28 +++---- subworkflows/local/extract_nt_blast.nf | 1 - subworkflows/local/extract_tiara_hits.nf | 4 +- subworkflows/local/generate_genome.nf | 1 + subworkflows/local/pacbio_barcode_check.nf | 96 +++++++++++++--------- subworkflows/local/yaml_input.nf | 11 ++- workflows/ascc.nf | 30 +++---- 11 files changed, 147 insertions(+), 109 deletions(-) diff --git a/assets/test.yaml b/assets/test.yaml index 3a40aa3c..b7ebdece 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,7 +1,7 @@ assembly_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/genomic.fna assembly_title: asccTinyTest -pacbio_barcodes: '' -pacbio_multiplexing_barcode_names: something +pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa +pacbio_multiplexing_barcode_names: "bc1008_BAK8A_OA,bc1009_BAK8A_OA" pacbio_reads_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/pacbio/ sci_name: "Plasmodium yoelii yoelii 17XNL" taxid: 352914 diff --git a/bin/pacbio_barcode_check.py b/bin/pacbio_barcode_check.py index 243b2e45..17480bc8 100755 --- a/bin/pacbio_barcode_check.py +++ b/bin/pacbio_barcode_check.py @@ -1,3 +1,5 @@ +#!/usr/bin/env python3 + """ Notes: Forces sys.exit(1) to kill pipeline @@ -21,7 +23,9 @@ def detect_barcodes_from_read_file_names(barcodes_fasta_path, pacbio_read_files) barcodes_fasta_data = gpf.l(barcodes_fasta_path) barcode_names = [n.split(">")[1] for n in barcodes_fasta_data if n.startswith(">")] if len(barcode_names) == 0: - print("NO BARCODES, KILL PIPELINE") + sys.stderr.write( + f"Failed to read PacBio multiplexing barcode names from the specified file {barcodes_fasta_data}\n" + ) sys.exit(1) detected_barcodes = list() for barcode_name in barcode_names: @@ -40,44 +44,48 @@ def check_if_barcodes_exist_in_barcodes_fasta(barcodes_list, barcodes_fasta_path barcode_names_in_fasta = [n.split(">")[1] for n in barcodes_fasta_data if n.startswith(">")] for barcode in barcodes_list: if barcode not in barcode_names_in_fasta: - # sys.stderr.write(f"The PacBio multiplexing barcode ({barcode}) was not found in the barcode sequences file ({barcodes_fasta_path})\n") - print("NO BARCODES, KILL PIPELINE") + sys.stderr.write( + f"The PacBio multiplexing barcode ({barcode}) was not found in the barcode sequences file ({barcodes_fasta_path})\n" + ) sys.exit(1) + # If this print statement is reached, all user-supplied codes are present. + print("BARCODES FOUND\n") + def main(barcodes_fasta_path, pacbio_read_files, pacbio_multiplexing_barcode_names): pacbio_read_files = pacbio_read_files.split(",") barcodes_list = [] - if pacbio_multiplexing_barcode_names != "NA": - barcodes_list = pacbio_multiplexing_barcode_names.split(",") - - current_script_dir = os.path.dirname(sys.argv[0]) + if len(pacbio_multiplexing_barcode_names) > 0: + barcodes_list = pacbio_multiplexing_barcode_names.strip("[").strip("]").split(",") - if barcodes_fasta_path is None: - barcodes_fasta_path = f"{current_script_dir}/third_party_files/pacbio_barcode_screen/pacbio_adaptors.fa" - else: - if os.path.isfile(barcodes_fasta_path) is False: - print("NO BARCODES, KILL PIPELINE") - sys.exit(1) + if os.path.isfile(barcodes_fasta_path) is False: + sys.stderr.write( + "FASTA file with PacBio multiplexing barcode sequences ({barcodes_fasta_path}) was not found\n" + ) + sys.exit(1) - if barcodes_list == []: + if len(barcodes_list) == 0: barcodes_list = detect_barcodes_from_read_file_names(barcodes_fasta_path, pacbio_read_files) + if len(barcodes_list) == 0: - print("NO BARCODES, KILL PIPELINE") - sys.exit(1) + sys.stderr.write( + "Skipping the PacBio barcodes check, as no barcodes were specified by the user and no barcodes were found in PacBio read file names\n" + ) + sys.exit(0) - check_if_barcodes_exist_in_barcodes_fasta( - barcodes_list, barcodes_fasta_path - ) # This is a TRUE | FALSE check, if FALSE kill pipeline. - print("BARCODES FOUND!") + check_if_barcodes_exist_in_barcodes_fasta(barcodes_list, barcodes_fasta_path) if __name__ == "__main__": parser = argparse.ArgumentParser(description=__doc__) - parser.add_argument("barcode_fasta", type=str, help="Pacbio Barcode FASTA file") - parser.add_argument("pacbio_reads", type=str, help="Pacbio Read FASTA.gz files") - parser.add_argument("multiplex_name", type=str, help="Pacbio Multiplex Barcode Name") + parser.add_argument("-b", "--barcode_fasta", type=str, help="Pacbio Barcode FASTA file") + + parser.add_argument("-p", "--pacbio_reads", type=str, help="Pacbio Read FASTA.gz files") + + parser.add_argument("-m", "--multiplex_name", type=str, help="Pacbio Multiplex Barcode Name") + parser.add_argument("-v", action="version", version="1.0") args = parser.parse_args() main(args.barcode_fasta, args.pacbio_reads, args.multiplex_name) diff --git a/conf/modules.config b/conf/modules.config index b950c9b6..58b8cc69 100644 --- a/conf/modules.config +++ b/conf/modules.config @@ -26,6 +26,10 @@ process { ext.args = 'nucleotide' } + withName: BLAST_MAKEBLASTDB { + ext.args = { "-dbtype nucl" } + } + withName: BLAST_BLASTN { ext.args = { "-outfmt '6 qseqid staxids bitscore std' -max_target_seqs 10 -max_hsps 1 -evalue 1e-25 -dust yes -lcase_masking" } } diff --git a/modules/local/check_barcode.nf b/modules/local/check_barcode.nf index ed9b3e59..9607208d 100644 --- a/modules/local/check_barcode.nf +++ b/modules/local/check_barcode.nf @@ -8,22 +8,23 @@ process CHECK_BARCODE { 'biocontainers/python:3.9' }" input: - tuple val(meta) , path(pacbio_dir) - tuple val(meta2) , path(barcodes) - tuple val(meta3) , path(multiplex_csv) + tuple val(meta) , path(pacbio_dir) + path barcodes + val multiplex_csv output: - stdout , emit: debarcoded + env OUTPUT , emit: result path "versions.yml" , emit: versions script: - def prefix = task.ext.prefix ?: "${meta.id}" - def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def args = task.ext.args ?: '' """ - pacbio_barcode_check.py \\ - ${barcode_fasta} \\ - ${pacbio_dir} \\ - ${multiplex_csv} + OUTPUT=\$(\\ + pacbio_barcode_check.py \\ + -b ${barcodes} \\ + -p ${pacbio_dir} \\ + -m ${multiplex_csv}) cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -34,7 +35,7 @@ process CHECK_BARCODE { stub: """ - echo "BARCODES FOUND!" + OUTPUT="BARCODES FOUND" cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/modules/local/filter_barcode.nf b/modules/local/filter_barcode.nf index 52cf4a71..7ab37ab6 100644 --- a/modules/local/filter_barcode.nf +++ b/modules/local/filter_barcode.nf @@ -2,29 +2,29 @@ process FILTER_BARCODE { tag "${meta.id}" label 'process_low' - conda "conda-forge::python=3.9" + conda "conda-forge::python=3.9 conda-forge::biopython=1.78" container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ? - 'https://depot.galaxyproject.org/singularity/python:3.9' : - 'biocontainers/python:3.9' }" + 'https://depot.galaxyproject.org/singularity/biopython:1.78' : + 'biocontainers/biopython:1.78' }" input: tuple val(meta) , path(fasta) - tuple val(meta2), path(barcodes) + tuple val(meta2), path(blast_data) + val barcodes output: - tuple val(meta), path( "*txt" ) , emit: debarcoded - path "versions.yml" , emit: versions + tuple val(meta), path( "*filtered.txt" ) , emit: debarcoded + path "versions.yml" , emit: versions script: - def prefix = task.ext.prefix ?: "${meta.id}" def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}.debarcoded" // args is for `pacbio_multiplexing_barcodes_check_${meta.barcode}.txt` + def prefix = task.ext.prefix ?: "${meta.id}" """ filter_barcode_blast_results.py \\ --input ${fasta} \\ - --barcodes ${barcodes} \\ + --barcode ${barcodes} \\ --blast ${blast_data} \\ - --output ${prefix}.txt + --output ${prefix}-${barcodes}-filtered.txt cat <<-END_VERSIONS > versions.yml "${task.process}": @@ -34,11 +34,11 @@ process FILTER_BARCODE { """ stub: - def prefix = task.ext.prefix ?: "${meta.id}" - def args = task.ext.args ?: '' - def prefix = task.ext.prefix ?: "${meta.id}.debarcoded" // args is for `pacbio_multiplexing_barcodes_check_${meta.barcode}.txt` + def args = task.ext.args ?: '' + def prefix = task.ext.prefix ?: "${meta.id}" + def barcodes = "bc1008_BAK8A_OA" """ - touch ${prefix}.txt + touch ${prefix}-${barcodes}-filtered.txt cat <<-END_VERSIONS > versions.yml "${task.process}": diff --git a/subworkflows/local/extract_nt_blast.nf b/subworkflows/local/extract_nt_blast.nf index a8102734..5ebb9bd6 100644 --- a/subworkflows/local/extract_nt_blast.nf +++ b/subworkflows/local/extract_nt_blast.nf @@ -1,6 +1,5 @@ // MODULE IMPORT BLOCK include { BLAST_BLASTN } from '../../modules/nf-core/blast/blastn/main' - include { SEQKIT_SLIDING } from '../../modules/nf-core/seqkit/sliding/main' include { BLAST_CHUNK_TO_FULL } from '../../modules/local/blast_chunk_to_full' include { REFORMAT_FULL_OUTFMT6 } from '../../modules/local/reformat_full_outfmt6' diff --git a/subworkflows/local/extract_tiara_hits.nf b/subworkflows/local/extract_tiara_hits.nf index d2eed376..e639854b 100644 --- a/subworkflows/local/extract_tiara_hits.nf +++ b/subworkflows/local/extract_tiara_hits.nf @@ -1,7 +1,5 @@ include { TIARA_TIARA } from '../../modules/nf-core/tiara/tiara/main' - - workflow EXTRACT_TIARA_HITS { take: @@ -19,4 +17,4 @@ workflow EXTRACT_TIARA_HITS { ch_tiara = TIARA_TIARA.out.classifications versions = ch_versions.ifEmpty(null) -} \ No newline at end of file +} diff --git a/subworkflows/local/generate_genome.nf b/subworkflows/local/generate_genome.nf index 4480d0b4..c251d23c 100755 --- a/subworkflows/local/generate_genome.nf +++ b/subworkflows/local/generate_genome.nf @@ -11,6 +11,7 @@ include { GET_LARGEST_SCAFF } from '../../modules/local/get_largest_scaff' workflow GENERATE_GENOME { take: to_chromsize // tuple [[meta.id], file] + barcodes main: ch_versions = Channel.empty() diff --git a/subworkflows/local/pacbio_barcode_check.nf b/subworkflows/local/pacbio_barcode_check.nf index 614dde9f..130f2242 100644 --- a/subworkflows/local/pacbio_barcode_check.nf +++ b/subworkflows/local/pacbio_barcode_check.nf @@ -1,57 +1,62 @@ -import { CHECK_BARCODE } from '../modules/local/check_barcode' -import { BLAST_MAKEBLASTDB } from '../modules/local/blast/makeblastdb' -import { BLAST_BLASTN } from '../modules/local/blast/blastn' -import { FILTER_BARCODE } from '../modules/local/filter_barcode' +// +// PACBIO_BARCODE_CHECK IDENTIFIED LOCATIONS OF BARCODE SEQUENCES IN THE INPUT ASSEMBLY +// -workflow PACBIO_BARCODE_CHECK () { +// +// MODULE IMPORT BLOCK +// +include { CHECK_BARCODE } from '../../modules/local/check_barcode' +include { BLAST_MAKEBLASTDB } from '../../modules/nf-core/blast/makeblastdb' +include { BLAST_BLASTN } from '../../modules/nf-core/blast/blastn' +include { FILTER_BARCODE } from '../../modules/local/filter_barcode' + +workflow PACBIO_BARCODE_CHECK { take: reference_tuple pacbio_tuple - barcode_file + barcodes barcode_multiplex main: ch_versions = Channel.empty() - barcodes = Channel.empty() - - if (barcode_file.isEmpty("YES") == "YES") { - Channel - .fromPath("./assets/pacbio_adaptors.fa") - .map { it -> - tuple( [id: "pacbio_barcodes"], - it - ) - } - .set { barcodes } - } else { - Channel - .fromPath(barcode_file) - .map { it -> - tuple( [id: "pacbio_barcodes"], - it - ) - } - .set { barcodes } - } - - - barcodes.view() // // MODULE: CHECK FOR KNOWN BARCODES IN SAMPLE DATA // CHECK_BARCODE ( - pacbio_tuple - barcodes, + pacbio_tuple, + barcodes.map{it[1]}, barcode_multiplex ) ch_versions = ch_versions.mix(CHECK_BARCODE.out.versions) + // + // LOGIC: INCASE THE PIPELINE MANAGES TO CONTINUE AFTER FAILING CHECK_BARCODE + // HERE WE ENSURE THE REST OF THE SUBWORKFLOW DOES NOT RUN + // + CHECK_BARCODE.out.result + .branch { + valid : it.toString().contains('BARCODES FOUND') + invalid : it.toString().contains('FAILED') + } + .set { gatekeeping } + + // + // LOGIC: ENSURE THE VALID CHANNEL IS MIXED WITH THE BARCODES CHANNEL + // ACTS AS A GATEKEEPER FOR THE FLOW + // + gatekeeping.valid + .combine( barcodes ) + .map {str, meta, file -> + file + } + .set {ch_new_barcodes} + // // MODULE: GENERATE BLAST DB ON ORGANELLAR GENOME // BLAST_MAKEBLASTDB ( - barcodes + ch_new_barcodes ) ch_versions = ch_versions.mix(BLAST_MAKEBLASTDB.out.versions) @@ -65,16 +70,33 @@ workflow PACBIO_BARCODE_CHECK () { ch_versions = ch_versions.mix(BLAST_BLASTN.out.versions) // - // LOGIC: FOR I IN CSV LIST RUN FILTER BLAST + // LOGIC: FOR I (MAPPED TO OTHER CHANNELS) IN CSV LIST RUN FILTER BLAST // - // TODO: CLAFFIFY THIS BIT + barcode_multiplex + .map { it -> + tuple( it.split(',') ) + } + .flatten() + .combine( reference_tuple ) + .combine( BLAST_BLASTN.out.txt ) + .multiMap { code, ref_meta, ref, blast_meta, blast -> + barcodes: code + reference: tuple( ref_meta, ref ) + blastdata: tuple( blast_meta, blast ) + } + .set {testing} + // + // MODULE: CREATE A FILTERED BLAST OUTPUT PER BARCODE + // FILTER_BARCODE ( - reference_tuple, - //i in csv + testing.reference, + testing.blastdata, + testing.barcodes ) ch_versions = ch_versions.mix(FILTER_BARCODE.out.versions) emit: filtered = FILTER_BARCODE.out.debarcoded + versions = ch_versions.ifEmpty(null) } diff --git a/subworkflows/local/yaml_input.nf b/subworkflows/local/yaml_input.nf index f287a8fe..be6c2659 100644 --- a/subworkflows/local/yaml_input.nf +++ b/subworkflows/local/yaml_input.nf @@ -22,7 +22,7 @@ workflow YAML_INPUT { assembly_title: ( data.assembly_title ) pacbio_reads: ( data.pacbio_reads_path ) assembly_path: ( file(data.assembly_path) ) - pacbio_barcodes: ( data.pacbio_barcodes ) + pacbio_barcodes: ( file(data.pacbio_barcodes) ) pacbio_multiplexing_barcode_names: ( data.pacbio_multiplexing_barcode_names) sci_name: ( data.sci_name ) taxid: ( data.taxid ) @@ -68,9 +68,18 @@ workflow YAML_INPUT { } .set { ch_pacbio } + group.pacbio_barcodes + .map { file -> + tuple( [ id: "pacbio barcodes" ], + file + ) + } + .set { ch_barcodes } + emit: reference_tuple = ch_reference pacbio_tuple = ch_pacbio + pacbio_barcodes = ch_barcodes pacbio_multiplex_codes = group.pacbio_multiplexing_barcode_names assembly_title = group.assembly_title taxid = group.taxid diff --git a/workflows/ascc.nf b/workflows/ascc.nf index b182ea9c..cb6aa3da 100644 --- a/workflows/ascc.nf +++ b/workflows/ascc.nf @@ -20,15 +20,6 @@ WorkflowAscc.initialise(params, log) IMPORT LOCAL MODULES/SUBWORKFLOWS ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ -<<<<<<< HEAD -include { YAML_INPUT } from '../subworkflows/local/yaml_input' -include { GENERATE_GENOME } from '../subworkflows/local/generate_genome' -include { EXTRACT_TIARA_HITS } from '../subworkflows/local/extract_tiara_hits' -include { EXTRACT_NT_BLAST } from '../subworkflows/local/extract_nt_blast' -include { RUN_FCSADAPTOR } from '../subworkflows/local/run_fcsadaptor' -include { PACBIO_BARCODE_CHECK } from '../subworkflows/local/pacbio_barcode_check' -======= ->>>>>>> dev // // SUBWORKFLOW: Consisting of a mix of local and nf-core/modules @@ -38,8 +29,9 @@ include { GENERATE_GENOME } from '../subworkflows/local/generate_g include { EXTRACT_TIARA_HITS } from '../subworkflows/local/extract_tiara_hits' include { EXTRACT_NT_BLAST } from '../subworkflows/local/extract_nt_blast' include { RUN_FCSADAPTOR } from '../subworkflows/local/run_fcsadaptor' -include { RUN_NT_KRAKEN } from '..//subworkflows/local/run_nt_kraken' +include { RUN_NT_KRAKEN } from '../subworkflows/local/run_nt_kraken' include { RUN_FCSGX } from '../subworkflows/local/run_fcsgx' +include { PACBIO_BARCODE_CHECK } from '../subworkflows/local/pacbio_barcode_check' // // MODULE: Local modules @@ -78,6 +70,9 @@ workflow ASCC { ) ch_versions = ch_versions.mix(YAML_INPUT.out.versions) + // + // MODULE: CALCULATE GC CONTENT PER SCAFFOLD IN INPUT FASTA + // GC_CONTENT ( YAML_INPUT.out.reference_tuple ) @@ -91,22 +86,23 @@ workflow ASCC { // SUBWORKFLOW: GENERATE GENOME FILE // GENERATE_GENOME ( - YAML_INPUT.out.reference_tuple + YAML_INPUT.out.reference_tuple, + YAML_INPUT.out.pacbio_barcodes ) ch_versions = ch_versions.mix(GENERATE_GENOME.out.versions) // // SUBWORKFLOW: EXTRACT RESULTS HITS FROM TIARA // -/* EXTRACT_TIARA_HITS ( + EXTRACT_TIARA_HITS ( GENERATE_GENOME.out.reference_tuple ) - ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions) */ + ch_versions = ch_versions.mix(EXTRACT_TIARA_HITS.out.versions) // // LOGIC: INJECT SLIDING WINDOW VALUES INTO REFERENCE // - /*YAML_INPUT.out.reference_tuple + YAML_INPUT.out.reference_tuple .combine ( YAML_INPUT.out.seqkit_sliding.toInteger() ) .combine ( YAML_INPUT.out.seqkit_window.toInteger() ) .map { meta, ref, sliding, window -> @@ -116,7 +112,7 @@ workflow ASCC { ], file(ref) )} - .set { modified_input }*/ + .set { modified_input } // // SUBWORKFLOW: EXTRACT RESULTS HITS FROM NT-BLAST @@ -149,7 +145,7 @@ workflow ASCC { ch_versions = ch_versions.mix(RUN_FCSADAPTOR.out.versions) // - // SUBWORKFLOW: + // SUBWORKFLOW: IDENTITY PACBIO BARCODES IN INPUT DATA // PACBIO_BARCODE_CHECK ( YAML_INPUT.out.reference_tuple, @@ -157,7 +153,7 @@ workflow ASCC { YAML_INPUT.out.pacbio_barcodes, YAML_INPUT.out.pacbio_multiplex_codes ) - ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions) + //ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions) // // SUBWORKFLOW: COLLECT SOFTWARE VERSIONS From e92f0d9f3ebd2ead9d1d0b20da1cae7db5a1e832 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Oct 2023 16:27:05 +0100 Subject: [PATCH 08/23] Adding test genome back --- assets/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/test.yaml b/assets/test.yaml index d733870d..4428f0df 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,4 +1,4 @@ -assembly_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/genomic.fna +assembly_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa assembly_title: asccTinyTest pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa pacbio_multiplexing_barcode_names: "bc1008_BAK8A_OA,bc1009_BAK8A_OA" From 8054daa983af5eedb0cde6fc06d0ca5e950f51bb Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Oct 2023 16:29:02 +0100 Subject: [PATCH 09/23] improving note --- bin/pacbio_barcode_check.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/bin/pacbio_barcode_check.py b/bin/pacbio_barcode_check.py index 17480bc8..1b120e20 100755 --- a/bin/pacbio_barcode_check.py +++ b/bin/pacbio_barcode_check.py @@ -1,7 +1,10 @@ #!/usr/bin/env python3 """ -Notes: Forces sys.exit(1) to kill pipeline +Pacbio Barcode Check +------------------------ +Looks for Pacbio barcodes in ref and data. +If supplied barcodes arn't in data then pipeline dies. Originally written by Eerik Aunin @eeaunin From e2cdff0446f5732e58e6d10587442b50fa164479 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Thu, 12 Oct 2023 16:34:08 +0100 Subject: [PATCH 10/23] Adding pacbio_barcodes file path --- assets/github_testing/test.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/assets/github_testing/test.yaml b/assets/github_testing/test.yaml index 022e9f6e..f50930d1 100755 --- a/assets/github_testing/test.yaml +++ b/assets/github_testing/test.yaml @@ -1,6 +1,7 @@ assembly_path: /home/runner/work/ascc/ascc/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa assembly_title: asccTinyTest -pacbio_multiplexing_barcode_names: "" +pacbio_barcodes: /home/runner/work/ascc/ascc/assets/pacbio_adaptors.fa +pacbio_multiplexing_barcode_names: "bc1008_BAK8A_OA,bc1009_BAK8A_OA" pacbio_reads_path: /home/runner/work/ascc/ascc/asccTinyTest/pacbio/ sci_name: "Plasmodium yoelii yoelii 17XNL" taxid: 352914 From 56379709076c62b63eda250d4ac943e18b3a5bf9 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 13:21:33 +0100 Subject: [PATCH 11/23] Updates for reviews --- modules/local/filter_barcode.nf | 3 ++- modules/local/gc_content.nf | 2 +- modules/local/get_lineage_for_kraken.nf | 1 + 3 files changed, 4 insertions(+), 2 deletions(-) diff --git a/modules/local/filter_barcode.nf b/modules/local/filter_barcode.nf index 7ab37ab6..59e0eb3e 100644 --- a/modules/local/filter_barcode.nf +++ b/modules/local/filter_barcode.nf @@ -24,11 +24,12 @@ process FILTER_BARCODE { --input ${fasta} \\ --barcode ${barcodes} \\ --blast ${blast_data} \\ - --output ${prefix}-${barcodes}-filtered.txt + --output ${prefix}_${barcodes}_filtered.txt cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') + biopython: \$(python3 -c 'import Bio; print(Bio.__version__)'') filter_barcode_blast_results: \$(filter_barcode_blast_results.py -v) END_VERSIONS """ diff --git a/modules/local/gc_content.nf b/modules/local/gc_content.nf index 76520f1f..825b0ec8 100644 --- a/modules/local/gc_content.nf +++ b/modules/local/gc_content.nf @@ -12,7 +12,7 @@ process GC_CONTENT { output: tuple val(meta), path( "*-gc.txt" ) , emit: txt - path "versions.yml" , emit: versions + path "versions.yml" , emit: versions script: def prefix = task.ext.prefix ?: "${meta.id}" diff --git a/modules/local/get_lineage_for_kraken.nf b/modules/local/get_lineage_for_kraken.nf index 6a55a5ed..362135fa 100755 --- a/modules/local/get_lineage_for_kraken.nf +++ b/modules/local/get_lineage_for_kraken.nf @@ -31,6 +31,7 @@ process GET_LINEAGE_FOR_KRAKEN { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') + pandas: \$(python3 -c 'import pandas; print(pandas.__version__)') general_purpose_functions.py: \$(general_purpose_functions.py --version | cut -d' ' -f2) get_lineage_for_kraken_results.py: \$(get_lineage_for_kraken_results.py --version | cut -d' ' -f2) END_VERSIONS From efdd8198678423717cdf0ba943ab9d05ab8bb90d Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 13:22:00 +0100 Subject: [PATCH 12/23] input types --- subworkflows/local/pacbio_barcode_check.nf | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/subworkflows/local/pacbio_barcode_check.nf b/subworkflows/local/pacbio_barcode_check.nf index 130f2242..465fdb3e 100644 --- a/subworkflows/local/pacbio_barcode_check.nf +++ b/subworkflows/local/pacbio_barcode_check.nf @@ -12,10 +12,10 @@ include { FILTER_BARCODE } from '../../modules/local/filter_barcode' workflow PACBIO_BARCODE_CHECK { take: - reference_tuple - pacbio_tuple - barcodes - barcode_multiplex + reference_tuple // tuple [[meta.id], reference ] + pacbio_tuple // tuple [[meta.id], pacbio-files] + barcodes // tuple [[meta.id], barcode-file] + barcode_multiplex // val (csv-list-string) main: ch_versions = Channel.empty() From 8d942a3d29a03d282de893eaa93400c4841e0ce7 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 13:22:28 +0100 Subject: [PATCH 13/23] Uncomment versions mix --- workflows/ascc.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/workflows/ascc.nf b/workflows/ascc.nf index cb6aa3da..15d7b2f3 100644 --- a/workflows/ascc.nf +++ b/workflows/ascc.nf @@ -153,7 +153,7 @@ workflow ASCC { YAML_INPUT.out.pacbio_barcodes, YAML_INPUT.out.pacbio_multiplex_codes ) - //ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions) + ch_versions = ch_versions.mix(PACBIO_BARCODE_CHECK.out.versions) // // SUBWORKFLOW: COLLECT SOFTWARE VERSIONS From 71eea92e853a3c800c9a7fcf9ebbc9960e8c582a Mon Sep 17 00:00:00 2001 From: Damon-Lee Pointon <51855558+DLBPointon@users.noreply.github.com> Date: Mon, 16 Oct 2023 13:34:32 +0100 Subject: [PATCH 14/23] Update filter_barcode.nf Too many ''' --- modules/local/filter_barcode.nf | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/local/filter_barcode.nf b/modules/local/filter_barcode.nf index 59e0eb3e..8d6e52f8 100644 --- a/modules/local/filter_barcode.nf +++ b/modules/local/filter_barcode.nf @@ -29,7 +29,7 @@ process FILTER_BARCODE { cat <<-END_VERSIONS > versions.yml "${task.process}": python: \$(python --version | sed 's/Python //g') - biopython: \$(python3 -c 'import Bio; print(Bio.__version__)'') + biopython: \$(python3 -c 'import Bio; print(Bio.__version__)') filter_barcode_blast_results: \$(filter_barcode_blast_results.py -v) END_VERSIONS """ From 7e515836a8b197431247dced4d6537f2bc7ba87c Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 16:05:50 +0100 Subject: [PATCH 15/23] Reviewer corrections --- subworkflows/local/pacbio_barcode_check.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/pacbio_barcode_check.nf b/subworkflows/local/pacbio_barcode_check.nf index 465fdb3e..02b53b38 100644 --- a/subworkflows/local/pacbio_barcode_check.nf +++ b/subworkflows/local/pacbio_barcode_check.nf @@ -36,8 +36,8 @@ workflow PACBIO_BARCODE_CHECK { // CHECK_BARCODE.out.result .branch { - valid : it.toString().contains('BARCODES FOUND') - invalid : it.toString().contains('FAILED') + valid : it.toString().contains('barcodes') + invalid : !it.toString().contains('barcodes') } .set { gatekeeping } From 38493731b1861f4a1569a2eb94a5e5c501987949 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 16:07:01 +0100 Subject: [PATCH 16/23] Reviewer corrections --- modules/local/filter_barcode.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/local/filter_barcode.nf b/modules/local/filter_barcode.nf index 59e0eb3e..2849052e 100644 --- a/modules/local/filter_barcode.nf +++ b/modules/local/filter_barcode.nf @@ -37,9 +37,9 @@ process FILTER_BARCODE { stub: def args = task.ext.args ?: '' def prefix = task.ext.prefix ?: "${meta.id}" - def barcodes = "bc1008_BAK8A_OA" + def barcodes = "bc1008" """ - touch ${prefix}-${barcodes}-filtered.txt + touch ${prefix}_${barcodes}_filtered.txt cat <<-END_VERSIONS > versions.yml "${task.process}": From 8c23023d1b66a178c86d6df613e65a3176acb159 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 16:07:39 +0100 Subject: [PATCH 17/23] Reviewer corrections --- bin/pacbio_barcode_check.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/bin/pacbio_barcode_check.py b/bin/pacbio_barcode_check.py index 1b120e20..44f8e584 100755 --- a/bin/pacbio_barcode_check.py +++ b/bin/pacbio_barcode_check.py @@ -4,7 +4,11 @@ Pacbio Barcode Check ------------------------ Looks for Pacbio barcodes in ref and data. -If supplied barcodes arn't in data then pipeline dies. + +If any User-supplied barcodes arn't in data then pipeline dies. +If no barcodes are found then pipeline carries on + +Based on a standard operating procedure developed by James Torrance Originally written by Eerik Aunin @eeaunin @@ -53,7 +57,7 @@ def check_if_barcodes_exist_in_barcodes_fasta(barcodes_list, barcodes_fasta_path sys.exit(1) # If this print statement is reached, all user-supplied codes are present. - print("BARCODES FOUND\n") + print("The query barcodes exist in the barcodes database file") def main(barcodes_fasta_path, pacbio_read_files, pacbio_multiplexing_barcode_names): @@ -72,10 +76,12 @@ def main(barcodes_fasta_path, pacbio_read_files, pacbio_multiplexing_barcode_nam if len(barcodes_list) == 0: barcodes_list = detect_barcodes_from_read_file_names(barcodes_fasta_path, pacbio_read_files) + # Here script should break successfully if len(barcodes_list) == 0: sys.stderr.write( "Skipping the PacBio barcodes check, as no barcodes were specified by the user and no barcodes were found in PacBio read file names\n" ) + print("Skipping the PacBio barcodes check, as no barcodes were specified by the user and no barcodes were found in PacBio read file names\n") sys.exit(0) check_if_barcodes_exist_in_barcodes_fasta(barcodes_list, barcodes_fasta_path) From 4e4862f82278d4902ee0bd5cc7cf1de866f95af7 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 16:09:07 +0100 Subject: [PATCH 18/23] Updating adaptors --- assets/pacbio_adaptors.fa | 224 +++++++++++++++++++++++++++++++++++--- 1 file changed, 208 insertions(+), 16 deletions(-) diff --git a/assets/pacbio_adaptors.fa b/assets/pacbio_adaptors.fa index 49ffc3f0..34826e8c 100644 --- a/assets/pacbio_adaptors.fa +++ b/assets/pacbio_adaptors.fa @@ -1,32 +1,224 @@ ->bc1001_BAK8A_OA +>bc1001 CACATATCAGAGTGCGT ->bc1002_BAK8A_OA +>bc1002 ACACACAGACTGTGAGT ->bc1003_BAK8A_OA +>bc1003 ACACATCTCGTGAGAGT ->bc1008_BAK8A_OA +>bc1008 ACAGTCGAGCGCTGCGT ->bc1009_BAK8A_OA +>bc1009 ACACACGCGAGACAGAT ->bc1010_BAK8A_OA +>bc1010 ACGCGCTATCTCAGAGT ->bc1011_BAK8A_OA +>bc1011 CTATACGTATATCTATT ->bc1012_BAK8A_OA +>bc1012 ACACTAGATCGCGTGTT ->bc1015_BAK8B_OA +>bc1015 CGCATGACACGTGTGTT ->bc1016_BAK8B_OA +>bc1016 CATAGAGAGATAGTATT ->bc1017_BAK8B_OA +>bc1017 CACACGCGCGCTATATT ->bc1018_BAK8B_OA +>bc1018 TCACGTGCTCACTGTGT ->bc1019_BAK8B_OA +>bc1019 ACACACTCTATCAGATT ->bc1020_BAK8B_OA +>bc1020 CACGACACGACGATGTT ->bc1021_BAK8B_OA +>bc1021 CTATACATAGTGATGTT ->bc1022_BAK8B_OA +>bc1022 CACTCACGTGTGATATT +>bc2001 +ATCGTGCGACGAGTAT +>bc2002 +TGCATGTCATGAGTAT +>bc2003 +ACGAGTGCTCGAGTAT +>bc2004 +TGCAGTGCTCGAGTAT +>bc2005 +TGACTCGATCGAGTAT +>bc2006 +CATGCGATCTGAGTAT +>bc2007 +ACTAGCATCTGAGTAT +>bc2008 +ACGCTAGTCTGAGTAT +>bc2009 +CGATCGCACTGAGTAT +>bc2010 +TACGTAGTATGAGTAT +>bc2011 +CTGACAGTACGAGTAT +>bc2012 +TCGTACTACTGAGTAT +>bc2013 +CTGCGTAGACGAGTAT +>bc2014 +ATACATGCACGAGTAT +>bc2015 +CGACATAGATGAGTAT +>bc2016 +ATCTGCACGTGAGTAT +>bc2017 +CTATGATAGCGAGTAT +>bc2018 +CGATCAGTGCGAGTAT +>bc2019 +CGTCATAGTCGAGTAT +>bc2020 +ACTATGCGTCGAGTAT +>bc2021 +CGTACATGCTGAGTAT +>bc2022 +TCATCGACGTGAGTAT +>bc2023 +TCGCATGACTGAGTAT +>bc2024 +CATGATCGACGAGTAT +>bc2025 +ACGCACGTACGAGTAT +>bc2026 +CAGTAGCGTCGAGTAT +>bc2027 +TGACTGTAGCGAGTAT +>bc2028 +ACTGCAGCACGAGTAT +>bc2029 +TAGCAGTATCGAGTAT +>bc2030 +CATACAGCATGAGTAT +>bc2031 +ATAGCGTACTGAGTAT +>bc2032 +ATAGACGAGTGAGTAT +>bc2033 +CGACTCGTATGAGTAT +>bc2034 +TACTAGTGACGAGTAT +>bc2035 +CAGCTGACATGAGTAT +>bc2036 +ACGTCGCTGCGAGTAT +>bc2037 +CAGTATGAGCGAGTAT +>bc2038 +TCACGACGACGAGTAT +>bc2039 +CATGTATGTCGAGTAT +>bc2040 +TGCTGCGACTGAGTAT +>bc2041 +TATGATCACTGAGTAT +>bc2042 +TCTGCACTGCGAGTAT +>bc2043 +ACGATGACGTGAGTAT +>bc2044 +CGATGATGCTGAGTAT +>bc2045 +TACGACAGTCGAGTAT +>bc2046 +ATAGCATGTCGAGTAT +>bc2047 +CATAGTACTCGAGTAT +>bc2048 +TGATGCTAGTGAGTAT +>bc2049 +TAGTCTGCGTGAGTAT +>bc2050 +CTCATCTATCGAGTAT +>bc2051 +TGCATACTGCGAGTAT +>bc2052 +CAGACTAGTCGAGTAT +>bc2053 +ATCGTGATCTGAGTAT +>bc2054 +CTGCGATCACGAGTAT +>bc2055 +CTCAGCATACGAGTAT +>bc2056 +TCGCAGCGTCGAGTAT +>bc2057 +TAGCACGCATGAGTAT +>bc2058 +TACTGACGCTGAGTAT +>bc2059 +ATCTGACTATGAGTAT +>bc2060 +ATACGAGCTCGAGTAT +>bc2061 +CGAGCACGCTGAGTAT +>bc2062 +TCTGCGTATCGAGTAT +>bc2063 +TCTGCATCATGAGTAT +>bc2064 +TGCGTGATGCGAGTAT +>bc2065 +TGAGCTATGCGAGTAT +>bc2066 +CTGTCGTAGTGAGTAT +>bc2067 +ATCGATGCATGAGTAT +>bc2068 +ACTACGTGATGAGTAT +>bc2069 +TCTATGACATGAGTAT +>bc2070 +TACTGCTCACGAGTAT +>bc2071 +CGAGTCTAGCGAGTAT +>bc2072 +TATCAGTAGTGAGTAT +>bc2073 +ATCACTAGTCGAGTAT +>bc2074 +TATCACGACTGAGTAT +>bc2075 +CTCGTCAGATGAGTAT +>bc2076 +CAGCAGTGACGAGTAT +>bc2077 +TGCGACGTGCGAGTAT +>bc2078 +CTCACTGAGTGAGTAT +>bc2079 +CACTGAGCGTGAGTAT +>bc2080 +CAGCGTCTACGAGTAT +>bc2081 +CTACTATGTCGAGTAT +>bc2082 +ATGTACAGACGAGTAT +>bc2083 +ACTCATCAGTGAGTAT +>bc2084 +CTGAGCACTCGAGTAT +>bc2085 +ATCATCTACTGAGTAT +>bc2086 +TACATGCGATGAGTAT +>bc2087 +TCGCTGTCACGAGTAT +>bc2088 +ACGCTCATGCGAGTAT +>bc2089 +TACTAGCAGCGAGTAT +>bc2090 +CGTAGCAGATGAGTAT +>bc2091 +CGTGCTCGTCGAGTAT +>bc2092 +ACAGCTGTACGAGTAT +>bc2093 +TCGATGCTACGAGTAT +>bc2094 +TAGATACAGCGAGTAT +>bc2095 +CTACTCATACGAGTAT +>bc2096 +ATGTACTAGTGAGTAT From 1e5b9acf3b6b0ac1e0ca6bd4411d61d8796a355a Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 16:11:15 +0100 Subject: [PATCH 19/23] black linting --- bin/pacbio_barcode_check.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/bin/pacbio_barcode_check.py b/bin/pacbio_barcode_check.py index 44f8e584..5103d5bf 100755 --- a/bin/pacbio_barcode_check.py +++ b/bin/pacbio_barcode_check.py @@ -81,7 +81,9 @@ def main(barcodes_fasta_path, pacbio_read_files, pacbio_multiplexing_barcode_nam sys.stderr.write( "Skipping the PacBio barcodes check, as no barcodes were specified by the user and no barcodes were found in PacBio read file names\n" ) - print("Skipping the PacBio barcodes check, as no barcodes were specified by the user and no barcodes were found in PacBio read file names\n") + print( + "Skipping the PacBio barcodes check, as no barcodes were specified by the user and no barcodes were found in PacBio read file names\n" + ) sys.exit(0) check_if_barcodes_exist_in_barcodes_fasta(barcodes_list, barcodes_fasta_path) From 83151127127ee4a06c933b61cd7262852935ea46 Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 16:14:25 +0100 Subject: [PATCH 20/23] Updating arguments in test.yaml --- assets/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/test.yaml b/assets/test.yaml index 4428f0df..a53d6324 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,7 +1,7 @@ assembly_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa assembly_title: asccTinyTest pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa -pacbio_multiplexing_barcode_names: "bc1008_BAK8A_OA,bc1009_BAK8A_OA" +pacbio_multiplexing_barcode_names: "bc1008,bc1009" pacbio_reads_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/pacbio/ sci_name: "Plasmodium yoelii yoelii 17XNL" taxid: 352914 From cd2e32ac428af492cf9d8132e54a9fd4120ce1fe Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 16:20:28 +0100 Subject: [PATCH 21/23] Updating test.yaml --- assets/github_testing/test.yaml | 2 +- assets/test.yaml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/assets/github_testing/test.yaml b/assets/github_testing/test.yaml index f50930d1..79130625 100755 --- a/assets/github_testing/test.yaml +++ b/assets/github_testing/test.yaml @@ -1,7 +1,7 @@ assembly_path: /home/runner/work/ascc/ascc/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa assembly_title: asccTinyTest pacbio_barcodes: /home/runner/work/ascc/ascc/assets/pacbio_adaptors.fa -pacbio_multiplexing_barcode_names: "bc1008_BAK8A_OA,bc1009_BAK8A_OA" +pacbio_multiplexing_barcode_names: "bc1008,bc1009" pacbio_reads_path: /home/runner/work/ascc/ascc/asccTinyTest/pacbio/ sci_name: "Plasmodium yoelii yoelii 17XNL" taxid: 352914 diff --git a/assets/test.yaml b/assets/test.yaml index a53d6324..8cf9dd29 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,4 +1,4 @@ -assembly_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa +assembly_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/genomic.fna assembly_title: asccTinyTest pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa pacbio_multiplexing_barcode_names: "bc1008,bc1009" From 0c538e63487eaaf0dce6bc78dbe47b5305cf6bcb Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 16:20:43 +0100 Subject: [PATCH 22/23] Updating test.yaml --- assets/test.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/assets/test.yaml b/assets/test.yaml index 8cf9dd29..a53d6324 100755 --- a/assets/test.yaml +++ b/assets/test.yaml @@ -1,4 +1,4 @@ -assembly_path: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/genomic.fna +assembly_path: /lustre/scratch123/tol/resources/treeval/treeval-testdata/asccTinyTest/assembly/Pyoeliiyoelii17XNL_assembly.fa assembly_title: asccTinyTest pacbio_barcodes: /nfs/treeoflife-01/teams/tola/users/dp24/ascc/assets/pacbio_adaptors.fa pacbio_multiplexing_barcode_names: "bc1008,bc1009" From b35f73214fe9896db68a3a92f364e06d2e664bba Mon Sep 17 00:00:00 2001 From: DLBPointon Date: Mon, 16 Oct 2023 17:03:19 +0100 Subject: [PATCH 23/23] Removing references to organellar blast --- subworkflows/local/pacbio_barcode_check.nf | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/subworkflows/local/pacbio_barcode_check.nf b/subworkflows/local/pacbio_barcode_check.nf index 02b53b38..953f68d9 100644 --- a/subworkflows/local/pacbio_barcode_check.nf +++ b/subworkflows/local/pacbio_barcode_check.nf @@ -53,7 +53,7 @@ workflow PACBIO_BARCODE_CHECK { .set {ch_new_barcodes} // - // MODULE: GENERATE BLAST DB ON ORGANELLAR GENOME + // MODULE: GENERATE BLAST DB ON PACBIO BARCODES // BLAST_MAKEBLASTDB ( ch_new_barcodes @@ -61,7 +61,7 @@ workflow PACBIO_BARCODE_CHECK { ch_versions = ch_versions.mix(BLAST_MAKEBLASTDB.out.versions) // - // MODULE: RUN BLAST WITH GENOME AGAINST ORGANELLAR GENOME + // MODULE: RUN BLAST WITH GENOME AGAINST BARCODE DB // BLAST_BLASTN ( reference_tuple,