Skip to content

Commit

Permalink
Merge pull request #55 from sanger-tol/dp24_btk_datasets_ea10_edits2
Browse files Browse the repository at this point in the history
Edits to the dp24_btk_datasets branch
  • Loading branch information
DLBPointon authored Aug 7, 2024
2 parents 6d6b6fa + 7e7370b commit 748b3ca
Show file tree
Hide file tree
Showing 7 changed files with 40 additions and 30 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/ci.yml
Original file line number Diff line number Diff line change
Expand Up @@ -139,4 +139,4 @@ jobs:
# For example: adding multiple test runs with different parameters
# Remember that you can parallelise this by using strategy.matrix
run: |
nextflow run ./sanger-ascc/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test,singularity --outdir ./results --include ALL --exclude busco_btk
nextflow run ./sanger-ascc/${{ steps.branch-names.outputs.current_branch }}/main.nf -profile test,singularity --outdir ./results --include ALL --exclude btk_busco
6 changes: 3 additions & 3 deletions bin/ascc_merge_tables.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
Script for merging contaminant check results into one table
Version: {VERSION}
---
Written by Eerik Aunin
Written by Eerik Aunin
Re-Written by Damon-Lee Pointon (dp24/DLBPointon)
"""
Expand Down Expand Up @@ -35,7 +35,7 @@ def parse_args():
parser.add_argument("-ud", "--uniprot_diamond", type=str, help="Uniprot Diamond file")
parser.add_argument("-cv", "--contigviz", type=str, help="Contigviz file")
parser.add_argument("-btk", "--blobtoolkit", type=str, help="Blobtoolkit file")
parser.add_argument("-bb", "--busco_btk", type=str, help="Busco Blobtoolkit file")
parser.add_argument("-bb", "--btk_busco", type=str, help="Busco Blobtoolkit file")
parser.add_argument("-fg", "--fcs_gx", type=str, help="FCS_GX file")
parser.add_argument("-n", "--sample_name", type=str, help="Name for the sample")
parser.add_argument("-m", "--markerscan", type=str, help="MarkerScan file")
Expand Down Expand Up @@ -284,7 +284,7 @@ def main(args):
paths_dict["cobiontid_markerscan"] = args.markerscan
paths_dict["contigviz"] = args.contigviz
paths_dict["blobtoolkit"] = args.blobtoolkit
paths_dict["btk_busco"] = args.busco_btk
paths_dict["btk_busco"] = args.btk_busco
paths_dict["fcs_gx"] = args.fcs_gx

required_files = ["gc_content"]
Expand Down
25 changes: 18 additions & 7 deletions bin/autofilter.py
Original file line number Diff line number Diff line change
Expand Up @@ -42,9 +42,9 @@ def parse_args():
help="Path to the assembly_autofiltered.fasta file",
default="autofiltered.fasta",
)
parser.add_argument(
"-c", "--fcs_gx_and_tiara_summary", type=str, help="Path to the fcs-gx_and_tiara_combined_summary.csv file"
)
# parser.add_argument(
# "-c", "--fcs_gx_and_tiara_summary", type=str, help="Path to the fcs-gx_and_tiara_combined_summary.csv file"
# )
parser.add_argument(
"-r",
"--rejected_seq",
Expand All @@ -56,6 +56,13 @@ def parse_args():
parser.add_argument(
"-n", "--ncbi_rankedlineage_path", type=str, help="Path to the rankedlineage.dmp of NCBI taxonomy"
)
parser.add_argument(
"--tiara_action_mode",
type=str,
choices=["warn", "remove"],
default="warn",
help="Action when Tiara detects a putative contaminant that is not reported as a contaminant by FCS-GX. The choices are 'warn' (print a warning) or 'remove' (remove this sequence from the assembly). Default: warn",
)
parser.add_argument("-v", "--version", action="version", version=VERSION)
return parser.parse_args()

Expand Down Expand Up @@ -179,15 +186,15 @@ def main():
tiara_results_path = args.tiara
fcs_gx_summary_path = args.fcsgx_summary
filtered_assembly_path = args.output_auto_filtered
combined_summary = args.fcs_gx_and_tiara_summary
# combined_summary = args.fcs_gx_and_tiara_summary
excluded_seq_list_path = args.rejected_seq
ncbi_rankedlist = args.ncbi_rankedlineage_path

Path(f"./fasta/filtered").mkdir(parents=True, exist_ok=True)

for i in [ncbi_rankedlist, tiara_results_path, fcs_gx_summary_path, assembly_path]:
if not os.path.isfile(i):
sys.stderr.write(f"{i} WAS NOT AT THE EXPECTED LOCATION\n")
sys.stderr.write(f"{i} was not at the expected location\n")
sys.exit(1)

target_domain = get_domain_from_taxid(args.taxid, ncbi_rankedlist)
Expand All @@ -207,8 +214,12 @@ def main():
tiara_action = tiara_action_dict[scaff]
combined_action = fcs_gx_action
if fcs_gx_action == "NA" and tiara_action == "EXCLUDE":
combined_action = "EXCLUDE"
combined_action_source = "Tiara"
if args.tiara_action_mode == "remove":
combined_action = "EXCLUDE"
combined_action_source = "Tiara"
elif args.tiara_action_mode == "warn":
combined_action = "WARN"
combined_action_source = "Tiara"
if fcs_gx_action == "EXCLUDE" and tiara_action == "EXCLUDE":
combined_action_source = "FCS-GX_and_Tiara"
if combined_action == "EXCLUDE":
Expand Down
6 changes: 3 additions & 3 deletions modules/local/ascc_merge_tables.nf
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,10 @@ process ASCC_MERGE_TABLES {
tag "$meta.id"
label 'process_low'

conda "conda-forge::python=3.9"
conda "conda-forge::python=3.9 conda-forge::pandas=1.5.2"
container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
'https://depot.galaxyproject.org/singularity/python:3.9' :
'biocontainers/python:3.9' }"
'https://depot.galaxyproject.org/singularity/pandas:1.5.2' :
'quay.io/biocontainers/pandas:1.5.2' }"

input:
tuple val(meta), path(gc_content, stageAs: "GC.txt")
Expand Down
4 changes: 2 additions & 2 deletions modules/local/merge_btk_datasets.nf
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ process MERGE_BTK_DATASETS {

input:
tuple val(meta), path(create_btk_datasets)
tuple val(meta2), path(busco_btk_datasets)
tuple val(meta2), path(btk_busco_datasets)

output:
tuple val(meta), path("merged_datasets"), emit: merged_datasets
Expand All @@ -29,7 +29,7 @@ process MERGE_BTK_DATASETS {
merge_btk_datasets.py \\
-m $create_btk_datasets \\
-o ./merged_datasets \\
-b $busco_btk_datasets \\
-b $btk_busco_datasets \\
$args
cat <<-END_VERSIONS > versions.yml
Expand Down
6 changes: 2 additions & 4 deletions modules/local/sanger_tol_btk.nf
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ process SANGER_TOL_BTK {
def profiles = task.ext.profiles ?: ""
def get_version = task.ext.version_data ?: "UNKNOWN - SETTING NOT SET"
def btk_config = btk_config_file ? "-c $btk_config_file" : ""
def pipeline_version = task.ext.version ?: "main"
def pipeline_version = task.ext.version ?: "draft_assemblies"
// YAML used to avoid the use of GCA accession number
// https://github.com/sanger-tol/blobtoolkit/issues/77

Expand All @@ -42,8 +42,6 @@ process SANGER_TOL_BTK {

// outdir should be an arg

// --accession draft \\

// blastx and blastp use the same database hence the StageAs


Expand Down Expand Up @@ -76,7 +74,7 @@ process SANGER_TOL_BTK {

stub:
def prefix = task.ext.prefix ?: "${meta.id}"
def pipeline_version = task.ext.version ?: "main"
def pipeline_version = task.ext.version ?: "draft_assemblies"

"""
mkdir -p ${prefix}_btk_out/blobtoolkit/$gca_accession
Expand Down
21 changes: 11 additions & 10 deletions workflows/ascc.nf
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,8 @@ workflow ASCC {
include_workflow_steps = params.include ? params.include.split(",") : ""
exclude_workflow_steps = params.exclude ? params.exclude.split(",") : ""

btk_busco_run_mode = params.btk_busco_run_mode ? params.btk_busco_run_mode : "conditional"

full_list = ["kmers", "tiara", "coverage", "nt_blast", "nr_diamond", "uniprot_diamond", "kraken", "fcs-gx", "fcs-adaptor", "vecscreen", "btk_busco", "pacbio_barcodes", "organellar_blast", "autofilter_assembly", "ALL", ""]

if (!full_list.containsAll(include_workflow_steps) && !full_list.containsAll(exclude_workflow_steps)) {
Expand Down Expand Up @@ -290,7 +292,7 @@ workflow ASCC {
//
// SUBWORKFLOW: IDENTIFY PACBIO BARCODES IN INPUT DATA
//
if ( include_workflow_steps.contains('barcodes') || include_workflow_steps.contains('ALL') ) {
if ( include_workflow_steps.contains('pacbio_barcodes') || include_workflow_steps.contains('ALL') ) {
PACBIO_BARCODE_CHECK (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.pacbio_tuple,
Expand All @@ -315,7 +317,7 @@ workflow ASCC {
//
// SUBWORKFLOW: CALCULATE AVERAGE READ COVERAGE
//
if ( include_workflow_steps.contains('coverage') || include_workflow_steps.contains('busco_btk') || include_workflow_steps.contains('ALL') ) {
if ( include_workflow_steps.contains('coverage') || include_workflow_steps.contains('btk_busco') || include_workflow_steps.contains('ALL') ) {
RUN_READ_COVERAGE (
YAML_INPUT.out.reference_tuple,
YAML_INPUT.out.assembly_path,
Expand Down Expand Up @@ -372,12 +374,12 @@ workflow ASCC {
modified_input,
YAML_INPUT.out.diamond_nr_database_path
)
nt_full = NUCLEOT_DIAMOND.out.reformed.map{it[1]}
nt_hits = NUCLEOT_DIAMOND.out.hits_file.map{it[1]}
nr_full = NUCLEOT_DIAMOND.out.reformed.map{it[1]}
nr_hits = NUCLEOT_DIAMOND.out.hits_file.map{it[1]}
ch_versions = ch_versions.mix(NUCLEOT_DIAMOND.out.versions)
} else {
nt_hits = []
nt_full = []
nr_hits = []
nr_full = []
}

//
Expand Down Expand Up @@ -411,7 +413,7 @@ workflow ASCC {
ch_kraken1,
ch_kraken2,
ch_kraken3,
nt_full,
nr_full,
un_full,
YAML_INPUT.out.ncbi_taxonomy_path.first()
)
Expand Down Expand Up @@ -449,8 +451,7 @@ workflow ASCC {
// WE ARE USING THE PIPELINE HERE AS A MODULE; THIS REQUIRES IT
// TO BE USED AS AN INTERACTIVE JOB ON WHATEVER EXECUTOR YOU ARE USING.
// This will also eventually check for the above run_btk boolean from autofilter
if ( !exclude_workflow_steps.contains("busco_btk") && include_workflow_steps.contains('busco_btk') && include_workflow_steps.contains("autofilter") && btk_bool.run_btk == "ABNORMAL" || !exclude_workflow_steps.contains("busco_btk") && include_workflow_steps.contains('ALL') ) {

if ( !exclude_workflow_steps.contains("btk_busco") && include_workflow_steps.contains('btk_busco') && btk_busco_run_mode == "conditional" && include_workflow_steps.contains("autofilter_assembly") && btk_bool.run_btk == "ABNORMAL" || !exclude_workflow_steps.contains("btk_busco") && include_workflow_steps.contains('ALL') || btk_busco_run_mode == "mandatory" && !exclude_workflow_steps.contains('btk_busco') && include_workflow_steps.contains('btk_busco') ) {
YAML_INPUT.out.reference_tuple
.combine(ch_bam)
.map{ meta, ref, bam ->
Expand Down Expand Up @@ -505,7 +506,7 @@ workflow ASCC {
ch_kraken3, // FROM -- RUN_NT_KRAKEN.out.lineage.map{it[1]}
ch_nt_blast, // FROM -- EXTRACT_NT_BLAST.out.ch_blast_hits.map{it[1]}
ch_kmers, // FROM -- GET_KMERS_PROFILE.out.combined_csv
nt_hits, // FROM -- NUCLEOT_DIAMOND.out.reformed.map{it[1]}
nr_hits, // FROM -- NUCLEOT_DIAMOND.out.hits_file.map{it[1]}
un_hits, // FROM -- UNIPROT_DIAMOND.out.reformed.map{it[1]}
[], // <-- MARKER SCAN -- NOT IN PIPELINE YET
[], // <-- CONTIGVIZ -- NOT IN PIPELINE YET
Expand Down

0 comments on commit 748b3ca

Please sign in to comment.