From a4feae6ecef9f2bd9d5caa4b9fe2437cd484eaad Mon Sep 17 00:00:00 2001 From: fraser-combe Date: Tue, 29 Oct 2024 15:02:41 -0500 Subject: [PATCH] update basecall to scatter process and reduce input params --- tasks/basecalling/task_dorado_basecall.wdl | 56 +++++++------------ workflows/utilities/wf_dorado_basecalling.wdl | 22 ++++---- 2 files changed, 31 insertions(+), 47 deletions(-) diff --git a/tasks/basecalling/task_dorado_basecall.wdl b/tasks/basecalling/task_dorado_basecall.wdl index 3afe09e2b..14b8547c2 100644 --- a/tasks/basecalling/task_dorado_basecall.wdl +++ b/tasks/basecalling/task_dorado_basecall.wdl @@ -2,10 +2,8 @@ version 1.0 task basecall { input { - Array[File] input_files - String? dorado_model # Optional: Manual model input - Boolean use_auto_model = true # Use automatic model selection if true - String model_accuracy = "sup" # Default to 'sup' (most accurate model) + File input_file # Single POD5 file for scatter processing + String dorado_model = "sup" # Default model to 'sup', can be overridden with full model name see docs String kit_name # Sequencing kit name String docker = "us-docker.pkg.dev/general-theiagen/staphb/dorado:0.8.0" } @@ -17,34 +15,24 @@ task basecall { sam_output="output/sam/" mkdir -p "$sam_output" - echo "### Starting basecalling ###" - - # Determine which model to use (auto or manual) - model_to_use=$(if [[ ~{use_auto_model} == "true" ]]; then echo ~{model_accuracy}; else echo ~{dorado_model}; fi) - - # Loop through input files and basecall - for file in ~{sep=" " input_files}; do - base_name=$(basename "$file" .pod5) - sam_file="$sam_output/${base_name}.sam" - - echo "Processing $file, output: $sam_file" - - # Run Dorado basecaller - if dorado basecaller \ - "$model_to_use" \ - "$file" \ - --kit-name ~{kit_name} \ - --emit-sam \ - --no-trim \ - --output-dir "$sam_output" \ - --verbose; then - echo "Basecalling completed successfully for $file. SAM file: $sam_file" - else - echo "ERROR: Dorado basecaller failed for $file. Moving on to the next file." - fi - done - - echo "Basecalling steps completed." + echo "### Starting basecalling for ~{input_file} ###" + + base_name=$(basename "~{input_file}" .pod5) + sam_file="$sam_output/${base_name}.sam" + + echo "Processing ~{input_file}, output: $sam_file" + + # Run Dorado basecaller + dorado basecaller \ + "~{dorado_model}" \ + "~{input_file}" \ + --kit-name ~{kit_name} \ + --emit-sam \ + --no-trim \ + --output-dir "$sam_output" \ + --verbose || { echo "ERROR: Dorado basecaller failed for ~{input_file}"; exit 1; } + + echo "Basecalling completed for ~{input_file}. SAM file: $sam_file" >>> output { @@ -56,7 +44,5 @@ task basecall { cpu: 8 memory: "32GB" gpuCount: 1 - gpuType: "nvidia-tesla-t4" - maxRetries: 3 - } + gpuType: "nvidia-tesla-t4" } } diff --git a/workflows/utilities/wf_dorado_basecalling.wdl b/workflows/utilities/wf_dorado_basecalling.wdl index 64e3fec84..fde60de0a 100644 --- a/workflows/utilities/wf_dorado_basecalling.wdl +++ b/workflows/utilities/wf_dorado_basecalling.wdl @@ -13,13 +13,11 @@ workflow dorado_basecalling_workflow { input { Array[File] input_files - String? dorado_model + String dorado_model = "sup" # Default to sup model, user can override with a full model name String kit_name - Boolean use_auto_model = true - String model_accuracy = "sup" String new_table_name String fastq_upload_path - Boolean paired_end = false + Boolean paired_end = false Boolean assembly_data = false String? file_ending String terra_project @@ -27,18 +25,18 @@ workflow dorado_basecalling_workflow { String fastq_file_name } - call basecall_task.basecall as basecall_step { - input: - input_files = input_files, - use_auto_model = use_auto_model, - model_accuracy = model_accuracy, - dorado_model = dorado_model, - kit_name = kit_name + scatter (file in input_files) { + call basecall_task.basecall as basecall_step { + input: + input_file = file, + dorado_model = dorado_model, + kit_name = kit_name + } } call samtools_convert_task.samtools_convert { input: - sam_files = basecall_step.sam_files + sam_files = flatten(basecall_step.sam_files) } call dorado_demux_task.dorado_demux {