Skip to content

Commit

Permalink
Update basecall task to run once per input file via scatter and reduce input parameters
Browse files Browse the repository at this point in the history
  • Loading branch information
fraser-combe committed Oct 29, 2024
1 parent cc8770e commit a4feae6
Show file tree
Hide file tree
Showing 2 changed files with 31 additions and 47 deletions.
56 changes: 21 additions & 35 deletions tasks/basecalling/task_dorado_basecall.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -2,10 +2,8 @@ version 1.0

task basecall {
input {
Array[File] input_files
String? dorado_model # Optional: Manual model input
Boolean use_auto_model = true # Use automatic model selection if true
String model_accuracy = "sup" # Default to 'sup' (most accurate model)
File input_file # Single POD5 file for scatter processing
String dorado_model = "sup" # Defaults to the 'sup' model; can be overridden with a full model name (see docs)
String kit_name # Sequencing kit name
String docker = "us-docker.pkg.dev/general-theiagen/staphb/dorado:0.8.0"
}
Expand All @@ -17,34 +15,24 @@ task basecall {
sam_output="output/sam/"
mkdir -p "$sam_output"

echo "### Starting basecalling ###"

# Determine which model to use (auto or manual)
model_to_use=$(if [[ ~{use_auto_model} == "true" ]]; then echo ~{model_accuracy}; else echo ~{dorado_model}; fi)

# Loop through input files and basecall
for file in ~{sep=" " input_files}; do
base_name=$(basename "$file" .pod5)
sam_file="$sam_output/${base_name}.sam"

echo "Processing $file, output: $sam_file"

# Run Dorado basecaller
if dorado basecaller \
"$model_to_use" \
"$file" \
--kit-name ~{kit_name} \
--emit-sam \
--no-trim \
--output-dir "$sam_output" \
--verbose; then
echo "Basecalling completed successfully for $file. SAM file: $sam_file"
else
echo "ERROR: Dorado basecaller failed for $file. Moving on to the next file."
fi
done

echo "Basecalling steps completed."
echo "### Starting basecalling for ~{input_file} ###"

base_name=$(basename "~{input_file}" .pod5)
sam_file="$sam_output/${base_name}.sam"

echo "Processing ~{input_file}, output: $sam_file"

# Run Dorado basecaller
dorado basecaller \
"~{dorado_model}" \
"~{input_file}" \
--kit-name ~{kit_name} \
--emit-sam \
--no-trim \
--output-dir "$sam_output" \
--verbose || { echo "ERROR: Dorado basecaller failed for ~{input_file}"; exit 1; }

echo "Basecalling completed for ~{input_file}. SAM file: $sam_file"
>>>

output {
Expand All @@ -56,7 +44,5 @@ task basecall {
cpu: 8
memory: "32GB"
gpuCount: 1
gpuType: "nvidia-tesla-t4"
maxRetries: 3
}
gpuType: "nvidia-tesla-t4" }
}
22 changes: 10 additions & 12 deletions workflows/utilities/wf_dorado_basecalling.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -13,32 +13,30 @@ workflow dorado_basecalling_workflow {

input {
Array[File] input_files
String? dorado_model
String dorado_model = "sup" # Defaults to the 'sup' model; the user can override it with a full model name
String kit_name
Boolean use_auto_model = true
String model_accuracy = "sup"
String new_table_name
String fastq_upload_path
Boolean paired_end = false
Boolean paired_end = false
Boolean assembly_data = false
String? file_ending
String terra_project
String terra_workspace
String fastq_file_name
}

call basecall_task.basecall as basecall_step {
input:
input_files = input_files,
use_auto_model = use_auto_model,
model_accuracy = model_accuracy,
dorado_model = dorado_model,
kit_name = kit_name
scatter (file in input_files) {
call basecall_task.basecall as basecall_step {
input:
input_file = file,
dorado_model = dorado_model,
kit_name = kit_name
}
}

call samtools_convert_task.samtools_convert {
input:
sam_files = basecall_step.sam_files
sam_files = flatten(basecall_step.sam_files)
}

call dorado_demux_task.dorado_demux {
Expand Down

0 comments on commit a4feae6

Please sign in to comment.