Skip to content

Commit

Permalink
Run one star instance
Browse files Browse the repository at this point in the history
  • Loading branch information
aawdeh committed Jan 16, 2025
1 parent cde33b7 commit 84fb1f8
Show file tree
Hide file tree
Showing 2 changed files with 34 additions and 57 deletions.
69 changes: 24 additions & 45 deletions pipelines/skylab/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -166,23 +166,10 @@ workflow Optimus {
ubuntu_docker_path = ubuntu_docker_prefix + ubuntu_docker
}

call FastqProcessing.FastqProcessing as SplitFastq {
input:
i1_fastq = i1_fastq,
r1_fastq = r1_fastq,
r2_fastq = r2_fastq,
whitelist = whitelist,
chemistry = tenx_chemistry_version,
sample_id = input_id,
read_struct = read_struct,
warp_tools_docker_path = docker_prefix + warp_tools_docker
}

scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) {
call StarAlign.STARsoloFastq as STARsoloFastq {
call StarAlign.STARsoloFastq as STARsoloFastq {
input:
r1_fastq = [SplitFastq.fastq_R1_output_array[idx]],
r2_fastq = [SplitFastq.fastq_R2_output_array[idx]],
r1_fastq = r1_fastq,
r2_fastq = r2_fastq,
star_strand_mode = star_strand_mode,
white_list = whitelist,
tar_star_reference = tar_star_reference,
Expand All @@ -195,16 +182,9 @@ workflow Optimus {
is_slidetags = is_slidetags
}
}
call Merge.MergeSortBamFiles as MergeBam {
input:
bam_inputs = STARsoloFastq.bam_output,
output_bam_filename = output_bam_basename + ".bam",
sort_order = "coordinate",
picard_cloud_docker_path = docker_prefix + picard_cloud_docker
}
call Metrics.CalculateGeneMetrics as GeneMetrics {
input:
bam_input = MergeBam.output_bam,
bam_input = STARsoloFastq.output_bam,
mt_genes = mt_genes,
original_gtf = annotations_gtf,
input_id = input_id,
Expand All @@ -213,7 +193,7 @@ workflow Optimus {

call Metrics.CalculateCellMetrics as CellMetrics {
input:
bam_input = MergeBam.output_bam,
bam_input = STARsoloFastq.output_bam,
mt_genes = mt_genes,
original_gtf = annotations_gtf,
input_id = input_id,
Expand All @@ -222,13 +202,13 @@ workflow Optimus {

call StarAlign.MergeStarOutput as MergeStarOutputs {
input:
barcodes = STARsoloFastq.barcodes,
features = STARsoloFastq.features,
matrix = STARsoloFastq.matrix,
cell_reads = STARsoloFastq.cell_reads,
summary = STARsoloFastq.summary,
align_features = STARsoloFastq.align_features,
umipercell = STARsoloFastq.umipercell,
barcodes = [STARsoloFastq.barcodes],
features = [STARsoloFastq.features],
matrix = [STARsoloFastq.matrix],
cell_reads = [STARsoloFastq.cell_reads],
summary = [STARsoloFastq.summary],
align_features = [STARsoloFastq.align_features],
umipercell = [STARsoloFastq.umipercell],
input_id = input_id,
counting_mode = counting_mode,
star_merge_docker_path = docker_prefix + star_merge_docker,
Expand Down Expand Up @@ -272,15 +252,15 @@ workflow Optimus {
if (count_exons && counting_mode=="sn_rna") {
call StarAlign.MergeStarOutput as MergeStarOutputsExons {
input:
barcodes = STARsoloFastq.barcodes_sn_rna,
features = STARsoloFastq.features_sn_rna,
matrix = STARsoloFastq.matrix_sn_rna,
cell_reads = STARsoloFastq.cell_reads_sn_rna,
barcodes = [STARsoloFastq.barcodes_sn_rna],
features = [STARsoloFastq.features_sn_rna],
matrix = [STARsoloFastq.matrix_sn_rna],
cell_reads = [STARsoloFastq.cell_reads_sn_rna],
input_id = input_id,
counting_mode = "sc_rna",
summary = STARsoloFastq.summary_sn_rna,
align_features = STARsoloFastq.align_features_sn_rna,
umipercell = STARsoloFastq.umipercell_sn_rna,
summary = [STARsoloFastq.summary_sn_rna],
align_features = [STARsoloFastq.align_features_sn_rna],
umipercell = [STARsoloFastq.umipercell_sn_rna],
star_merge_docker_path = docker_prefix + star_merge_docker,
gex_nhash_id = gex_nhash_id
}
Expand Down Expand Up @@ -351,7 +331,7 @@ workflow Optimus {
# version of this pipeline
String pipeline_version_out = pipeline_version
File genomic_reference_version = ReferenceCheck.genomic_ref_version
File bam = MergeBam.output_bam
File bam = STARsoloFastq.output_bam
File matrix = MergeStarOutputs.sparse_counts
File matrix_row_index = MergeStarOutputs.row_index
File matrix_col_index = MergeStarOutputs.col_index
Expand All @@ -363,12 +343,11 @@ workflow Optimus {
File? mtx_files = MergeStarOutputs.mtx_files
File? filtered_mtx_files = MergeStarOutputs.filtered_mtx_files

Array[File?] multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix
Array[File?] multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix
Array[File?] multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix
Array[File?] multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix
File? multimappers_EM_matrix = STARsoloFastq.multimappers_EM_matrix
File? multimappers_Uniform_matrix = STARsoloFastq.multimappers_Uniform_matrix
File? multimappers_Rescue_matrix = STARsoloFastq.multimappers_Rescue_matrix
File? multimappers_PropUnique_matrix = STARsoloFastq.multimappers_PropUnique_matrix


# h5ad
File h5ad_output_file = final_h5ad_output

Expand Down
22 changes: 10 additions & 12 deletions tasks/skylab/StarAlign.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -227,18 +227,19 @@ task STARsoloFastq {

# runtime values
String samtools_star_docker_path
Int machine_mem_mb = 64000
Int cpu = 8
# by default request non preemptible machine to make sure the slow star alignment step completes
Int preemptible = 3

# if slide_tags true set disk to 1000 otherwise dynamic allocation based on input size
# dynamic allocation multiplies input size by 2.2 to account for output bam file + 20% overhead, add size of reference.
Boolean is_slidetags
Int disk = if is_slidetags then 1000 else
ceil(size(tar_star_reference, "Gi") * 3) +
ceil(size(r1_fastq, "Gi") * 20) +
ceil(size(r2_fastq, "Gi") * 20)

# runtime values
String cpu_platform = "Intel Ice Lake"
Int machine_mem_mb = 512000
Int mem_size = 512
Int cpu = 128
Int disk = 2000
# allow one preemptible attempt; Cromwell then falls back to a non-preemptible machine so the slow STAR alignment step completes
Int preemptible = 1
}

meta {
Expand Down Expand Up @@ -340,9 +341,7 @@ task STARsoloFastq {
# validate the bam with samtools quickcheck
samtools quickcheck -v Aligned.sortedByCoord.out.bam


echo "UMI LEN " $UMILen

touch barcodes_sn_rna.tsv
touch features_sn_rna.tsv
touch matrix_sn_rna.mtx
Expand All @@ -351,7 +350,6 @@ task STARsoloFastq {
touch Summary_sn_rna.csv
touch UMIperCellSorted_sn_rna.txt


if [[ "~{counting_mode}" == "sc_rna" ]]
then
SoloDirectory="Solo.out/Gene/raw"
Expand Down Expand Up @@ -425,7 +423,7 @@ task STARsoloFastq {

runtime {
docker: samtools_star_docker_path
memory: "~{machine_mem_mb} MiB"
memory: "~{mem_size} GiB"
disks: "local-disk ~{disk} HDD"
disk: disk + " GB" # TES
cpu: cpu
Expand Down

0 comments on commit 84fb1f8

Please sign in to comment.