Skip to content

Commit

Permalink
Merge branch 'develop' into aa-remove-mergeandfastqprocess
Browse files Browse the repository at this point in the history
  • Loading branch information
aawdeh authored Jan 16, 2025
2 parents 80d4506 + cde33b7 commit dec4923
Show file tree
Hide file tree
Showing 12 changed files with 53 additions and 30 deletions.
12 changes: 6 additions & 6 deletions pipeline_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -27,14 +27,14 @@ BroadInternalImputation 1.1.14 2024-11-04
BroadInternalArrays 1.1.14 2024-11-04
BroadInternalRNAWithUMIs 1.0.36 2024-11-04
RNAWithUMIsPipeline 1.0.18 2024-11-04
Multiome 5.9.5 2024-12-12
MultiSampleSmartSeq2SingleNucleus 2.0.7 2024-12-12
Multiome 5.9.5 2025-01-13
MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13
BuildIndices 3.1.0 2024-11-26
SlideSeq 3.4.8 2024-12-12
PairedTag 1.9.1 2024-12-12
atac 2.5.3 2024-11-22
SlideSeq 3.4.8 2025-01-13
PairedTag 1.9.1 2025-01-13
atac 2.5.4 2025-01-13
scATAC 1.3.2 2023-08-03
snm3C 4.0.4 2024-08-06
Optimus 7.9.1 2024-12-12
Optimus 7.9.1 2025-01-13
MultiSampleSmartSeq2 2.2.22 2024-09-11
SmartSeq2SingleSample 5.1.21 2024-09-11
5 changes: 5 additions & 0 deletions pipelines/skylab/atac/atac.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,8 @@
# 2.5.4
2025-01-13 (Date of Last Commit)

* Added reference_gtf_file to the output h5ad unstructured metadata

# 2.5.3
2024-11-22 (Date of Last Commit)

Expand Down
13 changes: 10 additions & 3 deletions pipelines/skylab/atac/atac.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -49,15 +49,15 @@ workflow ATAC {
String adapter_seq_read3 = "TCGTCGGCAGCGTCAGATGTGTATAAGAGACAG"
}

String pipeline_version = "2.5.3"
String pipeline_version = "2.5.4"

# Determine docker prefix based on cloud provider
String gcr_docker_prefix = "us.gcr.io/broad-gotc-prod/"
String acr_docker_prefix = "dsppipelinedev.azurecr.io/"
String docker_prefix = if cloud_provider == "gcp" then gcr_docker_prefix else acr_docker_prefix

# Docker image names
String warp_tools_2_2_0 = "warp-tools:2.5.0"
String warp_tools_docker = "warp-tools:2.6.0"
String cutadapt_docker = "cutadapt:1.0.0-4.4-1686752919"
String samtools_docker = "samtools-dist-bwa:3.0.0"
String upstools_docker = "upstools:1.0.0-2023.03.03-1704300311"
Expand Down Expand Up @@ -99,7 +99,7 @@ workflow ATAC {
output_base_name = input_id,
num_output_files = GetNumSplits.ranks_per_node_out,
whitelist = whitelist,
docker_path = docker_prefix + warp_tools_2_2_0
docker_path = docker_prefix + warp_tools_docker
}

scatter(idx in range(length(SplitFastq.fastq_R1_output_array))) {
Expand Down Expand Up @@ -521,6 +521,7 @@ task CreateFragmentFile {
String atac_nhash_id = ""
String input_id
Int atac_expected_cells = 3000
String gtf_path = annotations_gtf
}

parameter_meta {
Expand Down Expand Up @@ -601,6 +602,12 @@ task CreateFragmentFile {
atac_data = ad.read_h5ad("temp_metrics.h5ad")
# Add nhash_id to h5ad file as unstructured metadata
atac_data.uns['NHashID'] = atac_nhash_id
# Add GTF to uns field
# Original path from args.annotation_file
gtf_path = "~{gtf_path}" # e.g., 'gs://gcp-public-data--broad-references/hg38/v0/star/v2_7_10a/modified_v43.annotation.gtf'
atac_data.uns["reference_gtf_file"] = gtf_path
# calculate tsse metrics
snap.metrics.tsse(atac_data, atac_gtf)
# Write new atac file
Expand Down
3 changes: 2 additions & 1 deletion pipelines/skylab/multiome/Multiome.changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# 5.9.5
2024-12-12 (Date of Last Commit)
2025-01-13 (Date of Last Commit)

* Added a boolean variable is_slidetags; default is false but it is set to true if the Slide-Tags pipeline is calling Optimus
* Added reference_gtf_file to the output h5ad unstructured metadata

# 5.9.4
2024-12-05 (Date of Last Commit)
Expand Down
4 changes: 3 additions & 1 deletion pipelines/skylab/optimus/Optimus.changelog.md
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
# 7.9.1
2024-12-12 (Date of Last Commit)
2025-01-13 (Date of Last Commit)

* Added a boolean variable is_slidetags; set to false by default, but set to true if the Slide-Tags pipeline is calling Optimus

* Added reference_gtf_file to the output h5ad unstructured metadata

# 7.9.0
2024-12-05 (Date of Last Commit)

Expand Down
10 changes: 5 additions & 5 deletions pipelines/skylab/optimus/Optimus.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,7 @@ workflow Optimus {
String pytools_docker = "pytools:1.0.0-1661263730"
String empty_drops_docker = "empty-drops:1.0.1-4.2"
String star_docker = "star:1.0.1-2.7.11a-1692706072"
String warp_tools_docker_2_2_0 = "warp-tools:2.5.0"
String warp_tools_docker = "warp-tools:2.6.0"
String star_merge_docker = "star-merge-npz:1.3.0"
String samtools_star = "samtools-star:1.0.0-1.11-2.7.11a-1731516196"

Expand Down Expand Up @@ -188,7 +188,7 @@ workflow Optimus {
mt_genes = mt_genes,
original_gtf = annotations_gtf,
input_id = input_id,
warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0
warp_tools_docker_path = docker_prefix + warp_tools_docker
}

call Metrics.CalculateCellMetrics as CellMetrics {
Expand All @@ -197,7 +197,7 @@ workflow Optimus {
mt_genes = mt_genes,
original_gtf = annotations_gtf,
input_id = input_id,
warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0
warp_tools_docker_path = docker_prefix + warp_tools_docker
}

call StarAlign.MergeStarOutput as MergeStarOutputs {
Expand Down Expand Up @@ -246,7 +246,7 @@ workflow Optimus {
empty_drops_result = RunEmptyDrops.empty_drops_result,
counting_mode = counting_mode,
pipeline_version = "Optimus_v~{pipeline_version}",
warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0
warp_tools_docker_path = docker_prefix + warp_tools_docker
}
}
if (count_exons && counting_mode=="sn_rna") {
Expand Down Expand Up @@ -285,7 +285,7 @@ workflow Optimus {
cell_id_exon = MergeStarOutputsExons.row_index,
gene_id_exon = MergeStarOutputsExons.col_index,
pipeline_version = "Optimus_v~{pipeline_version}",
warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0
warp_tools_docker_path = docker_prefix + warp_tools_docker
}
}

Expand Down
5 changes: 3 additions & 2 deletions pipelines/skylab/paired_tag/PairedTag.changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# 1.9.1
2024-12-12 (Date of Last Commit)
2025-01-13 (Date of Last Commit)

* Added a boolean variable is_slidetags. Set to true if Slide-Tags pipeline calling Optimus, otherwise false.
* Added a boolean variable is_slidetags; default is false, but set to true if Slide-Tags pipeline is calling Optimus
* Added reference_gtf_file to the output h5ad unstructured metadata

# 1.9.0
2024-12-05 (Date of Last Commit)
Expand Down
3 changes: 2 additions & 1 deletion pipelines/skylab/slideseq/SlideSeq.changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# 3.4.8
2024-12-12 (Date of Last Commit)
2025-01-13 (Date of Last Commit)

* Added a boolean variable is_slidetags; this does not affect the outputs of the pipeline
* Added reference_gtf_file to the output h5ad unstructured metadata

# 3.4.7
2024-12-03 (Date of Last Commit)
Expand Down
10 changes: 5 additions & 5 deletions pipelines/skylab/slideseq/SlideSeq.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -48,7 +48,7 @@ workflow SlideSeq {
# docker images
String pytools_docker = "pytools:1.0.0-1661263730"
String picard_cloud_docker = "picard-cloud:2.26.10"
String warp_tools_docker_2_2_0 = "warp-tools:2.5.0"
String warp_tools_docker = "warp-tools:2.6.0"
String star_merge_docker = "star-merge-npz:1.3.0"

String ubuntu_docker = "ubuntu_16_0_4@sha256:025124e2f1cf4d29149958f17270596bffe13fc6acca6252977c572dd5ba01bf"
Expand Down Expand Up @@ -124,7 +124,7 @@ workflow SlideSeq {
bam_input = MergeBam.output_bam,
original_gtf = annotations_gtf,
input_id = input_id,
warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0
warp_tools_docker_path = docker_prefix + warp_tools_docker
}
call Metrics.CalculateUMIsMetrics as UMIsMetrics {
input:
Expand All @@ -138,7 +138,7 @@ workflow SlideSeq {
bam_input = MergeBam.output_bam,
original_gtf = annotations_gtf,
input_id = input_id,
warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0
warp_tools_docker_path = docker_prefix + warp_tools_docker

}

Expand All @@ -162,7 +162,7 @@ workflow SlideSeq {
gene_id = MergeStarOutputs.col_index,
add_emptydrops_data = "no",
pipeline_version = "SlideSeq_v~{pipeline_version}",
warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0
warp_tools_docker_path = docker_prefix + warp_tools_docker

}
}
Expand All @@ -188,7 +188,7 @@ workflow SlideSeq {
cell_id_exon = MergeStarOutputsExons.row_index,
gene_id_exon = MergeStarOutputsExons.col_index,
pipeline_version = "SlideSeq_v~{pipeline_version}",
warp_tools_docker_path = docker_prefix + warp_tools_docker_2_2_0
warp_tools_docker_path = docker_prefix + warp_tools_docker
}
}

Expand Down
3 changes: 2 additions & 1 deletion …/MultiSampleSmartSeq2SingleNucleus.changelog.md
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
# 2.0.7
2024-12-12 (Date of Last Commit)
2025-01-13 (Date of Last Commit)

* Added a boolean variable is_slidetags; this does not affect the outputs of the pipeline
* Added reference_gtf_file to the output h5ad unstructured metadata

# 2.0.6
2024-11-15 (Date of Last Commit)
Expand Down
2 changes: 1 addition & 1 deletion tasks/skylab/FastqProcessing.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ task FastqProcessingSlidSeq {
# Runtime attributes
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.5.0"
String docker = "us.gcr.io/broad-gotc-prod/warp-tools:2.6.0"
Int cpu = 16
Int machine_mb = 40000
Int disk = ceil(size(r1_fastq, "GiB")*3 + size(r2_fastq, "GiB")*3) + 50
Expand Down
13 changes: 9 additions & 4 deletions tasks/skylab/H5adUtils.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ task OptimusH5adGeneration {
File? empty_drops_result
String counting_mode = "sc_rna"
String add_emptydrops_data = "yes"
String gtf_path = annotation_file


String pipeline_version
Expand All @@ -55,7 +56,7 @@ task OptimusH5adGeneration {
command <<<
set -euo pipefail

touch empty_drops_result.csv
touch empty_drops_result.csv

if [ "~{counting_mode}" == "sc_rna" ]; then
python3 /warptools/scripts/create_h5ad_optimus.py \
Expand All @@ -73,7 +74,8 @@ task OptimusH5adGeneration {
~{"--input_name_metadata_field " + input_name_metadata_field} \
--count_matrix ~{sparse_count_matrix} \
--expression_data_type "exonic" \
--pipeline_version ~{pipeline_version}
--pipeline_version ~{pipeline_version} \
--gtf_path ~{gtf_path}
else
python3 /warptools/scripts/create_snrna_optimus_full_h5ad.py \
--annotation_file ~{annotation_file} \
Expand All @@ -88,7 +90,8 @@ task OptimusH5adGeneration {
~{"--input_name_metadata_field " + input_name_metadata_field} \
--count_matrix ~{sparse_count_matrix} \
--expression_data_type "whole_transcript"\
--pipeline_version ~{pipeline_version}
--pipeline_version ~{pipeline_version} \
--gtf_path ~{gtf_path}
fi

# modify h5ad to include doublets, NHASHID, and build library metrics
Expand Down Expand Up @@ -158,6 +161,7 @@ task SingleNucleusOptimusH5adOutput {
File? library_metrics
# Cell calls from starsolo in TSV format
File? cellbarcodes
String gtf_path = annotation_file

String pipeline_version

Expand Down Expand Up @@ -194,7 +198,8 @@ task SingleNucleusOptimusH5adOutput {
~{"--input_id_metadata_field " + input_id_metadata_field} \
~{"--input_name_metadata_field " + input_name_metadata_field} \
--expression_data_type "whole_transcript" \
--pipeline_version ~{pipeline_version}
--pipeline_version ~{pipeline_version} \
--gtf_path ~{gtf_path}

# modify h5ad to include doublets, NHASHID, and build library metrics
python3 /warptools/scripts/add_library_tso_doublets.py \
Expand Down

0 comments on commit dec4923

Please sign in to comment.