Skip to content

Commit

Permalink
Lk pd 2836 marmoset (#1480)
Browse files Browse the repository at this point in the history
Updated BuildIndices to use a Docker image that includes the marmoset scripts for modifying a custom marmoset reference
  • Loading branch information
ekiernan authored Jan 17, 2025
1 parent cde33b7 commit 256b324
Show file tree
Hide file tree
Showing 3 changed files with 55 additions and 20 deletions.
2 changes: 1 addition & 1 deletion pipeline_versions.txt
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,7 @@ BroadInternalRNAWithUMIs 1.0.36 2024-11-04
RNAWithUMIsPipeline 1.0.18 2024-11-04
Multiome 5.9.5 2025-01-13
MultiSampleSmartSeq2SingleNucleus 2.0.7 2025-01-13
BuildIndices 3.1.0 2024-11-26
BuildIndices 4.0.0 2025-01-17
SlideSeq 3.4.8 2025-01-13
PairedTag 1.9.1 2025-01-13
atac 2.5.4 2025-01-13
Expand Down
6 changes: 6 additions & 0 deletions pipelines/skylab/build_indices/BuildIndices.changelog.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,9 @@
# 4.0.0
2025-01-17 (Date of Last Commit)

* Updated the WDL to use the new build-indices Docker image, version 2.1.0, which includes new Python scripts for handling a custom marmoset GTF input
* Updated the WDL to run the new marmoset scripts when the organism input is set to `marmoset` or `Marmoset`

# 3.1.0
2024-11-26 (Date of Last Commit)

Expand Down
67 changes: 48 additions & 19 deletions pipelines/skylab/build_indices/BuildIndices.wdl
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@ workflow BuildIndices {
}

# version of this pipeline
String pipeline_version = "3.1.0"
String pipeline_version = "4.0.0"


parameter_meta {
Expand Down Expand Up @@ -114,29 +114,58 @@ task BuildStarSingleNucleus {
String annotation_gtf_modified = "modified_v~{gtf_annotation_version}.annotation.gtf"

command <<<
# Check that input GTF files contain input genome source, genome build version, and annotation version
if head -10 ~{annotation_gtf} | grep -qi ~{genome_build}
# First check for marmoset GTF and modify header
echo "checking for marmoset"
if [[ "~{organism}" == "marmoset" || "~{organism}" == "Marmoset" ]]
then
echo Genome version found in the GTF file
echo "marmoset is detected, running header modification"
python3 /script/create_marmoset_header_mt_genes.py \
~{annotation_gtf} > "/cromwell_root/header.gtf"
else
echo Error: Input genome version does not match version in GTF file
exit 1;
echo "marmoset is not detected"

# Check that input GTF files contain input genome source, genome build version, and annotation version
if head -10 ~{annotation_gtf} | grep -qi ~{genome_build}
then
echo Genome version found in the GTF file
else
echo Error: Input genome version does not match version in GTF file
exit 1;
fi

# Check that GTF file contains correct build source info in the first 10 lines of the GTF
if head -10 ~{annotation_gtf} | grep -qi ~{genome_source}
then
echo Source of genome build identified in the GTF file
else
echo Error: Source of genome build not identified in the GTF file
exit 1;
fi
set -eo pipefail
fi
# Check that GTF file contains correct build source info in the first 10 lines of the GTF
if head -10 ~{annotation_gtf} | grep -qi ~{genome_source}

if [[ "~{organism}" == "marmoset" || "~{organism}" == "Marmoset" ]]
then
echo Source of genome build identified in the GTF file
echo "marmoset detected, running marmoset GTF modification"
echo "Listing files to check for head.gtf"
ls
python3 /script/modify_gtf_marmoset.py \
--input-gtf "/cromwell_root/header.gtf" \
--output-gtf ~{annotation_gtf_modified} \
--species ~{organism}
echo "listing files, should see modified gtf"
ls
else
echo Error: Source of genome build not identified in the GTF file
exit 1;
echo "running GTF modification for non-marmoset"
python3 /script/modify_gtf.py \
--input-gtf ~{annotation_gtf} \
--output-gtf ~{annotation_gtf_modified} \
--biotypes ~{biotypes}
fi

set -eo pipefail

python3 /script/modify_gtf.py \
--input-gtf ~{annotation_gtf} \
--output-gtf ~{annotation_gtf_modified} \
--biotypes ~{biotypes}
# python3 /script/modify_gtf.py \
# --input-gtf ~{annotation_gtf} \
# --output-gtf ~{annotation_gtf_modified} \
# --biotypes ~{biotypes}

mkdir star
STAR --runMode genomeGenerate \
Expand All @@ -156,7 +185,7 @@ task BuildStarSingleNucleus {
}

runtime {
docker: "us.gcr.io/broad-gotc-prod/build-indices:2.0.0"
docker: "us.gcr.io/broad-gotc-prod/build-indices:2.1.0"
memory: "50 GiB"
disks: "local-disk ${disk} HDD"
disk: disk + " GB" # TES
Expand Down

0 comments on commit 256b324

Please sign in to comment.