Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Module/gatk rnaseq/1.0 #184

Open
wants to merge 18 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions demo/genome_Snakefile.smk
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@ configfile: "../modules/sequenza/1.4/config/default.yaml"
# Load the default configuration file shipped with each module used by this demo workflow.
configfile: "../modules/bwa_mem/1.1/config/default.yaml"
configfile: "../modules/controlfreec/1.2/config/default.yaml"
configfile: "../modules/slms_3/1.0/config/default.yaml"
configfile: "../modules/ichorcna/1.0/config/default.yaml"
configfile: "../modules/gatk_rnaseq/1.0/config/default.yaml"
configfile: "../modules/gridss/1.1/config/default.yaml"
configfile: "../modules/liftover/1.2/config/default.yaml"
configfile: "../modules/battenberg/1.2/config/default.yaml"
Expand Down Expand Up @@ -66,6 +68,13 @@ include: "../modules/bwa_mem/1.1/bwa_mem.smk"
# Pull in the rule definitions of each module used by this demo workflow.
# Only one version of a given module should be included: controlfreec/1.2 is the
# version configured above, so the additional controlfreec/1.1 include was removed
# (two versions of the same module presumably define rules with identical names,
# which Snakemake rejects).
include: "../modules/controlfreec/1.2/controlfreec.smk"
include: "../modules/slms_3/1.0/slms_3.smk"
include: "../modules/gridss/1.1/gridss.smk"
# NOTE(review): confirm the next four modules are not already included earlier in
# this file and that their configfile entries are loaded.
include: "../modules/bam2fastq/1.2/bam2fastq.smk"
include: "../modules/lofreq/1.0/lofreq.smk"
include: "../modules/starfish/2.0/starfish.smk"
include: "../modules/sage/1.0/sage.smk"
include: "../modules/ichorcna/1.0/ichorcna.smk"
include: "../modules/gatk_rnaseq/1.0/gatk_rnaseq.smk"
include: "../modules/liftover/1.2/liftover.smk"
include: "../modules/battenberg/1.2/battenberg.smk"
include: "../modules/pathseq/1.0/pathseq.smk"
Expand All @@ -82,6 +91,9 @@ rule all:
# Target files of every module; each entry must end with a comma because the
# list continues with further module targets.
rules._bwa_mem_all.input,
rules._controlfreec_all.input,
rules._slms_3_all.input,
rules._ichorcna_all.input,
rules._gatk_rnaseq_all.input,
rules._gridss_all.input,
rules._liftover_all.input,
rules._battenberg_all.input,
Expand Down
146 changes: 146 additions & 0 deletions envs/cnvkit/cnvkit-0.9.9.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
# Conda environment pinning cnvkit 0.9.9 and its dependencies.
# The machine-specific `prefix:` entry was dropped so the file is portable;
# the install location is chosen by whoever creates the environment.
name: null
channels:
- conda-forge
- bioconda
- defaults
dependencies:
- _libgcc_mutex=0.1
- _openmp_mutex=4.5
- _r-mutex=1.0.1
- binutils_impl_linux-64=2.36.1
- binutils_linux-64=2.36
- bioconductor-dnacopy=1.68.0
- biopython=1.79
- brotli=1.0.9
- brotli-bin=1.0.9
- bwidget=1.9.14
- bzip2=1.0.8
- c-ares=1.18.1
- ca-certificates=2021.10.8
- cairo=1.16.0
- certifi=2021.10.8
- cnvkit=0.9.9
- curl=7.81.0
- cycler=0.11.0
- font-ttf-dejavu-sans-mono=2.37
- font-ttf-inconsolata=3.000
- font-ttf-source-code-pro=2.038
- font-ttf-ubuntu=0.83
- fontconfig=2.13.94
- fonts-conda-ecosystem=1
- fonts-conda-forge=1
- fonttools=4.29.1
- freetype=2.10.4
- fribidi=1.0.10
- gcc_impl_linux-64=9.4.0
- gcc_linux-64=9.4.0
- gettext=0.19.8.1
- gfortran_impl_linux-64=9.4.0
- gfortran_linux-64=9.4.0
- graphite2=1.3.13
- gsl=2.7
- gxx_impl_linux-64=9.4.0
- gxx_linux-64=9.4.0
- harfbuzz=3.3.1
- icu=69.1
- jbig=2.1
- joblib=0.17.0
- jpeg=9e
- kernel-headers_linux-64=2.6.32
- kiwisolver=1.3.2
- krb5=1.19.2
- lcms2=2.12
- ld_impl_linux-64=2.36.1
- lerc=2.2.1
- libblas=3.9.0
- libbrotlicommon=1.0.9
- libbrotlidec=1.0.9
- libbrotlienc=1.0.9
- libcblas=3.9.0
- libcurl=7.81.0
- libdeflate=1.7
- libedit=3.1.20191231
- libev=4.33
- libffi=3.4.2
- libgcc-devel_linux-64=9.4.0
- libgcc-ng=11.2.0
- libgfortran-ng=11.2.0
- libgfortran5=11.2.0
- libglib=2.70.2
- libgomp=11.2.0
- libiconv=1.16
- liblapack=3.9.0
- libnghttp2=1.46.0
- libnsl=2.0.0
- libopenblas=0.3.18
- libpng=1.6.37
- libsanitizer=9.4.0
- libssh2=1.10.0
- libstdcxx-devel_linux-64=9.4.0
- libstdcxx-ng=11.2.0
- libtiff=4.3.0
- libuuid=2.32.1
- libwebp-base=1.2.2
- libxcb=1.13
- libxml2=2.9.12
- libzlib=1.2.11
- lz4-c=1.9.3
- make=4.3
- matplotlib-base=3.5.1
- munkres=1.1.4
- ncurses=6.3
- networkx=2.6.3
- numpy=1.22.1
- olefile=0.46
- openjpeg=2.4.0
- openssl=1.1.1l
- packaging=21.3
- pandas=1.4.0
- pango=1.48.10
- pcre=8.45
- pcre2=10.37
- pillow=8.4.0
- pip=22.0.2
- pixman=0.40.0
- pomegranate=0.13.3
- pthread-stubs=0.4
- pyfaidx=0.6.4
- pyparsing=3.0.7
- pysam=0.17.0
- python=3.9.10
- python-dateutil=2.8.2
- python_abi=3.9
- pytz=2021.3
- pyyaml=6.0
- r-base=4.1.2
- r-cghflasso=0.2_1
- readline=8.1
- reportlab=3.5.68
- scipy=1.7.3
- sed=4.8
- setuptools=60.6.0
- six=1.16.0
- sqlite=3.37.0
- sysroot_linux-64=2.12
- tk=8.6.11
- tktable=2.10
- tzdata=2021e
- unicodedata2=14.0.0
- wheel=0.37.1
- xorg-kbproto=1.0.7
- xorg-libice=1.0.10
- xorg-libsm=1.2.3
- xorg-libx11=1.7.2
- xorg-libxau=1.0.9
- xorg-libxdmcp=1.1.3
- xorg-libxext=1.3.4
- xorg-libxrender=0.9.10
- xorg-libxt=1.2.1
- xorg-renderproto=0.11.1
- xorg-xextproto=7.3.0
- xorg-xproto=7.0.31
- xz=5.2.5
- yaml=0.2.5
- zlib=1.2.11
- zstd=1.5.2
84 changes: 84 additions & 0 deletions modules/gatk_rnaseq/1.0/config/default.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
# Default configuration for the gatk_rnaseq module (version 1.0).
lcr-modules:

gatk_rnaseq:

inputs:
# Available wildcards: {seq_type} {genome_build} {sample_id}
# "__UPDATE__" values are placeholders; presumably they must be overridden in the user's own config.
sample_bam: "__UPDATE__"
sample_bai: "__UPDATE__"


scratch_subdirectories: []

options:
# NOTE(review): each entry looks like extra command-line text for the tool named by its key; confirm against the module's .smk.
java_opts: "-XX:ConcGCThreads=1"
gatk_splitntrim: " -fixNDN TRUE -RF GoodCigarReadFilter "
gatk_addRG:
platform: "illumina"
unit: "unit1"
stringency: "LENIENT"
gatk_baserecalibrator: ""
gatk_applybqsr: ""
gatk_variant_calling:
min_conf_thres: 20.0
gatk_opts: ""
gatk_variant_filtration:
window: 35 # window size between SNPs in cluster
cluster_size: 3 # at least 3 SNPs in cluster
# hard filtering (filters OUT) based on metrics:
# FS (FisherStrand): Phred-scale probability that there is a strand bias from a Fisher's test. (default FS > 30.0)
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thanks for the detailed documentation!

# QD (QualByDepth): variant confidence/unfiltered depth (normalizes variant quality to avoid inflation from deep coverage) (default QD < 2.0)
# DP (depth): minimum depth (default DP < 5.0)
filter_expression: "-filter-expression \"FS > 30.0\" -filter-name FS -filter-expression \"QD < 2.0\" -filter-name QD -filter-expression \"DP < 5.0\" -filter-name DP"
gatk_rnaseq_filter_passed:
params: "-f '.,PASS' "
# Can be modified to filter on additional criteria using bcftools view syntax.
# `-i` INCLUDES records matching the expression (use `-e` to exclude them).
# For example, to keep only variants with -log10(POPAF) > 4.0:
# "-f '.,PASS' -i 'INFO/POPAF > 4'"


conda_envs:
picard: "{MODSDIR}/envs/picard-2.22.3.yaml"
gatk_rnaseq: "{MODSDIR}/envs/gatk-4.1.8.1.yaml"
bcftools: "{MODSDIR}/envs/bcftools-1.10.2.yaml"

# One thread count per rule, keyed by a descriptive per-rule name.
threads:
gatk_splitntrim: 12
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It is possible to combine these keys and reuse them across rules. In other words, if several rules need the same number of threads, they can refer to the same key in config. The same can be applied to resources as well. I think this reduces the number of keys to specify/adjust if needed, and reduces complexity of the config. What do you think?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It's possible, but wouldn't it also cause confusion if there's ever a need to change the numbers? Also, would there be a unique name that could be applied to a subset of the rules ("thread_12" would be non-descriptive and wouldn't indicate which rules would use this parameter)?

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

You are right, let's keep the more detailed and informative names

# NOTE(review): `options` uses the keys gatk_baserecalibrator and gatk_rnaseq_filter_passed,
# while threads/resources use gatk_base_recalibration and gatk_rnaseq_passed; confirm the
# module's .smk reads the matching key in each section.
gatk_addRG: 12
gatk_base_recalibration: 12
gatk_applybqsr: 12
gatk_variant_calling: 24
gatk_variant_filtration: 24
merge_vcfs: 10
gatk_rnaseq_passed: 10
gnomad_filter: 4

# Per-rule resource requests; mem_mb is the memory request in megabytes.
resources:
gatk_splitntrim:
mem_mb: 48000
# NOTE(review): `bam` looks like a custom resource (possibly to cap concurrent BAM-heavy jobs); confirm.
bam: 1
gatk_addRG:
mem_mb: 12000
gatk_base_recalibration:
mem_mb: 12000
gatk_applybqsr:
mem_mb: 12000
gatk_variant_calling:
mem_mb: 12000
gatk_variant_filtration:
mem_mb: 12000
merge_vcfs:
mem_mb: 10000
gatk_rnaseq_passed:
mem_mb: 10000
gnomad_filter:
mem_mb: 2000

# Sample-pairing settings for the mrna seq_type.
# NOTE(review): as written, tumours appear to be run unpaired ("no_normal"), including paired ones; confirm intent.
pairing_config:
mrna:
run_paired_tumours: False
run_unpaired_tumours_with: "no_normal"
run_paired_tumours_as_unpaired: True



1 change: 1 addition & 0 deletions modules/gatk_rnaseq/1.0/envs/bcftools-1.10.2.yaml
1 change: 1 addition & 0 deletions modules/gatk_rnaseq/1.0/envs/gatk-4.1.8.1.yaml
1 change: 1 addition & 0 deletions modules/gatk_rnaseq/1.0/envs/picard-2.22.3.yaml
Loading