Skip to content

Commit

Permalink
update to decoy fasta
Browse files Browse the repository at this point in the history
  • Loading branch information
anoronh4 committed Oct 10, 2024
1 parent 95e737e commit 3d97ab8
Show file tree
Hide file tree
Showing 5 changed files with 48 additions and 4 deletions.
4 changes: 2 additions & 2 deletions conf/igenomes.config
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,8 @@ params {
}
'GRCh38' {
ensembl_version = 111
//fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
fasta = "https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
fasta = "${params.igenomes_base}/Homo_sapiens/NCBI/GRCh38Decoy/Sequence/WholeGenomeFasta/genome.fa"
//fasta = "https://ftp.ensembl.org/pub/release-111/fasta/homo_sapiens/dna/Homo_sapiens.GRCh38.dna.primary_assembly.fa.gz"
gtf = "https://ftp.ensembl.org/pub/release-111/gtf/homo_sapiens/Homo_sapiens.GRCh38.111.gtf.gz"
//forte will generate refflat from gtf
refflat = null
Expand Down
5 changes: 5 additions & 0 deletions modules/local/fastaremoveprefix/environment.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
# Conda environment for the FASTAREMOVEPREFIX module (loaded by its main.nf
# through the `conda "${moduleDir}/environment.yml"` directive).
channels:
- conda-forge
- bioconda
dependencies:
# gawk is pinned to 5.3.0, the same version as the gawk:5.3.0 container tags
# referenced by the module.
- conda-forge::gawk=5.3.0
32 changes: 32 additions & 0 deletions modules/local/fastaremoveprefix/main.nf
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
process FASTAREMOVEPREFIX {
    // Strip a leading "chr" from every FASTA header line (">chr1" -> ">1"),
    // leaving sequence lines untouched.
    //
    // Input : tuple val(meta), path(fasta) - the FASTA is staged under input/
    // Output: fasta    - tuple val(meta), rewritten FASTA (same file name as the input)
    //         versions - versions.yml recording the gawk version used
    tag "$fasta"
    label 'process_single'

    conda "${moduleDir}/environment.yml"
    container "${ workflow.containerEngine == 'singularity' && !task.ext.singularity_pull_docker_container ?
        'https://depot.galaxyproject.org/singularity/gawk:5.3.0' :
        'biocontainers/gawk:5.3.0' }"

    when:
    task.ext.when == null || task.ext.when

    input:
    tuple val(meta), path(fasta, name: 'input/*')

    output:
    tuple val(meta), path("*.{fa,fasta}"), emit: fasta
    path "versions.yml"                  , emit: versions

    script:
    // Reuse the input's file name for the output. Because the input is staged
    // inside input/, writing that name into the task directory does not
    // overwrite the staged file.
    def modified_fasta = fasta.fileName.name
    """
    # Do the rewrite with awk (the tool this module declares and reports in
    # versions.yml) and read the file directly instead of piping it through
    # cat, so a read failure fails the task. Only header lines are edited.
    awk '/^>/ { sub(/^>chr/, ">") } { print }' "${fasta}" > "${modified_fasta}"

    cat <<-END_VERSIONS > versions.yml
    "${task.process}":
        gawk: \$(awk -Wversion | sed '1!d; s/.*Awk //; s/,.*//')
    END_VERSIONS
    """
}
5 changes: 3 additions & 2 deletions modules/local/prepare_rrna/main.nf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ process PREPARE_RRNA {
path "rna.bed", emit: rRNA_bed

script:
def extra_filter_chr = params.genome == "GRCh38" ? "|^GL000220|^KI270733" : ""
if (gtf) {
"""
(${"${gtf}".endsWith(".gz") ? "z" : ""}grep "rRNA" ${gtf} || true) | \\
Expand All @@ -23,7 +24,7 @@ process PREPARE_RRNA {
/transcript_id "([^"]+)"/ or die "no transcript_id on \$.";
print join "\t", (@F[0,1,2,3], \$1)
' | \\
(grep -vP "^HG|^HSCHR" || true) | \\
(grep -vP "^HG|^HSCHR${extra_filter_chr}" || true) | \\
sort -k1V -k2n -k3n \\
> rna.bed
Expand All @@ -32,7 +33,7 @@ process PREPARE_RRNA {
"""
(${"${refflat}".endsWith(".gz") ? "z" : ""}grep -P "^RNA5|^RNA1|^RNA2" ${refflat} || true) | \\
awk -F"\\t" -v OFS="\\t" '{ print \$3,\$5,\$6,\$4,\$2 }' | \\
(grep -vP "^HG|^HSCHR" || true) | \\
(grep -vP "^HG|^HSCHR${extra_filter_chr}" || true) | \\
sort -k1V -k2n -k3n \\
> rna.bed
"""
Expand Down
6 changes: 6 additions & 0 deletions subworkflows/local/prepare_references.nf
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@ include { AGFUSION_DOWNLOAD } from '../../modules/local/agfusion/do
include { AGAT_SPADDINTRONS } from '../../modules/nf-core/agat/spaddintrons/main'
include { METAFUSION_GENEBED } from '../../modules/local/metafusion/genebed/main'
include { METAFUSION_GENEINFO } from '../../modules/local/metafusion/geneinfo/main'
include { FASTAREMOVEPREFIX } from '../../modules/local/fastaremoveprefix/main'

workflow PREPARE_REFERENCES {

Expand All @@ -32,6 +33,11 @@ workflow PREPARE_REFERENCES {
fasta = Channel.of([[id:params.genome],params.fasta]).first()
}

if (params.genome == "GRCh38" ){
FASTAREMOVEPREFIX(fasta)
fasta = FASTAREMOVEPREFIX.out.fasta
}

if (params.gtf.endsWith(".gz")){
GUNZIP_GTF([[id:params.genome],params.gtf])
gtf = GUNZIP_GTF.out.gunzip.first()
Expand Down

0 comments on commit 3d97ab8

Please sign in to comment.