forked from CRG-Beato/utils_beatolab
-
Notifications
You must be signed in to change notification settings - Fork 0
/
star_generate_genome1_sjdb.sh
executable file
·92 lines (76 loc) · 2.88 KB
/
star_generate_genome1_sjdb.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
#!/bin/bash
#==================================================================================================
# Created on: 2015-11-27
# Usage: ./star_generate_genome1_sjdb.sh
# Author: Javier Quilez (GitHub: jaquol)
# Goal: generates genome index files for STAR aligner using a known splice junctions database (sjdb)
#==================================================================================================
# Workflow:
# the assembly version's (1) reference sequence and (2) gene annotation are passed to STAR.
# Note that for the `*_mmtv` assembly versions (whose reference sequence includes that of the MMTV)
# we use the gene annotation of the assembly without the MMTV.
#==================================================================================================
# CONFIGURATION VARIABLES AND PATHS
#==================================================================================================
# Variables
# Name of this pipeline step; used as the prefix of the cluster job name.
process="star_generate_genome1_sjdb"
species="homo_sapiens"
# Assembly version; selects the reference FASTA and the gene annotation.
version="hg38_mmtv"
# Read length in bp; names the index directory and is passed to STAR as --sjdbOverhang.
read_length=54
fasta_name="${version}_chr1-22XYMUn"

# Paths
GENOME_DIR="$HOME/assemblies/$species/$version/star_genome_index/read_length_${read_length}bp"
genome_fasta="$HOME/assemblies/$species/$version/ucsc/$fasta_name.fa"
JOB_CMD="$HOME/utils/job_cmd"
JOB_OUT="$HOME/utils/job_out"
# Quoted so that whitespace in $HOME cannot split a path into several directories.
mkdir -p -- "$GENOME_DIR" "$JOB_CMD" "$JOB_OUT"
# Resolve the STAR executable with the shell builtin lookup instead of the external `which`.
# $star is left empty when no `star` is found in PATH.
star=$(command -v star)
# Define the gene annotation file (GTF) based on the assembly version.
# The `*_mmtv` assemblies reuse the annotation of the matching assembly without the MMTV.
case "$version" in
  hg19 | hg19_mmtv)
    sjdb="$HOME/assemblies/$species/hg19/gencode/gencode.v19.annotation.gtf"
    ;;
  hg38 | hg38_mmtv)
    sjdb="$HOME/assemblies/$species/hg38/gencode/gencode.v24.annotation.gtf"
    ;;
  *)
    # Without this branch $sjdb would stay unset and the STAR command would be
    # generated with a valueless --sjdbGTFfile option.
    printf 'error: no gene annotation configured for assembly version "%s"\n' "$version" >&2
    exit 1
    ;;
esac
# CRG cluster parameters (resources requested for the job)
slots="8"            # used for `-pe smp` and for STAR's --runThreadN
memory="50G"         # used for `-l virtual_free`
max_time="06:00:00"  # used for `-l h_rt`
queue="long-sl7"     # used for `-q`
#==================================================================================================
# JOB
#==================================================================================================
# Build job: parameters
job_name="${process}_${species}_${version}_read_length_${read_length}bp"
job_file="$JOB_CMD/$job_name.sh"
m_out="$JOB_OUT"

# Write the job header: the shebang followed by the scheduler directives (`#$ ...` lines).
# The here-document expands the shell variables now; `\$JOB_ID` is escaped so that a literal
# `$JOB_ID` is written to the job file (presumably substituted by the scheduler at run time).
# The redirection target is quoted so that whitespace in the path cannot break the redirect.
# NOTE(review): the address after `-M` looks like a redaction placeholder rather than a
# deliverable e-mail address -- confirm and restore the intended recipient.
cat > "$job_file" <<EOF
#!/bin/bash
#$ -N $job_name
#$ -q $queue
#$ -l virtual_free=$memory
#$ -l h_rt=$max_time
#$ -M [email protected]
#$ -m abe
#$ -j y
#$ -o $m_out/${job_name}_\$JOB_ID.out
#$ -e $m_out/${job_name}_\$JOB_ID.err
#$ -pe smp $slots
EOF
# STAR commands
# First job command: print the STAR version so it is recorded in the job output.
printf '%q --version\n' "$star" >> "$job_file"

# Second job command: generate the genome index with the splice-junction annotation.
# NOTE(review): the STAR manual suggests --sjdbOverhang = read length - 1; the read length
# itself is passed here -- confirm this is intended before regenerating indexes.
star_args=(
  "$star"
  --runMode genomeGenerate
  --genomeDir "$GENOME_DIR"
  --genomeFastaFiles "$genome_fasta"
  --runThreadN "$slots"
  --sjdbOverhang "$read_length"
  --sjdbGTFfile "$sjdb"
  --outFileNamePrefix "$GENOME_DIR/"
)
# %q shell-quotes every word, so paths containing whitespace or glob characters reach the
# job file as single arguments; plain paths are written unchanged.
printf -v job_cmd '%q ' "${star_args[@]}"
job_cmd=${job_cmd% }  # drop the separator left after the last word
printf '%s\n' "$job_cmd" >> "$job_file"
# Submit job
# Refuse to submit when the STAR executable was not located: the generated job file would
# then start its commands with an empty program name.
if [[ ! -x "$star" ]]; then
  printf 'error: STAR executable not found in PATH; job not submitted: %s\n' "$job_file" >&2
  exit 1
fi
chmod a+x "$job_file" || exit 1
# The job script is fed to qsub on standard input; qsub's exit status becomes the script's.
qsub < "$job_file"