forked from Xinglab/rMATS-DVR
-
Notifications
You must be signed in to change notification settings - Fork 0
/
bam_calibration.py
76 lines (61 loc) · 3.28 KB
/
bam_calibration.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
#!/usr/bin/env python
import argparse,os,sys,time,logging,datetime
startTime = time.time()
parser = argparse.ArgumentParser(description='Bam calibration for rMATS-DVR')
parser.add_argument('--bam',help='Input bam file')
parser.add_argument('--output',help='Path and prefix of the output file')
parser.add_argument('--genome',help='genome sequence in fasta format')
parser.add_argument('--known', default='NA', help='Known SNVs in vcf format')
parser.add_argument('--KeepTemp', action='store_true', help='Keep tempory files. Disable by default.')
args = parser.parse_args()
bam=args.bam
label=args.output
genome=args.genome
known=args.known
keep=args.KeepTemp
directory='/'.join(sys.argv[0].split('/')[:-1])
if (directory!=''):
directory+='/'
logging.basicConfig(level=logging.DEBUG,
format='%(asctime)s %(message)s',
filename=label+'.bamCalibration.log'+ str(datetime.datetime.now())+'.txt' ,
filemode='w')
# mark and index bam
com1='java -Xmx4g -jar '+directory+'picard.jar ReorderSam INPUT='+bam+' OUTPUT='+label+'_reordered.bam S=true R='+genome
com2='java -Xmx4g -jar '+directory+'picard.jar AddOrReplaceReadGroups INPUT='+label+'_reordered.bam OUTPUT='+label+'_addrg.bam RGID='+label+' RGLB='+label+' RGPL=ILLUMINA RGPU=lane1 RGSM='+label
com3='java -Xmx4g -jar '+directory+'picard.jar MarkDuplicates INPUT='+label+'_addrg.bam OUTPUT='+label+'_dedup.bam CREATE_INDEX=true VALIDATION_STRINGENCY=SILENT METRICS_FILE='+label+'_metrics.txt'
# SplitNCigarReads
com5='java -Xmx4g -jar '+directory+'GenomeAnalysisTK.jar -T SplitNCigarReads -R '+genome+' -I '+label+'_dedup.bam -o '+label+'_split.bam -rf ReassignOneMappingQuality -RMQF 255 -RMQT 60 -U ALLOW_N_CIGAR_READS'
# Base Quality Score Recalibration
if (known!='NA'):
com6='java -Xmx4g -jar '+directory+'GenomeAnalysisTK.jar -T BaseRecalibrator -I '+label+'_split.bam -R '+genome+' -o '+label+'_recalibration_report.grp -knownSites '+known
else:
com6='java -Xmx4g -jar '+directory+'GenomeAnalysisTK.jar -T BaseRecalibrator -I '+label+'_split.bam -R '+genome+' -o '+label+'_recalibration_report.grp --run_without_dbsnp_potentially_ruining_quality'
com7='java -Xmx4g -jar '+directory+'GenomeAnalysisTK.jar -T PrintReads -R '+genome+' -I '+label+'_split.bam -BQSR '+label+'_recalibration_report.grp -o '+label+'_recalibration.bam -U ALLOW_N_CIGAR_READS'
logging.debug('Running command 1: '+com1+'\n')
os.system(com1)
logging.debug('Running command 2: '+com2+'\n')
os.system(com2)
if (not keep):
os.system('rm -f '+label+'_reordered.bam')
logging.debug('Running command 3: '+com3+'\n')
os.system(com3)
if (not keep):
os.system('rm -f '+label+'_addrg.bam')
logging.debug('Running command 5: '+com5+'\n')
os.system(com5)
if (not keep):
os.system('rm -f '+label+'_dedup.bam')
logging.debug('Running command 6: '+com6+'\n')
os.system(com6)
logging.debug('Running command 7: '+com7+'\n')
os.system(com7)
if (not keep):
os.system('rm -f '+label+'_split.bam')
os.system('rm -f '+label+'_recalibration_report.grp')
os.system('rm -f '+label+'_split.bai')
os.system('rm -f '+label+'_metrics.txt')
logging.debug("Program ended")
currentTime = time.time()
runningTime = currentTime-startTime
logging.debug("Program ran %.2d:%.2d:%.2d" % (runningTime/3600, (runningTime%3600)/60, runningTime%60))