-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathFindTSSs.py
57 lines (44 loc) · 1.53 KB
/
FindTSSs.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
#!/usr/bin/env python
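'''Quick script to find all transcription start sites (TSSs) in a GTF file.

Reads GTF records from standard input and writes a tab-separated table of
TSSs, one row per transcript, to standard output.
'''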
from __future__ import print_function
from sys import stdin
def _parse_attributes(field):
    '''Return the key/value pairs of a GTF attribute column as a dict.

    Items are separated by ';' and double quotes are removed. Each item is
    split on its first run of whitespace only, so a value that itself
    contains spaces stays in one piece. An item consisting of a bare key
    maps to the empty string.
    '''
    attributes = {}
    for item in field.split(';'):
        parts = item.replace('"', '').strip().split(None, 1)
        if not parts:
            continue
        attributes[parts[0]] = parts[1] if len(parts) > 1 else ''
    return attributes


def _print_tss(chrom, start, strand, gene, fbgn, transcript):
    '''Print one output row, but only for a transcript on '+' or '-'.'''
    # Unstranded records have no defined 5' end; this also makes the call a
    # no-op while no transcript has been seen yet (strand is None).
    if strand in ('+', '-'):
        print("{}\t{}\t{}\t{}\t{}"
              .format(chrom, start, gene, fbgn, transcript))


def main():
    '''Print one transcription start site (TSS) per transcript of a GTF.

    Reads GTF lines from ``stdin`` and prints a tab-separated table: a header
    row, then one row per transcript with the columns chr, TSS_start,
    gene_name, fbgn (the ``gene_id`` attribute) and fbtr (the
    ``transcript_id`` attribute).

    Only ``exon`` records that carry a ``gene_name`` attribute are used.
    Lines starting with '#', and lines with fewer than nine tab-separated
    columns, are skipped. The exons of a transcript are expected on
    consecutive lines: a change of ``transcript_id`` closes the current
    transcript. The TSS is the smallest exon start on the '+' strand and the
    largest exon end on the '-' strand, whatever order the exons are listed
    in. Transcripts on any other strand produce no row.

    Raises:
        KeyError: a used exon record lacks ``transcript_id`` or ``gene_id``.
        ValueError: a start or end coordinate is not an integer.
    '''
    curr_transcript = None
    curr_chr = None
    curr_start = None
    curr_strand = None
    curr_gene = None
    curr_fbgn = None

    print('chr\tTSS_start\tgene_name\tfbgn\tfbtr')
    for line in stdin:
        if line.startswith('#'):
            continue
        data = line.strip().split('\t')
        if len(data) < 9:
            # Blank or truncated line: not a complete GTF record.
            continue
        if data[2] != 'exon':
            continue
        annot = _parse_attributes(data[-1])
        if 'gene_name' not in annot:
            continue

        chrom = data[0]
        strand = data[6]
        min_coord = int(data[3])
        max_coord = int(data[4])
        transcript = annot['transcript_id']

        if transcript != curr_transcript:
            # The previous transcript is complete only now, so its row is
            # written here rather than when its first exon was seen.
            _print_tss(curr_chr, curr_start, curr_strand,
                       curr_gene, curr_fbgn, curr_transcript)
            curr_transcript = transcript
            curr_chr = chrom
            curr_start = min_coord if strand == '+' else max_coord
            curr_strand = strand
            curr_gene = annot['gene_name']
            curr_fbgn = annot['gene_id']
        elif curr_strand == '+':
            curr_start = min(curr_start, min_coord)
        else:
            curr_start = max(curr_start, max_coord)

    # Flush the final transcript; nothing follows it to trigger the print.
    _print_tss(curr_chr, curr_start, curr_strand,
               curr_gene, curr_fbgn, curr_transcript)
# Run the extraction only when this file is executed as a script, so that
# importing the module has no side effects.
if __name__ == "__main__":
    main()