-
Notifications
You must be signed in to change notification settings - Fork 3
/
Copy pathrepeatMaskerGFFsubset.py
executable file
·79 lines (74 loc) · 2.42 KB
/
repeatMaskerGFFsubset.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
#!/usr/bin/env python3
import sys
import re
def help():
    """Print the usage message to stderr and terminate with exit status 0.

    NOTE: this name shadows the builtin ``help``; it is kept unchanged so
    existing callers in this script continue to work.

    Raises:
        SystemExit: always, with exit code 0.
    """
    print('''
Takes RepeatMasker-generated GFF3 on stdin and writes lines from the
GFF3 file for each of the following categories: copia, gypsy,
otherltr, nonltr, mudr, hat, and otherdna.
''', file=sys.stderr)
    # Use sys.exit() rather than the bare exit(): the latter is injected by
    # the site module for interactive sessions and is not available when the
    # interpreter runs without site (e.g. `python -S`).
    sys.exit(0)
# Ordered classification rules. Each entry is:
#   (output file,
#    keywords searched in the lowercased line,
#    keywords searched case-sensitively in the unmodified line)
# A line is written to the file of the FIRST rule it matches, so the order of
# the entries is significant (e.g. 'copia' is tested before the generic 'ltr').
# The 'otherdna' rule used to also test the regex 'dna\d'; that pattern can
# only match when 'dna' is already a substring, so it is covered by 'dna'.
# NOTE(review): keywords are matched against the whole line, not only the
# repeat name, so any column containing a keyword triggers a match (for
# example 'rep' matches the text 'RepeatMasker') - confirm this is intended.
_CATEGORY_RULES = (
    ('copia.gff', ('copia', 'shacop'), ()),
    ('gypsy.gff', ('gypsy', 'ogre'), ()),
    ('otherltr.gff',
     ('dirs', 'erv', 'bel', 'ltr', 'tto1', 'tnt1', 'tlc1', 'tcn1'),
     ('gag', 'zf-CCHC', 'DUF4219', 'RVT', 'Asp_protease', 'RVP')),
    ('mudr.gff', ('mudr',), ()),
    ('hat.gff', ('hat',), ()),
    ('otherdna.gff',
     ('helitron', 'dna', 'mariner', 'tc1', 'harb', 'enspm', 'cacta',
      'penelope', 'polinton', 'maverick', 'piggybac', 'dada'),
     ()),
    ('nonltr.gff',
     ('sine', 'jock', 'cr1', 'crack', 'daphne', 'line', 'l1', 'tx1', 'rep',
      'rtex', 'rte'),
     ()),
)


def _output_path(line):
    """Return the output file name for one GFF3 line, or None if no rule matches."""
    lowered = line.lower()
    for path, lowercase_keywords, exact_keywords in _CATEGORY_RULES:
        if (any(keyword in lowered for keyword in lowercase_keywords)
                or any(keyword in line for keyword in exact_keywords)):
            return path
    return None


def main(argv=None, stream=None):
    """Split GFF3 lines into per-category files in the current directory.

    Lines starting with '#' are skipped. Every other line is appended to the
    file of the first matching rule in ``_CATEGORY_RULES``; lines matching no
    rule are dropped. Output files are opened in append mode, so existing
    content is preserved, and a file is only created once a line matches it.

    Args:
        argv: command-line arguments; defaults to ``sys.argv``. If it contains
            '-h', '-help' or '--help', ``help()`` is called.
        stream: iterable of input lines; defaults to ``sys.stdin``.
    """
    arguments = sys.argv if argv is None else argv
    if any(flag in arguments for flag in ('-h', '-help', '--help')):
        help()

    lines = sys.stdin if stream is None else stream
    # Keep one handle per category open for the whole run instead of
    # re-opening the output file for every matching line.
    handles = {}
    try:
        for line in lines:
            if line.startswith('#'):
                continue
            path = _output_path(line)
            if path is None:
                continue
            out = handles.get(path)
            if out is None:
                out = handles[path] = open(path, 'a')
            out.write(line)
    finally:
        for out in handles.values():
            out.close()


if __name__ == '__main__':
    main()