-
Notifications
You must be signed in to change notification settings - Fork 1
/
plot_compare_kmers.py
executable file
·89 lines (67 loc) · 1.9 KB
/
plot_compare_kmers.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
#!/usr/bin/python
'''
compare kmer positions between two fasta files
usage: plot_compare_kmers kmersize fasta1 fasta2 output.png
'''
#set matplotlib to use a backend suitable for headless operation
import matplotlib
matplotlib.use('Agg')
import matplotlib.pyplot as plt
from rjv.fasta import *
from rjv.kmer import *
import sys
if len(sys.argv) < 5:
print 'usage: plot_compare_kmers kmersize fasta1 fasta2 output.png'
exit(0)
kmersize = int(sys.argv[1])
inp1 = sys.argv[2]
inp2 = sys.argv[3]
out = sys.argv[4]
kmer_posn = {}
inp1_sizes = []
inp2_sizes = []
#index position of all kmers
posn = 0
for fa in next_fasta(inp1):
inp1_sizes.append(len(fa['seq']))
for i,kmer in enumerate(next_kmer(fa['seq'],kmersize)):
try:
#create reverse complement of the kmer
rev = revcomp(kmer)
except:
#invalid base found
continue
#convert to canonical kmer
if rev < kmer: kmer = rev
if not kmer in kmer_posn: kmer_posn[kmer] = []
kmer_posn[kmer].append(posn+i)
posn += len(fa['seq'])
xx = []
yy = []
posn = 0
for fa in next_fasta(inp2):
inp2_sizes.append(len(fa['seq']))
for i,kmer in enumerate(next_kmer(fa['seq'],kmersize)):
try:
#create reverse complement of the kmer
rev = revcomp(kmer)
except:
#invalid base found
continue
#convert to canonical kmer
if rev < kmer: kmer = rev
if not kmer in kmer_posn: continue
for x in kmer_posn[kmer]:
xx.append(x)
yy.append(posn+i)
posn += len(fa['seq'])
plt.plot(xx,yy,'ro',alpha=0.5,markersize=0.1)
posn = 0
for x in inp1_sizes[:-1]:
posn += x
plt.axvline(x=posn)
posn = 0
for x in inp2_sizes[:-1]:
posn += x
plt.axhline(y=posn)
plt.savefig(out, dpi=300, bbox_inches='tight')