-
Notifications
You must be signed in to change notification settings - Fork 4
/
Copy pathcollateRepAlleleMatrix.py
executable file
·73 lines (61 loc) · 2.04 KB
/
collateRepAlleleMatrix.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
#!/bin/env python
'''
collateRepAlleleMatrix.py
Copyright (c) 2015, David Edwards, Bernie Pope, Kat Holt
All rights reserved. (see README.txt for more details)
Takes the SNP allele matrix entry for each isolate and generates the full allele matrix.
Outputs the matrix to a user-defined file.
Example:
python collateRepAlleleMatrix.py <temp_dir> <output> <sequences_string> <rep_name>
Created: 27/2/2014
Modified: 21/6/2016 - changed splitPath origin
author: David Edwards
'''
from Bio import SeqIO
from Bio.Seq import Seq
from Bio.SeqRecord import SeqRecord
from Bio.Alphabet import IUPAC
import sys, glob
from pipe_utils import splitPath
def _strip_eol(text):
    '''
    Return text without trailing end-of-line characters.

    Unlike text[:-1], this leaves the text intact when there is no newline
    to remove (e.g. the last line of a file that does not end in a newline).
    '''
    return text.rstrip('\r\n')


def _build_input_paths(prefix, sequences_string, rep_name):
    '''
    Return one allele-file path per name in the comma-separated sequences_string.

    Empty names (from a leading, trailing or doubled comma) are skipped.
    prefix is concatenated as-is, so it must already end with a path separator.
    '''
    # NOTE(review): 'deriveRepAlleleMartix' is part of the path read at runtime;
    # presumably it matches the directory name written by another stage, so
    # confirm before "correcting" the spelling.
    return [prefix + sequence + '/deriveRepAlleleMartix/' + rep_name + '_' + sequence + '_alleles.txt'
            for sequence in sequences_string.split(',')
            if sequence != '']


def _collate(input_files):
    '''
    Merge the per-isolate allele files into a header string and a list of rows.

    The first line of each file is treated as its header. A file whose first
    line starts with 'fail' is skipped. The first usable file is taken whole
    (header and every row); from each later file only the third
    comma-separated field of the header and of each row is appended, matching
    rows by their position in the file.

    Returns (header, rows); header is '' when no usable file was found.
    Raises IndexError if a later file has more rows than were collected so
    far, or contains a line with fewer than three comma-separated fields.
    '''
    header = ''
    rows = []
    for entry_file in input_files:
        # 'with' guarantees the handle is released even if a malformed line
        # raises part-way through the file.
        with open(entry_file, 'r') as matrix_entry_file:
            first_line = matrix_entry_file.readline()
            if first_line.startswith('fail'):
                continue
            remaining_lines = matrix_entry_file.readlines()
        if header == '':
            # No header yet: take this file's header and rows in full.
            header = _strip_eol(first_line)
            rows.extend(_strip_eol(line) for line in remaining_lines)
        else:
            header += ',' + _strip_eol(first_line.split(',')[2])
            # NOTE(review): a file with fewer rows than the first one leaves
            # the trailing rows without a value for this isolate (unchanged
            # from the original behaviour).
            for count, line in enumerate(remaining_lines):
                rows[count] += ',' + _strip_eol(line.split(',')[2])
    return header, rows


def _write_matrix(output_file, header, rows):
    '''
    Write the header and rows to output_file, one per line.

    An empty header is replaced by 'Pos,Ref', so the output always starts
    with a header line.
    '''
    if header == '':
        header = 'Pos,Ref'
    with open(output_file, 'w') as matrix_file:
        matrix_file.write(header + '\n')
        matrix_file.write(''.join(row + '\n' for row in rows))


def main(argv=None):
    '''
    Collate the per-isolate allele files into one matrix file.

    argv follows the sys.argv layout (sys.argv is used when it is None):
    argv[1] prefix, argv[2] output file, argv[3] comma-separated sequence
    names, argv[4] replicon name.
    Raises IndexError when fewer than four arguments are supplied.
    '''
    if argv is None:
        argv = sys.argv
    prefix = argv[1]
    output_file = argv[2]
    sequences_string = argv[3]
    rep_name = argv[4]

    input_files = _build_input_paths(prefix, sequences_string, rep_name)
    header, rows = _collate(input_files)
    _write_matrix(output_file, header, rows)


if __name__ == '__main__':
    main()