Skip to content

Commit 37236da

Browse files
committed
Add script for Kraken to Bandage transform
1 parent 3c2c38e commit 37236da

File tree

2 files changed

+72
-1
lines changed

2 files changed

+72
-1
lines changed

bin/metafx-scripts/tax_to_csv.py

+70
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,70 @@
1+
#!/usr/bin/env python
2+
# Utility to transform a Kraken classification report into a CSV file for Bandage visualization
3+
# -*- coding: UTF-8 -*-
4+
5+
import sys
6+
import getopt
7+
from ete3 import NCBITaxa
8+
9+
# Message shown for -h, for unknown options and when a mandatory option is missing.
HELP_STRING = 'Please add all mandatory parameters: --class-file and --res-file'

# Column titles of the output table; the first column holds the node name.
CSV_HEADER = ["Node name", "Superkingdom", "Phylum", "Class", "Order",
              "Family", "Genus", "Species", "Serotype", "Strains"]

# Taxonomic rank -> index of the output column that receives its name.
RANK_COLUMNS = {'superkingdom': 1, 'phylum': 2, 'class': 3, 'order': 4,
                'family': 5, 'genus': 6, 'species': 7, 'serotype': 8,
                'strain': 9}


def strip_quotes(value):
    """Return *value* without one leading and one trailing quote character.

    Single and double quotes are handled independently at each end, as in
    the original script. Empty strings (and a lone quote) are handled
    without raising IndexError.
    """
    quotes = ("'", '"')
    # Slicing instead of indexing keeps this safe for empty strings.
    if value[:1] in quotes:
        value = value[1:]
    if value[-1:] in quotes:
        value = value[:-1]
    return value


def extract_taxid(field):
    """Return the taxonomy ID stored in the third column of a Kraken line.

    Accepts both the ``name (taxid 562)`` form and a bare ``562``; the
    result is a string in either case.
    """
    # rpartition so that a 'taxid' substring inside the name is not mistaken
    # for the marker that precedes the number.
    _, marker, tail = field.rpartition('taxid')
    if marker:
        return tail.strip().rstrip(')').strip()
    return field.strip()


def read_classified(path):
    """Collect ``(node name, taxid)`` pairs for classified ('C') lines.

    Lines are tab-separated. Lines whose first column is not ``C`` and
    lines with fewer than three columns are skipped.
    """
    pairs = []
    with open(path, 'r') as handle:
        for line in handle:
            columns = line.rstrip('\r\n').split('\t')
            if len(columns) < 3 or columns[0] != 'C':
                continue
            pairs.append((columns[1], extract_taxid(columns[2])))
    return pairs


def lineage_row(node, lineage, rank_by_taxid, name_by_taxid):
    """Build one fixed-width output row for *node*.

    Every taxid of *lineage* whose rank appears in RANK_COLUMNS puts its
    name into the column of that rank; all other columns stay empty, so
    each row has exactly as many fields as CSV_HEADER.
    """
    row = [node] + [''] * len(RANK_COLUMNS)
    for taxid in lineage:
        column = RANK_COLUMNS.get(rank_by_taxid.get(taxid))
        if column is not None:
            row[column] = name_by_taxid.get(taxid, '')
    return row


def write_taxonomy_csv(pairs, path, ncbi):
    """Write the header and one lineage row per ``(node, taxid)`` pair.

    *ncbi* must provide ``get_lineage``, ``get_rank`` and
    ``get_taxid_translator`` (the methods the script calls on NCBITaxa).
    """
    import csv  # local import: used only by this function

    with open(path, 'w', newline='') as handle:
        # csv.writer quotes names that contain commas; '\n' keeps the line
        # ending the script has always produced.
        writer = csv.writer(handle, lineterminator='\n')
        writer.writerow(CSV_HEADER)
        for node, taxid in pairs:
            try:
                lineage = ncbi.get_lineage(taxid)
            except ValueError:
                # NOTE(review): ete3 is expected to raise ValueError for
                # taxids missing from its local database - confirm against
                # the installed version. The node is kept with empty ranks
                # instead of aborting with a half-written file.
                sys.stderr.write(
                    "Warning: taxid {} of node {} not found\n".format(taxid, node))
                lineage = None
            if not lineage:
                writer.writerow(lineage_row(node, [], {}, {}))
                continue
            # One rank query and one name query per lineage.
            writer.writerow(lineage_row(node, lineage,
                                        ncbi.get_rank(lineage),
                                        ncbi.get_taxid_translator(lineage)))


def main(argv=None):
    """Command-line entry point.

    Options: ``--class-file`` (Kraken per-read classification output) and
    ``--res-file`` (CSV to create); ``-h`` prints the help string.
    Exits with status 2 on unknown options or when a mandatory option is
    missing or empty.
    """
    if argv is None:
        argv = sys.argv[1:]
    try:
        opts, _ = getopt.getopt(argv, "h", ["class-file=", "res-file="])
    except getopt.GetoptError:
        print(HELP_STRING)
        sys.exit(2)

    input_file = ''
    res_file = ''
    for opt, arg in opts:
        if opt == "-h":
            print(HELP_STRING)
            sys.exit()
        elif opt == "--class-file":
            input_file = strip_quotes(arg)
        elif opt == "--res-file":
            res_file = strip_quotes(arg)

    # Both options are mandatory; fail early with the help text.
    if not input_file or not res_file:
        print(HELP_STRING)
        sys.exit(2)

    # Parse the input before creating the output file, so a bad input path
    # does not leave an empty result file behind.
    pairs = read_classified(input_file)
    write_taxonomy_csv(pairs, res_file, NCBITaxa())


if __name__ == "__main__":
    main()

requirements.txt

+2-1
Original file line numberDiff line numberDiff line change
@@ -2,4 +2,5 @@ numpy
22
pandas
33
scikit-learn
44
matplotlib
5-
joblib
5+
joblib
6+
ete3

0 commit comments

Comments
 (0)