-
Notifications
You must be signed in to change notification settings - Fork 7
/
Copy pathadd_taxonomy.py
79 lines (49 loc) · 1.47 KB
/
add_taxonomy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
import csv
import json
import os
import queue
import re
import sys
from threading import Semaphore
from threading import Thread
from urllib.request import urlopen
# Binary semaphore used as a mutex: worker threads hold it while printing so
# that output rows are never interleaved.
writeLock = Semaphore(value=1)

# The input CSV path is the first command-line argument. Fail with a readable
# usage message instead of a bare IndexError traceback when it is missing.
if len(sys.argv) < 2:
    sys.exit("usage: add_taxonomy.py <input.csv>")
input_file = sys.argv[1]

# Prefix of the KEGG REST "get" URL; an entry identifier is appended to it.
module_url_prefix = "http://rest.kegg.jp/get/gn:"

# Header row of the CSV written to stdout.
print("ko,name,taxonomy")

# in_queue carries [kegg, name] work items from the main thread to the workers.
in_queue = queue.Queue()
# out_queue is created but not used by any live code visible in this file
# (only commented-out code refers to it).
out_queue = queue.Queue()
def _extract_lineage(detail):
    """Return the value of the LINEAGE field found in *detail*.

    *detail* is the text of a fetched entry. Each line is stripped and
    checked for a leading ``LINEAGE`` tag; if several lines match, the last
    one wins. Returns an empty string when no line matches.
    """
    taxonomy = ""
    for line in detail.split("\n"):
        line = line.strip()
        if line.startswith("LINEAGE"):
            # Remove only the leading field tag, so the word "LINEAGE"
            # appearing later in the value is left intact.
            taxonomy = line[len("LINEAGE"):].strip()
    return taxonomy


def work():
    """Worker loop: annotate queued (kegg, name) pairs with their taxonomy.

    Runs forever. For each item taken from ``in_queue`` it fetches
    ``module_url_prefix + kegg``, extracts the LINEAGE field and prints one
    quoted CSV row ``"kegg","name","taxonomy"`` to stdout while holding
    ``writeLock``.

    A failed fetch is reported on stderr and the row is still emitted with an
    empty taxonomy. ``in_queue.task_done()`` is always called, so a failing
    item can never leave ``in_queue.join()`` blocked forever.
    """
    while True:
        package = in_queue.get()
        try:
            # Input fields may arrive wrapped in double quotes; drop them so
            # the identifier is usable in the URL and is not quoted twice.
            kegg = package[0].replace('"', "")
            name = package[1].replace('"', "")
            try:
                # The context manager closes the HTTP response promptly.
                with urlopen(module_url_prefix + kegg) as response:
                    detail = response.read().decode("utf-8")
                taxonomy = _extract_lineage(detail)
            except Exception as exc:
                # Broad on purpose: this is the per-item boundary of a
                # long-lived worker, and any network, HTTP or decoding error
                # must not kill the thread.
                taxonomy = ""
                with writeLock:
                    print(
                        f"warning: could not fetch taxonomy for {kegg}: {exc}",
                        file=sys.stderr,
                    )
            with writeLock:
                print(",".join([f'"{kegg}"', f'"{name}"', f'"{taxonomy}"']))
        finally:
            in_queue.task_done()
# Start the worker pool. A single daemon thread is used, so requests are
# issued one at a time and the thread does not keep the process alive once
# the main thread finishes.
for _ in range(1):
    Thread(target=work, daemon=True).start()

# Feed every data row of the input CSV to the workers. The context manager
# closes the file; csv.reader copes with quoted fields containing commas.
with open(input_file, "r", newline="") as handle:
    rows = csv.reader(handle)
    next(rows, None)  # the first line is a header, not data
    for fields in rows:
        if len(fields) < 2:
            # Blank or one-column lines carry no (kegg, name) pair.
            continue
        kegg = fields[0].strip()
        name = fields[1].strip()
        in_queue.put([kegg, name])

# Block until every queued item has been marked done by a worker.
in_queue.join()