forked from MOZI-AI/knowledge-import
-
Notifications
You must be signed in to change notification settings - Fork 0
/
uniprot2GO.py
executable file
·39 lines (33 loc) · 1.39 KB
/
uniprot2GO.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
__author__ = "Hedra"
__email__ = "[email protected]"
# The following script maps Uniprot to GO
# Requires: goa_human_isoform_valid.gaf
# source: http://current.geneontology.org/annotations/goa_human_isoform.gaf.gz
import os
import wget
import gzip
import metadata
from datetime import date
# Define helper functions
def inherit(node1: str, node2: str) -> str:
    """Return a ``MemberLink`` s-expression wrapping two node expressions.

    Args:
        node1: Text of the first node expression (written on its own line).
        node2: Text of the second node expression (written on its own line).

    Returns:
        The string ``"\\n(MemberLink \\n\\t<node1>\\n\\t<node2>)\\n"``: a leading
        newline, the link header, each node on a tab-indented line, and a
        trailing newline so consecutive links are separated in the output.
    """
    return '\n(MemberLink \n\t' + node1 + '\n\t' + node2 + ')\n'
# Gzip-compressed GAF annotation file that is downloaded when no local copy exists.
dataset_url = "http://current.geneontology.org/annotations/goa_human_isoform.gaf.gz"
lines = []
prot = []
go = []

# Both directories are used below; create them up front so neither the
# download nor the output open() fails on a fresh checkout.
os.makedirs("raw_data", exist_ok=True)
os.makedirs("dataset", exist_ok=True)

if not os.path.isfile('raw_data/goa_human_isoform_valid.gaf'):
    print("Downloading dataset")
    # Use a context manager so the gzip handle is closed once the lines are read.
    with gzip.open(wget.download(dataset_url, "raw_data/")) as gz_file:
        lines = [raw.decode("utf-8") for raw in gz_file]
else:
    # Read with the same encoding the download branch decodes with.
    with open('raw_data/goa_human_isoform_valid.gaf', encoding="utf-8") as gaf_file:
        lines = gaf_file.readlines()

# Computed once so the file that is written and the file that is reported match.
output_path = "dataset/uniprot2GO_{}.scm".format(str(date.today()))
with open(output_path, 'w') as f:
    print("\nStarted importing")
    for i in lines:
        # Skip '!' comment/header lines and anything that is not a UniProtKB row.
        if i.startswith('!') or 'UniProtKB' not in i:
            continue
        fields = i.split('\t')
        # Rows too short to hold both columns used below would raise IndexError.
        if len(fields) < 5:
            continue
        # Index 1 and index 4 are presumably the protein accession and the
        # GO term id (per the GAF column layout) -- confirm against the spec.
        protein_id = fields[1]
        go_id = fields[4]
        f.write(inherit('(MoleculeNode "' + 'Uniprot:' + protein_id + '")',
                        '(ConceptNode "' + go_id + '")'))
        prot.append(protein_id)
        go.append(go_id)

script = "https://github.com/MOZI-AI/knowledge-import/uniprot2GO.py"
# Record the number of distinct proteins and GO terms that were written.
metadata.update_meta("Uniprot-GO:latest", dataset_url, script,
                     prot=len(set(prot)), goterms={"go-terms": len(set(go))})
# Report the dated file that was actually written (the old message named an
# undated path that this script never creates).
print("Done, check {}".format(output_path))