Skip to content

Commit

Permalink
Chebi's atomtype fix when it is a parent
Browse files Browse the repository at this point in the history
  • Loading branch information
tanksha committed Apr 15, 2020
1 parent cdccb51 commit 292e564
Showing 1 changed file with 32 additions and 15 deletions.
47 changes: 32 additions & 15 deletions go-plus.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,27 +19,36 @@ def get_term(class_id):
term = term.replace("CHEBI", "ChEBI")
return term

def get_type(term, parent=False):
    """Return the atom type name to use for *term*.

    A term whose text contains "ChEBI" is a "MoleculeNode", unless
    *parent* is true, in which case it is a "ConceptNode". Every other
    term is a "ConceptNode" regardless of *parent*.
    """
    if "ChEBI" in term and not parent:
        return "MoleculeNode"
    return "ConceptNode"

def evaLink(term1, term2, predicate, parent_chebis=()):
    """Return the text of an EvaluationLink relating *term1* and *term2*.

    The link wraps a PredicateNode named *predicate* and a ListLink holding
    the two terms. Each term's node type comes from ``get_type``, with the
    ``parent`` flag set when ``is_parent`` finds the term in *parent_chebis*.

    Returns an empty string when either term stringifies to "nan" (a
    missing value), so the result can always be written out directly.
    """
    if str(term1) == "nan" or str(term2) == "nan":
        return ""

    type1 = get_type(term1, parent=is_parent(term1, parent_chebis))
    type2 = get_type(term2, parent=is_parent(term2, parent_chebis))
    # Whitespace in the template is part of the emitted text; keep it exact.
    template = (
        '(EvaluationLink \n'
        '\t (PredicateNode "{}")\n'
        '\t (ListLink \n'
        '\t\t ({} "{}")\n'
        '\t\t ({} "{}")))\n'
    )
    return template.format(predicate, type1, term1, type2, term2)

def is_parent(term, parent_chebis):
    """Return True when *term* is a member of *parent_chebis*."""
    return term in parent_chebis


def inheritLink(term1, term2, parent_chebis=()):
    """Return the text of an InheritanceLink from *term1* to *term2*.

    Each term's node type comes from ``get_type``, with the ``parent`` flag
    set when the term is found in *parent_chebis*.

    Returns an empty string when either term stringifies to "nan" (a
    missing value), so the result can always be written out directly.
    """
    if str(term1) == "nan" or str(term2) == "nan":
        return ""

    type1 = get_type(term1, parent=is_parent(term1, parent_chebis))
    type2 = get_type(term2, parent=is_parent(term2, parent_chebis))
    # Whitespace in the template is part of the emitted text; keep it exact.
    template = (
        '(InheritanceLink \n'
        '\t ({} "{}")\n'
        '\t ({} "{}"))\n'
    )
    return template.format(type1, term1, type2, term2)

Expand All @@ -49,6 +58,14 @@ def inheritLink(term1 , term2):
if not os.path.exists("raw_data/GO-PLUS.csv.gz"):
dataset = wget.download(source_csv, "raw_data")
df = pd.read_csv("raw_data/GO-PLUS.csv.gz", dtype=str)
# A ChEBI term that appears as the parent of another term must be emitted as
# a ConceptNode, not a MoleculeNode, so collect every such term up front.
# A set is used because the result is only consulted with `in` checks (once
# per term of every link written) and the same parent recurs on many rows.
parents = df["Parents"]
parent_chebis = set()
for cell in parents.dropna():
    # One cell may list several parents separated by "|".
    for class_id in cell.split("|"):
        term = get_term(class_id)
        if "ChEBI" in term:
            parent_chebis.add(term)

go_columns = ["negatively regulated by","negatively regulates", "positively regulated by", "positively regulates", "regulated by", "regulates", "has part", "part of"]
uberon_columns = open("raw_data/uberon_columns.txt", "r").read().splitlines()
Expand Down Expand Up @@ -97,18 +114,18 @@ def inheritLink(term1 , term2):
file_name = chebi
file_name_with_def = chebi_with_def

file_name.write(evaLink(term, get_term(df.iloc[i]["Preferred Label"]), "has_name"))
file_name_with_def.write(evaLink(term, get_term(df.iloc[i]["Preferred Label"]), "has_name"))
file_name_with_def.write(evaLink(term, definition, "has_definition"))
file_name.write(evaLink(term, get_term(df.iloc[i]["Preferred Label"]), "has_name", parent_chebis=parent_chebis))
file_name_with_def.write(evaLink(term, get_term(df.iloc[i]["Preferred Label"]), "has_name",parent_chebis=parent_chebis))
file_name_with_def.write(evaLink(term, definition, "has_definition", parent_chebis=parent_chebis))
for col in cl_columns:
if col == "Parents":
parents = df.iloc[i][col]
if str(parents) != "nan":
for p in parents.split("|"):
file_name.write(inheritLink(term,get_term(p)))
file_name_with_def.write(inheritLink(term, get_term(p)))
file_name.write(inheritLink(term,get_term(p), parent_chebis=parent_chebis))
file_name_with_def.write(inheritLink(term, get_term(p), parent_chebis=parent_chebis))
else:
file_name.write(evaLink(term, get_term(df.iloc[i][col]), col.replace(" ", "_")))
file_name_with_def.write(evaLink(term, get_term(df.iloc[i][col]), col.replace(" ", "_")))
file_name.write(evaLink(term, get_term(df.iloc[i][col]), col.replace(" ", "_"), parent_chebis=parent_chebis))
file_name_with_def.write(evaLink(term, get_term(df.iloc[i][col]), col.replace(" ", "_"), parent_chebis=parent_chebis))
print("Done")

0 comments on commit 292e564

Please sign in to comment.