Skip to content

Commit

Permalink
Adds taxonomyID of non-human organisms
Browse files Browse the repository at this point in the history
  • Loading branch information
tanksha committed Apr 22, 2020
1 parent 8243075 commit 7127df9
Showing 1 changed file with 18 additions and 16 deletions.
34 changes: 18 additions & 16 deletions coronavirus_biogrid.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,16 +33,16 @@ def evaLink(node1, node1_type, node2, node2_type, predicate, prefix1="", prefix2
return ("(EvaluationLink {}\n".format(stv) +
"\t (PredicateNode \""+ predicate + "\")\n" +
"\t ({} \n".format(list_type) +
"\t\t ({}".format(node1_type) + " \"" + prefix1 + node1 + "\")\n" +
"\t\t ({}".format(node2_type) + " \"" + prefix2 + node2 + "\")))\n" )
"\t\t ({}".format(node1_type) + " \"" + prefix1 + str(node1) + "\")\n" +
"\t\t ({}".format(node2_type) + " \"" + prefix2 + str(node2) + "\")))\n" )
else:
return ""

def member(node1, node1_type, node2, node2_type, prefix1="", prefix2=""):
    """Return the text of a ``MemberLink`` expression relating two nodes.

    Args:
        node1: Name of the first node; any value, it is converted with
            ``str()`` before being embedded in the output.
        node1_type: Node type written before the first name
            (e.g. ``"GeneNode"``).
        node2: Name of the second node; converted with ``str()`` as well.
        node2_type: Node type written before the second name.
        prefix1: Text prepended to the first node's name.
        prefix2: Text prepended to the second node's name.

    Returns:
        The formatted ``(MemberLink ...)`` string, or ``""`` when either
        name stringifies to ``"-"`` or ``"nan"``.
    """
    # "-" and "nan" are skipped; presumably these are the placeholders for
    # missing cells in the input table -- TODO confirm against the caller.
    if str(node1) in ["-", "nan"] or str(node2) in ["-", "nan"]:
        return ""

    # str() keeps non-string names (e.g. integer IDs) from raising
    # TypeError during concatenation.
    return ('(MemberLink\n' +
            '\t({} "'.format(node1_type) + prefix1 + str(node1) + '")\n' +
            '\t({} "'.format(node2_type) + prefix2 + str(node2) + '"))\n')

Expand Down Expand Up @@ -96,7 +96,8 @@ def import_data(data, source, version, gene_level=False, form='tab2'):
entrez1 = str(data.iloc[i]['Entrez Gene Interactor A']).strip()
entrez2 = str(data.iloc[i]['Entrez Gene Interactor B']).strip()
stv = '(stv 1.0 {})'.format(score)

taxonomy_id_1 = int(data.iloc[i]['Organism Interactor A'])
taxonomy_id_2 = int(data.iloc[i]['Organism Interactor B'])

if (gene1, gene2) not in pairs or (gene2, gene1) not in genes:
if not gene1 in entrez:
Expand All @@ -123,25 +124,26 @@ def import_data(data, source, version, gene_level=False, form='tab2'):
f.write(evaLink(gene2, "GeneNode", bio,"ConceptNode", "has_biogridID", prefix2="Bio:"))
f.write(evaLink(prot2, "MoleculeNode", bio,"ConceptNode", "has_biogridID", prefix1="Uniprot:",prefix2="Bio:"))

if gene1 not in genes and int(data.iloc[i]['Organism Interactor A']) == 2697049:
f.write(evaLink(gene1, "GeneNode", "SARS-CoV-2" , "ConceptNode","has_organism"))
f.write(evaLink(prot1, "MoleculeNode", "SARS-CoV-2", "ConceptNode","has_organism", prefix1="Uniprot:"))
if gene1 not in genes and taxonomy_id_1 == 2697049:
f.write(evaLink(gene1, "GeneNode", taxonomy_id_1 , "ConceptNode","from_organism", prefix2="TaxonomyID:"))
f.write(evaLink(prot1, "MoleculeNode", taxonomy_id_1, "ConceptNode","from_organism", prefix1="Uniprot:",prefix2="TaxonomyID:"))
if gene_level:
g.write(evaLink(gene1, "GeneNode", "SARS-CoV-2" , "ConceptNode","has_organism"))
g.write(evaLink(prot1, "MoleculeNode", "SARS-CoV-2", "ConceptNode","has_organism", prefix1="Uniprot:"))
if gene2 not in genes and int(data.iloc[i]['Organism Interactor B']) != 9606:
f.write(evaLink(gene2, "GeneNode", "SARS-CoV-2" , "ConceptNode","has_organism"))
f.write(evaLink(prot2, "MoleculeNode", "SARS-CoV-2", "ConceptNode","has_organism", prefix1="Uniprot:"))
g.write(evaLink(gene1, "GeneNode", taxonomy_id_1 , "ConceptNode","from_organism",prefix2="TaxonomyID:"))
g.write(evaLink(prot1, "MoleculeNode", taxonomy_id_1, "ConceptNode","from_organism", prefix1="Uniprot:", prefix2="TaxonomyID:"))
if gene2 not in genes and taxonomy_id_2 == 2697049:
f.write(evaLink(gene2, "GeneNode", taxonomy_id_2 , "ConceptNode","from_organism",prefix2="TaxonomyID:"))
f.write(evaLink(prot2, "MoleculeNode", taxonomy_id_2, "ConceptNode","from_organism", prefix1="Uniprot:",prefix2="TaxonomyID:"))
if gene_level:
g.write(evaLink(gene2, "GeneNode", "SARS-CoV-2" , "ConceptNode", "has_organism"))
g.write(evaLink(prot2, "MoleculeNode", "SARS-CoV-2", "ConceptNode", "has_organism", prefix1="Uniprot:"))
g.write(evaLink(gene2, "GeneNode", taxonomy_id_2 , "ConceptNode", "from_organism", prefix2="TaxonomyID:"))
g.write(evaLink(prot2, "MoleculeNode", taxonomy_id_2, "ConceptNode", "from_organism", prefix1="Uniprot:", prefix2="TaxonomyID:"))

genes.append(gene1)
genes.append(gene2)
proteins.append(prot1)
proteins.append(prot2)
pairs.append((gene1,gene2))

f.write(evaLink("2697049", "ConceptNode", "SARS-CoV-2", "ConceptNode","has_name",prefix1="TaxonomyID:"))
g.write(evaLink("2697049", "ConceptNode", "SARS-CoV-2", "ConceptNode","has_name",prefix1="TaxonomyID:"))
pairs = set((a,b) if a<=b else (b,a) for a,b in pairs)
number_of_interactions = len(pairs)
script = "https://github.com/MOZI-AI/knowledge-import/coronavirus_biogrid.py"
Expand Down

0 comments on commit 7127df9

Please sign in to comment.