# Patch against coronavirus_biogrid.py (blob 13a193f -> 6722190), three hunks.
# NOTE(review): in this copy the patch's line breaks and leading whitespace
#   have been collapsed, so record boundaries have to be read from the
#   " - " / " + " markers and indentation / blank context lines cannot be
#   recovered. Regenerate the patch from the repository before applying it.
#
# Hunk 1 (@@ -33,16 +33,16 @@): evaLink() and member() now concatenate
#   str(node1) / str(node2) instead of node1 / node2, so non-string node
#   values can be passed (hunk 3 passes integer taxonomy IDs).
# Hunk 2 (@@ -96,7 +96,8 @@, import_data): one removed line (empty in this
#   copy) is replaced by two new locals, taxonomy_id_1 / taxonomy_id_2, read
#   as int from the 'Organism Interactor A' / 'Organism Interactor B' columns.
# Hunk 3 (@@ -123,25 +124,26 @@, import_data):
#   * the organism links change from predicate "has_organism" with the
#     literal ConceptNode "SARS-CoV-2" to predicate "from_organism" with the
#     taxonomy ID, prefixed "TaxonomyID:";
#   * the gene2 condition changes from `Organism Interactor B != 9606` to
#     `taxonomy_id_2 == 2697049`, matching the gene1 condition;
#   * two new writes (one to f, one to g) add a "has_name" link from
#     "TaxonomyID:2697049" to "SARS-CoV-2".
# NOTE(review): the other g.write calls in this hunk follow `if gene_level:`;
#   whether the new g.write does too cannot be told from this
#   whitespace-collapsed copy — confirm that g exists on that path.
# NOTE(review): the unchanged context line tests
#   `(gene2, gene1) not in genes`, although its first half tests `pairs` and
#   genes only receives single gene names — looks like `pairs` was intended;
#   verify against the full file.
diff --git a/coronavirus_biogrid.py b/coronavirus_biogrid.py index 13a193f..6722190 100644 --- a/coronavirus_biogrid.py +++ b/coronavirus_biogrid.py @@ -33,16 +33,16 @@ def evaLink(node1, node1_type, node2, node2_type, predicate, prefix1="", prefix2 return ("(EvaluationLink {}\n".format(stv) + "\t (PredicateNode \""+ predicate + "\")\n" + "\t ({} \n".format(list_type) + - "\t\t ({}".format(node1_type) + " \"" + prefix1 + node1 + "\")\n" + - "\t\t ({}".format(node2_type) + " \"" + prefix2 + node2 + "\")))\n" ) + "\t\t ({}".format(node1_type) + " \"" + prefix1 + str(node1) + "\")\n" + + "\t\t ({}".format(node2_type) + " \"" + prefix2 + str(node2) + "\")))\n" ) else: return "" def member(node1, node1_type, node2, node2_type, prefix1="", prefix2=""): if not (str(node1) in ["-", "nan"] or str(node2) in ["-", "nan"]): return ('(MemberLink\n' + - '\t({} "'.format(node1_type) + prefix1 + node1 + '")\n'+ - '\t({} "'.format(node2_type) + prefix2 + node2 + '"))\n') + '\t({} "'.format(node1_type) + prefix1 + str(node1) + '")\n'+ + '\t({} "'.format(node2_type) + prefix2 + str(node2) + '"))\n') else: return "" @@ -96,7 +96,8 @@ def import_data(data, source, version, gene_level=False, form='tab2'): entrez1 = str(data.iloc[i]['Entrez Gene Interactor A']).strip() entrez2 = str(data.iloc[i]['Entrez Gene Interactor B']).strip() stv = '(stv 1.0 {})'.format(score) - + taxonomy_id_1 = int(data.iloc[i]['Organism Interactor A']) + taxonomy_id_2 = int(data.iloc[i]['Organism Interactor B']) if (gene1, gene2) not in pairs or (gene2, gene1) not in genes: if not gene1 in entrez: @@ -123,25 +124,26 @@ def import_data(data, source, version, gene_level=False, form='tab2'): f.write(evaLink(gene2, "GeneNode", bio,"ConceptNode", "has_biogridID", prefix2="Bio:")) f.write(evaLink(prot2, "MoleculeNode", bio,"ConceptNode", "has_biogridID", prefix1="Uniprot:",prefix2="Bio:")) - if gene1 not in genes and int(data.iloc[i]['Organism Interactor A']) == 2697049: - f.write(evaLink(gene1, "GeneNode", 
"SARS-CoV-2" , "ConceptNode","has_organism")) - f.write(evaLink(prot1, "MoleculeNode", "SARS-CoV-2", "ConceptNode","has_organism", prefix1="Uniprot:")) + if gene1 not in genes and taxonomy_id_1 == 2697049: + f.write(evaLink(gene1, "GeneNode", taxonomy_id_1 , "ConceptNode","from_organism", prefix2="TaxonomyID:")) + f.write(evaLink(prot1, "MoleculeNode", taxonomy_id_1, "ConceptNode","from_organism", prefix1="Uniprot:",prefix2="TaxonomyID:")) if gene_level: - g.write(evaLink(gene1, "GeneNode", "SARS-CoV-2" , "ConceptNode","has_organism")) - g.write(evaLink(prot1, "MoleculeNode", "SARS-CoV-2", "ConceptNode","has_organism", prefix1="Uniprot:")) - if gene2 not in genes and int(data.iloc[i]['Organism Interactor B']) != 9606: - f.write(evaLink(gene2, "GeneNode", "SARS-CoV-2" , "ConceptNode","has_organism")) - f.write(evaLink(prot2, "MoleculeNode", "SARS-CoV-2", "ConceptNode","has_organism", prefix1="Uniprot:")) + g.write(evaLink(gene1, "GeneNode", taxonomy_id_1 , "ConceptNode","from_organism",prefix2="TaxonomyID:")) + g.write(evaLink(prot1, "MoleculeNode", taxonomy_id_1, "ConceptNode","from_organism", prefix1="Uniprot:", prefix2="TaxonomyID:")) + if gene2 not in genes and taxonomy_id_2 == 2697049: + f.write(evaLink(gene2, "GeneNode", taxonomy_id_2 , "ConceptNode","from_organism",prefix2="TaxonomyID:")) + f.write(evaLink(prot2, "MoleculeNode", taxonomy_id_2, "ConceptNode","from_organism", prefix1="Uniprot:",prefix2="TaxonomyID:")) if gene_level: - g.write(evaLink(gene2, "GeneNode", "SARS-CoV-2" , "ConceptNode", "has_organism")) - g.write(evaLink(prot2, "MoleculeNode", "SARS-CoV-2", "ConceptNode", "has_organism", prefix1="Uniprot:")) + g.write(evaLink(gene2, "GeneNode", taxonomy_id_2 , "ConceptNode", "from_organism", prefix2="TaxonomyID:")) + g.write(evaLink(prot2, "MoleculeNode", taxonomy_id_2, "ConceptNode", "from_organism", prefix1="Uniprot:", prefix2="TaxonomyID:")) genes.append(gene1) genes.append(gene2) proteins.append(prot1) proteins.append(prot2) 
pairs.append((gene1,gene2)) - + f.write(evaLink("2697049", "ConceptNode", "SARS-CoV-2", "ConceptNode","has_name",prefix1="TaxonomyID:")) + g.write(evaLink("2697049", "ConceptNode", "SARS-CoV-2", "ConceptNode","has_name",prefix1="TaxonomyID:")) pairs = set((a,b) if a<=b else (b,a) for a,b in pairs) number_of_interactions = len(pairs) script = "https://github.com/MOZI-AI/knowledge-import/coronavirus_biogrid.py"