forked from MOZI-AI/knowledge-import
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Extract required info from the DrugBank XML file
- Loading branch information
1 parent
77cffb7
commit 28200d3
Showing 1 changed file with 35 additions and 0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,35 @@ | ||
# To run: | ||
# PYTHONIOENCODING=UTF-8 python3 drugbank.py | ||
|
||
import xml.etree.ElementTree as ET | ||
|
||
# Path of the DrugBank XML file that this script parses.
xml_file = "raw_data/drugbank/full_database.xml"
# XML namespace, in ElementTree's "{uri}" form, that is prepended to every
# tag name before it is looked up.
tag_prefix = "{http://www.drugbank.ca}"
|
||
def find_tag(obj, tag):
    """Return the first child of *obj* whose namespaced tag is *tag*.

    ``tag_prefix`` is prepended to *tag* before the lookup, so callers pass
    the bare tag name (for example ``"name"``).

    Args:
        obj: The element to search, or ``None``.
        tag: Tag name without the namespace prefix.

    Returns:
        The first matching child element, or ``None`` when *obj* is ``None``
        or has no such child.
    """
    # Propagate a missing parent instead of raising AttributeError, so that
    # nested lookups such as find_tag(find_tag(x, "a"), "b") stay safe.
    if obj is None:
        return None
    return obj.find(tag_prefix + tag)
|
||
def findall_tag(obj, tag):
    """Return every child of *obj* whose namespaced tag is *tag*.

    ``tag_prefix`` is prepended to *tag* before the lookup, so callers pass
    the bare tag name (for example ``"group"``).

    Args:
        obj: The element to search, or ``None``.
        tag: Tag name without the namespace prefix.

    Returns:
        A list of matching child elements; empty when *obj* is ``None`` or
        nothing matches.
    """
    # A missing container yields an empty list so callers can loop over the
    # result without first checking the container lookup.
    if obj is None:
        return []
    return obj.findall(tag_prefix + tag)
|
||
def get_child_tag_text(obj, tag):
    """Return the text of the first child of *obj* named *tag*.

    Args:
        obj: The parent element, or ``None``.
        tag: Child tag name without the namespace prefix.

    Returns:
        The child's ``text`` attribute, or ``None`` when *obj* is ``None``
        or the child element is absent.
    """
    if obj is None:
        return None
    child = find_tag(obj, tag)
    # find() gives None for a missing child; avoid dereferencing it.
    if child is None:
        return None
    return child.text
|
||
# Visit each child of the document root as one drug record and read the
# fields of interest from it. The values are only bound to names here;
# nothing is stored or emitted yet.
# NOTE(review): ET.parse() builds the complete tree in memory before the
# loop starts.
# NOTE(review): loop nesting was reconstructed from variable use -- the two
# inner loops read `pathway`, so they sit inside the pathway loop.
for drug in ET.parse(xml_file).getroot():
    # The lookup returns the first match, i.e. the first <drugbank-id> child.
    drugbank_id = get_child_tag_text(drug, "drugbank-id")
    name = get_child_tag_text(drug, "name")
    description = get_child_tag_text(drug, "description")

    for group in findall_tag(find_tag(drug, "groups"), "group"):
        drug_group = group.text

    articles = find_tag(find_tag(drug, "general-references"), "articles")
    for article in findall_tag(articles, "article"):
        pubmed_id = get_child_tag_text(article, "pubmed-id")

    interactions = find_tag(drug, "drug-interactions")
    for interaction in findall_tag(interactions, "drug-interaction"):
        # TODO: Need to get ChEBI ID for other_drug
        other_drug = get_child_tag_text(interaction, "drugbank-id")

    for pathway in findall_tag(find_tag(drug, "pathways"), "pathway"):
        smpdb_id = get_child_tag_text(pathway, "smpdb-id")

        for pathway_drug in findall_tag(find_tag(pathway, "drugs"), "drug"):
            # TODO: Need to get ChEBI ID for involved_drug
            involved_drug = get_child_tag_text(pathway_drug, "drugbank-id")

        enzymes = find_tag(pathway, "enzymes")
        for enzyme_id in findall_tag(enzymes, "uniprot-id"):
            uniprot_id = enzyme_id.text