diff --git a/ilxutils/Tutorials/interlex_remotes_tutorial.ipynb b/ilxutils/docs/interlex_remotes_tutorial.ipynb
similarity index 79%
rename from ilxutils/Tutorials/interlex_remotes_tutorial.ipynb
rename to ilxutils/docs/interlex_remotes_tutorial.ipynb
index f596fda2..82d7c9a0 100644
--- a/ilxutils/Tutorials/interlex_remotes_tutorial.ipynb
+++ b/ilxutils/docs/interlex_remotes_tutorial.ipynb
@@ -42,78 +42,9 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 4,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "<div>\n",
- "<table border=\"1\" class=\"dataframe\">\n",
- "  <thead>\n",
- "    <tr style=\"text-align: right;\">\n",
- "      <th></th>\n",
- "      <th>term</th>\n",
- "      <th>curie</th>\n",
- "    </tr>\n",
- "  </thead>\n",
- "  <tbody>\n",
- "    <tr>\n",
- "      <th>1</th>\n",
- "      <td>abdominal cavity</td>\n",
- "      <td>UBERON:0003684</td>\n",
- "    </tr>\n",
- "    <tr>\n",
- "      <th>2</th>\n",
- "      <td>abdominal wall</td>\n",
- "      <td>UBERON:0003697</td>\n",
- "    </tr>\n",
- "    <tr>\n",
- "      <th>3</th>\n",
- "      <td>adipose tissue</td>\n",
- "      <td>UBERON:0001013</td>\n",
- "    </tr>\n",
- "    <tr>\n",
- "      <th>4</th>\n",
- "      <td>adult organism</td>\n",
- "      <td>UBERON:0007023</td>\n",
- "    </tr>\n",
- "    <tr>\n",
- "      <th>5</th>\n",
- "      <td>alimentary part of gastrointestinal system</td>\n",
- "      <td>UBERON:0005409</td>\n",
- "    </tr>\n",
- "  </tbody>\n",
- "</table>\n",
- "</div>"
- ],
- "text/plain": [
- "0 term curie\n",
- "1 abdominal cavity UBERON:0003684\n",
- "2 abdominal wall UBERON:0003697\n",
- "3 adipose tissue UBERON:0001013\n",
- "4 adult organism UBERON:0007023\n",
- "5 alimentary part of gastrointestinal system UBERON:0005409"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"from pyontutils.sheets import Sheet\n",
"import pandas as pd\n",
@@ -190,29 +121,17 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[OntTerm('HBA:3999', label='brain (hba)'),\n",
- " OntTerm('FMA:50801', label='Brain'),\n",
- " OntTerm('UBERON:0000955', label='brain'),\n",
- " OntTerm('UBERON:6110636', label='adult cerebral ganglion'),\n",
- " OntTerm('ILX:0101431', label='Brain'),\n",
- " OntTerm('ILX:0101433', label='Brain Infarction'),\n",
- " OntTerm('ILX:0506386', label='Brain Aneurysm'),\n",
- " OntTerm('ILX:0433050', label='Brain Chemistry'),\n",
- " OntTerm('ILX:0641746', label='alpha BRAIN'),\n",
- " OntTerm('ILX:0726394', label='brain meninx'),\n",
- " OntTerm('ILX:0729002', label='brain commissure'),\n",
- " OntTerm('ILX:0101434', label='Brain Ischemia'),\n",
- " OntTerm('ILX:0461406', label='Brain Death'),\n",
- " OntTerm('ILX:0733041', label='brain endothelium')]"
+ "[OntTerm('UBERON:0000955', label='brain'),\n",
+ " OntTerm('UBERON:6110636', label='adult cerebral ganglion')]"
]
},
- "execution_count": 3,
+ "execution_count": 5,
"metadata": {},
"output_type": "execute_result"
}
@@ -225,17 +144,16 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 11,
"metadata": {},
"outputs": [
{
"data": {
"text/plain": [
- "[OntTerm('ILX:0103358', label='DN1 neuron'),\n",
- " OntTerm('ILX:0109525', label='Pupal DN1 period neuron')]"
+ "[]"
]
},
- "execution_count": 4,
+ "execution_count": 11,
"metadata": {},
"output_type": "execute_result"
}
@@ -243,12 +161,12 @@
"source": [
"# similar entities will show\n",
"# default limit is 10\n",
- "query(term='DN1 neuron', limit=2) "
+    "query(term='brain', limit=10, prefix='ILX') "
]
},
{
"cell_type": "code",
- "execution_count": 5,
+ "execution_count": 12,
"metadata": {},
"outputs": [
{
@@ -257,7 +175,7 @@
"[OntTerm('UBERON:0000955', label='brain')]"
]
},
- "execution_count": 5,
+ "execution_count": 12,
"metadata": {},
"output_type": "execute_result"
}
@@ -269,7 +187,7 @@
},
{
"cell_type": "code",
- "execution_count": 6,
+ "execution_count": 13,
"metadata": {},
"outputs": [
{
@@ -277,20 +195,6 @@
"text/plain": [
"{'prefix': 'UBERON',\n",
" 'suffix': '0000955',\n",
- " 'orig_kwargs': {'iri': 'http://purl.obolibrary.org/obo/UBERON_0000955',\n",
- " 'curie_or_iri': None,\n",
- " 'label': None,\n",
- " 'term': None,\n",
- " 'search': None,\n",
- " 'validated': None,\n",
- " 'query': None},\n",
- " 'kwargs': {'iri': 'http://purl.obolibrary.org/obo/UBERON_0000955',\n",
- " 'curie_or_iri': None,\n",
- " 'label': None,\n",
- " 'term': None,\n",
- " 'search': None,\n",
- " 'validated': None,\n",
- " 'query': None},\n",
" 'label': 'brain',\n",
" 'labels': ['brain'],\n",
" 'definition': 'The brain is the center of the nervous system in all vertebrate, and most invertebrate, animals. Some primitive animals such as jellyfish and starfish have a decentralized nervous system without a brain, while sponges lack any nervous system at all. In vertebrates, the brain is located in the head, protected by the skull and close to the primary sensory apparatus of vision, hearing, balance, taste, and smell[WP].',\n",
@@ -304,12 +208,12 @@
" '_type': OntId('owl:Class'),\n",
" '_types': (OntId('owl:Class'),),\n",
" '_graph': None,\n",
- " '_source': ,\n",
+ " '_source': ,\n",
" 'validated': True,\n",
- " '_query_result': QueryResult({'iri': 'http://purl.obolibrary.org/obo/UBERON_0000955', 'curie': 'UBERON:0000955', 'label': 'brain', 'labels': ['brain'], 'definition': 'The brain is the center of the nervous system in all vertebrate, and most invertebrate, animals. Some primitive animals such as jellyfish and starfish have a decentralized nervous system without a brain, while sponges lack any nervous system at all. In vertebrates, the brain is located in the head, protected by the skull and close to the primary sensory apparatus of vision, hearing, balance, taste, and smell[WP].', 'synonyms': ['the brain', 'synganglion', 'suprasegmental structures', 'suprasegmental levels of nervous system', 'encephalon'], 'deprecated': False, 'predicates': {}, 'type': OntId('owl:Class'), 'types': (OntId('owl:Class'),), '_graph': None, 'source': })}"
+ " '_query_result': QueryResult({'iri': 'http://purl.obolibrary.org/obo/UBERON_0000955', 'curie': 'UBERON:0000955', 'label': 'brain', 'labels': ['brain'], 'definition': 'The brain is the center of the nervous system in all vertebrate, and most invertebrate, animals. Some primitive animals such as jellyfish and starfish have a decentralized nervous system without a brain, while sponges lack any nervous system at all. In vertebrates, the brain is located in the head, protected by the skull and close to the primary sensory apparatus of vision, hearing, balance, taste, and smell[WP].', 'synonyms': ['the brain', 'synganglion', 'suprasegmental structures', 'suprasegmental levels of nervous system', 'encephalon'], 'deprecated': False, 'predicates': {}, 'type': OntId('owl:Class'), 'types': (OntId('owl:Class'),), '_graph': None, 'source': })}"
]
},
- "execution_count": 6,
+ "execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
@@ -331,7 +235,7 @@
},
{
"cell_type": "code",
- "execution_count": 7,
+ "execution_count": 14,
"metadata": {},
"outputs": [
{
@@ -355,9 +259,12 @@
"\u001b[0;34m\u001b[0m \u001b[0mdirection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'OUTGOING'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mlimit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0minclude_deprecated\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+ "\u001b[0;34m\u001b[0m \u001b[0minclude_supers\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+ "\u001b[0;34m\u001b[0m \u001b[0minclude_all_services\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+ "\u001b[0;34m\u001b[0m \u001b[0mraw\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mType:\u001b[0m OntQueryCli\n",
- "\u001b[0;31mString form:\u001b[0m \n",
+ "\u001b[0;31mString form:\u001b[0m \n",
"\u001b[0;31mFile:\u001b[0m ~/Dropbox/git/ontquery/ontquery/query.py\n",
"\u001b[0;31mDocstring:\u001b[0m \n"
]
@@ -386,7 +293,7 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 16,
"metadata": {},
"outputs": [
{
@@ -404,7 +311,7 @@
" [OntTerm('UBERON:6110636', label='adult cerebral ganglion')])]"
]
},
- "execution_count": 8,
+ "execution_count": 16,
"metadata": {},
"output_type": "execute_result"
}
@@ -445,30 +352,12 @@
" # We do this in case 2+ queries return the same results & the output WILL NOT have the same input order\n",
" gin = lambda kwargs: (kwargs, query(**kwargs))\n",
" # run each query instance at the same time\n",
- " results = Async(use_nest_asyncio=True)(deferred(gin)(kwargs) for kwargs in kwargs_list)\n",
+ " results = Async()(deferred(gin)(kwargs) for kwargs in kwargs_list)\n",
" return results \n",
"\n",
"queries([{'curie':'UBERON:0000955'}, {'curie':'UBERON:6110636'}])"
]
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "from pyontutils.utils import Async, deferred\n",
- "from pyontutils.core import OntTerm, ixr, query\n",
- "from typing import List, Tuple\n",
- "def queries(url_list:List[dict]) -> List[Tuple[str, dict]]:\n",
- " def gin(url):\n",
- " return requests.get(url).text\n",
- " # run each query instance at the same time\n",
- " results = Async(limit=5)(deferred(gin)(url) for url in url_list)\n",
- " return results \n",
- "list_tuples(url, html)"
- ]
- },
{
"cell_type": "markdown",
"metadata": {},
@@ -478,12 +367,13 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": 20,
"metadata": {},
"outputs": [],
"source": [
"# TEST InterLex endpoints\n",
- "from ilxutils.remotes import interlex_remote_test as ixrt"
+ "from ilxutils.remotes import remote\n",
+ "ixrt = remote(server='test3')"
]
},
{
@@ -495,7 +385,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 21,
"metadata": {},
"outputs": [
{
@@ -507,35 +397,35 @@
" 'orig_cid': '0',\n",
" 'cid': '0',\n",
" 'ilx': 'ilx_0738390',\n",
- " 'label': 'Offical label',\n",
+ " 'label': 'official test label',\n",
" 'type': 'term',\n",
- " 'definition': 'official definition',\n",
+ " 'definition': 'definition',\n",
" 'comment': 'helpful misc',\n",
- " 'version': '3',\n",
+ " 'version': '2',\n",
" 'status': '0',\n",
" 'display_superclass': '1',\n",
" 'orig_time': '1564695195',\n",
- " 'time': '1570826848',\n",
- " 'synonyms': [{'id': '1776645',\n",
+ " 'time': '1564695333',\n",
+ " 'synonyms': [{'id': '1776589',\n",
" 'tid': '661544',\n",
" 'literal': 'Encephalon',\n",
" 'type': '',\n",
- " 'time': '1570826848',\n",
- " 'version': '3'},\n",
- " {'id': '1776646',\n",
+ " 'time': '1564695333',\n",
+ " 'version': '2'},\n",
+ " {'id': '1776590',\n",
" 'tid': '661544',\n",
" 'literal': 'Cerebro',\n",
" 'type': '',\n",
- " 'time': '1570826848',\n",
- " 'version': '3'}],\n",
+ " 'time': '1564695333',\n",
+ " 'version': '2'}],\n",
" 'superclasses': [],\n",
- " 'existing_ids': [{'id': '3885545',\n",
+ " 'existing_ids': [{'id': '3885425',\n",
" 'tid': '661544',\n",
" 'curie': 'ILX:0738390',\n",
" 'iri': 'http://uri.interlex.org/base/ilx_0738390',\n",
" 'curie_catalog_id': '3885424',\n",
- " 'version': '3',\n",
- " 'time': '1570826848',\n",
+ " 'version': '2',\n",
+ " 'time': '1564695334',\n",
" 'preferred': '1'}],\n",
" 'relationships': [],\n",
" 'mappings': [],\n",
@@ -543,7 +433,7 @@
" 'ontologies': []}"
]
},
- "execution_count": 2,
+ "execution_count": 21,
"metadata": {},
"output_type": "execute_result"
}
@@ -561,7 +451,7 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": 22,
"metadata": {},
"outputs": [
{
@@ -576,9 +466,11 @@
"\u001b[0;34m\u001b[0m \u001b[0msynonyms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mcomment\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mpredicates\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+ "\u001b[0;34m\u001b[0m \u001b[0mexisting_ids\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+ "\u001b[0;34m\u001b[0m \u001b[0mcid\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
"\u001b[0;31mDocstring:\u001b[0m \n",
- "\u001b[0;31mFile:\u001b[0m ~/Dropbox/git/ontquery/ontquery/plugins/services.py\n",
+ "\u001b[0;31mFile:\u001b[0m ~/Dropbox/git/ontquery/ontquery/plugins/services/interlex.py\n",
"\u001b[0;31mType:\u001b[0m method\n"
]
},
@@ -592,14 +484,14 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 23,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
- "QueryResult({'iri': 'http://uri.interlex.org/base/ilx_0738390', 'curie': 'ILX:0738390', 'label': 'official test label', 'labels': (), 'definition': 'definition', 'synonyms': ('Encephalon', 'Cerebro'), 'deprecated': None, 'predicates': {'comment': 'helpful misc'}, 'type': None, 'types': (), '_graph': None, 'source': })\n"
+ "QueryResult({'iri': 'http://uri.interlex.org/base/ilx_0738390', 'curie': 'ILX:0738390', 'label': 'official test label', 'labels': (), 'definition': 'definition', 'synonyms': ('Encephalon', 'Cerebro'), 'deprecated': None, 'predicates': {'comment': 'helpful misc'}, 'type': None, 'types': (), '_graph': None, 'source': })\n"
]
}
],
@@ -628,7 +520,7 @@
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": 24,
"metadata": {},
"outputs": [
{
@@ -644,10 +536,20 @@
"\u001b[0;34m\u001b[0m \u001b[0msynonyms\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mcomment\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mpredicates_to_add\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+ "\u001b[0;34m\u001b[0m \u001b[0madd_existing_ids\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+ "\u001b[0;34m\u001b[0m \u001b[0mdelete_existing_ids\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mList\u001b[0m\u001b[0;34m[\u001b[0m\u001b[0mdict\u001b[0m\u001b[0;34m]\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m \u001b[0mpredicates_to_delete\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mdict\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
+ "\u001b[0;34m\u001b[0m \u001b[0mcid\u001b[0m\u001b[0;34m:\u001b[0m \u001b[0mstr\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
"\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mDocstring:\u001b[0m \n",
- "\u001b[0;31mFile:\u001b[0m ~/Dropbox/git/ontquery/ontquery/plugins/services.py\n",
+ "\u001b[0;31mDocstring:\u001b[0m\n",
+ "Update existing entity.\n",
+ "\n",
+ ":param List[dict] add_existing_ids: iris and curies to be added to entity.\n",
+ ":param List[dict] delete_existing_ids: iris and curies to be deleted from entity.\n",
+ "\n",
+ ">>>update_entity(add_existing_ids=[{'ilx_id':'ilx_1234567', 'iri':'http://abc.org/abc_123', 'curie':'ABC:123'}])\n",
+ ">>>update_entity(delete_existing_ids=[{'ilx_id':'ilx_1234567', 'iri':'http://abc.org/abc_123', 'curie':'ABC:123'}])\n",
+ "\u001b[0;31mFile:\u001b[0m ~/Dropbox/git/ontquery/ontquery/plugins/services/interlex.py\n",
"\u001b[0;31mType:\u001b[0m method\n"
]
},
@@ -661,26 +563,27 @@
},
{
"cell_type": "code",
- "execution_count": 8,
+ "execution_count": 26,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
- "\u001b[32m[2019-10-11 13:47:28,619]\u001b[0m - \u001b[32m INFO\u001b[0m - ontquery - \u001b[34minterlex_client.py:796 \u001b[0m - {'ilx_id': 'ILX:0738390', 'label': 'Offical label', 'type': 'term', 'definition': 'official definition', 'comment': 'helpful misc', 'superclass': '', 'synonyms': ['Encephalon', 'Cerebro']}\u001b[0m\n"
+ "\u001b[32m[2020-04-10 13:25:53,802]\u001b[0m - \u001b[32m INFO\u001b[0m - ontquery - \u001b[34minterlex_client.py:962 \u001b[0m - {'ilx_id': 'ILX:0738390', 'label': 'Offical label', 'type': 'term', 'definition': 'official definition', 'comment': 'helpful misc', 'superclass': '', 'synonyms': ['Encephalon', 'Cerebro'], 'add_existing_ids': None, 'delete_existing_ids': None, 'status': '0', 'cid': None}\u001b[0m\n"
]
},
{
"name": "stdout",
"output_type": "stream",
"text": [
- "QueryResult({'iri': 'http://uri.interlex.org/base/ilx_0738390', 'curie': 'ILX:0738390', 'label': 'Offical label', 'labels': (), 'definition': 'official definition', 'synonyms': ('Encephalon', 'Cerebro'), 'deprecated': None, 'predicates': {'comment': 'helpful misc'}, 'type': None, 'types': (), '_graph': None, 'source': })\n"
+ "QueryResult({'iri': 'http://uri.interlex.org/base/ilx_0738390', 'curie': 'ILX:0738390', 'label': 'Offical label', 'labels': (), 'definition': 'official definition', 'synonyms': ('Encephalon', 'Cerebro'), 'deprecated': None, 'predicates': {'comment': 'helpful misc'}, 'type': None, 'types': (), '_graph': None, 'source': })\n"
]
}
],
"source": [
- "from ilxutils.remotes import interlex_remote_test as ixrt\n",
+ "from ilxutils.remotes import remote\n",
+ "ixrt = remote(server='test3')\n",
"entity = dict(\n",
" ilx_id = 'ILX:0738390',\n",
" label = 'Offical label', # Can only one unique label per person\n",
@@ -708,14 +611,74 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": 27,
"metadata": {},
"outputs": [],
"source": [
"# PRODUCTION \n",
- "from ilxutils.remotes import interlex_remote_production as ixr\n",
+ "from ilxutils.remotes import remote \n",
+ "ixr = remote()\n",
"# BE CAREFUL :)"
]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 28,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "{'id': '661544',\n",
+ " 'orig_uid': '34142',\n",
+ " 'uid': '34142',\n",
+ " 'orig_cid': '0',\n",
+ " 'cid': '0',\n",
+ " 'ilx': 'ilx_0738390',\n",
+ " 'label': 'Offical label',\n",
+ " 'type': 'term',\n",
+ " 'definition': 'official definition',\n",
+ " 'comment': 'helpful misc',\n",
+ " 'version': '3',\n",
+ " 'status': '0',\n",
+ " 'display_superclass': '1',\n",
+ " 'orig_time': '1564695195',\n",
+ " 'time': '1586550353',\n",
+ " 'synonyms': [{'id': '1845765',\n",
+ " 'tid': '661544',\n",
+ " 'literal': 'Encephalon',\n",
+ " 'type': '',\n",
+ " 'time': '1586550353',\n",
+ " 'version': '3'},\n",
+ " {'id': '1845766',\n",
+ " 'tid': '661544',\n",
+ " 'literal': 'Cerebro',\n",
+ " 'type': '',\n",
+ " 'time': '1586550353',\n",
+ " 'version': '3'}],\n",
+ " 'superclasses': [],\n",
+ " 'existing_ids': [{'id': '4972084',\n",
+ " 'tid': '661544',\n",
+ " 'curie': 'ILX:0738390',\n",
+ " 'iri': 'http://uri.interlex.org/base/ilx_0738390',\n",
+ " 'curie_catalog_id': '3885424',\n",
+ " 'version': '3',\n",
+ " 'time': '1586550353',\n",
+ " 'preferred': '1'}],\n",
+ " 'relationships': [],\n",
+ " 'mappings': [],\n",
+ " 'annotations': [],\n",
+ " 'ontologies': []}"
+ ]
+ },
+ "execution_count": 28,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "ixrt.ilx_cli.get_entity('ilx_0738390')"
+ ]
}
],
"metadata": {
@@ -734,7 +697,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.8.5"
}
},
"nbformat": 4,
diff --git a/ilxutils/ilx-playground.ipynb b/ilxutils/ilx-playground.ipynb
index 7f8afdfb..7072d902 100644
--- a/ilxutils/ilx-playground.ipynb
+++ b/ilxutils/ilx-playground.ipynb
@@ -1,12 +1,5 @@
{
"cells": [
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": []
- },
{
"cell_type": "code",
"execution_count": 33,
@@ -681,7 +674,12 @@
{
"cell_type": "code",
"execution_count": 10,
- "metadata": {},
+ "metadata": {
+ "collapsed": true,
+ "jupyter": {
+ "outputs_hidden": true
+ }
+ },
"outputs": [
{
"name": "stderr",
@@ -6477,7 +6475,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
- "version": "3.7.3"
+ "version": "3.7.5"
}
},
"nbformat": 4,
diff --git a/ilxutils/ilxutils/backup_ilx.py b/ilxutils/ilxutils/backup_ilx.py
index a4b102f7..6e15b868 100644
--- a/ilxutils/ilxutils/backup_ilx.py
+++ b/ilxutils/ilxutils/backup_ilx.py
@@ -1,42 +1,59 @@
from pathlib import Path as p
-from ilxutils.interlex_sql import IlxSql
-from ilxutils.tools import create_pickle
+from .interlex_sql import IlxSql
+
+# from tools import create_pickle
+import pickle
import os
-sql = IlxSql(db_url=os.environ.get('SCICRUNCH_DB_URL_PRODUCTION'))
+HOME = p.home() / "DropboxPersonal/.interlex_backups"
+if not HOME.exists():
+    HOME.mkdir(parents=True)
+
+
+def create_pickle(data, outfilename):
+ with open(outfilename, "wb") as outfile:
+ pickle.dump(data, outfile)
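+
+
+# Restore sketch (assumes the backup pickles below have been written):
+#   with open(HOME / "ilx_db_terms_backup.pickle", "rb") as infile:
+#       terms = pickle.load(infile)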
-terms = sql.get_terms()
-create_pickle(terms, p.home() / 'Dropbox/interlex_backups/ilx_db_terms_backup.pickle')
-print('=== terms backup complete ===')
-del terms
+def main():
+ sql = IlxSql(db_url=os.environ.get("SCICRUNCH_DB_URL_PRODUCTION"))
+ users = sql.get_users()
+ create_pickle(users, HOME / "ilx_db_users_backup.pickle")
+ print("=== Users backup complete ===")
+ del users
-annos = sql.get_annotations()
-create_pickle(annos, p.home() / 'Dropbox/interlex_backups/ilx_db_annos_backup.pickle')
-print('=== annotations backup complete ===')
-del annos
+ terms = sql.get_terms()
+ create_pickle(terms, HOME / "ilx_db_terms_backup.pickle")
+ print("=== terms backup complete ===")
+ del terms
+ annos = sql.get_annotations()
+ create_pickle(annos, HOME / "ilx_db_annos_backup.pickle")
+ print("=== annotations backup complete ===")
+ del annos
-ex = sql.get_existing_ids()
-create_pickle(ex, p.home() / 'Dropbox/interlex_backups/ilx_db_ex_backup.pickle')
-print('=== existing ids backup complete ===')
-del ex
+ ex = sql.get_existing_ids()
+ create_pickle(ex, HOME / "ilx_db_ex_backup.pickle")
+ print("=== existing ids backup complete ===")
+ del ex
+ synonyms = sql.get_synonyms()
+ create_pickle(synonyms, HOME / "ilx_db_synonyms_backup.pickle")
+ print("=== synonyms backup complete ===")
+ del synonyms
-synonyms = sql.get_synonyms()
-create_pickle(synonyms, p.home() / 'Dropbox/interlex_backups/ilx_db_synonyms_backup.pickle')
-print('=== synonyms backup complete ===')
-del synonyms
+ superclasses = sql.get_superclasses()
+ create_pickle(superclasses, HOME / "ilx_db_superclasses_backup.pickle")
+ print("=== superclasses backup complete ===")
+ del superclasses
+ relationships = sql.get_relationships()
+ create_pickle(relationships, HOME / "ilx_db_relationships_backup.pickle")
+ print("=== relationships backup complete ===")
+ del relationships
-superclasses = sql.get_superclasses()
-create_pickle(superclasses, p.home() / 'Dropbox/interlex_backups/ilx_db_superclasses_backup.pickle')
-print('=== superclasses backup complete ===')
-del superclasses
-relationships = sql.get_relationships()
-create_pickle(relationships, p.home() / 'Dropbox/interlex_backups/ilx_db_relationships_backup.pickle')
-print('=== relationships backup complete ===')
-del relationships
+if __name__ == "__main__":
+ main()
diff --git a/ilxutils/ilxutils/elasticsearch_wrapper.py b/ilxutils/ilxutils/elasticsearch_wrapper.py
new file mode 100644
index 00000000..d203f50e
--- /dev/null
+++ b/ilxutils/ilxutils/elasticsearch_wrapper.py
@@ -0,0 +1,119 @@
+import os
+
+from elasticsearch import Elasticsearch
+BASHRC = lambda s: os.environ.get(s)
+
+
+class ElasticSearchTools:
+    """ Shortcuts for common elasticsearch queries. """
+
+ def __init__(self,
+ host: str, index: str, type: str,
+ user: str, password: str,
+ size: int = 10, start: int = 0,
+ scheme: str = 'https',) -> None:
+ """
+        :param str host: ElasticSearch host for the endpoint url.
+        :param str index: index name appended to the host url.
+        :param str type: document type queried as a sub-index.
+ """
+ self.url = f'{scheme}://{host}/{index}'
+ self.host, self.index, self.type = host, index, type
+ self.es = Elasticsearch(self.url, http_auth=(user, password))
+
+ def search(self, body: dict, **kwargs) -> dict:
+ """ Elasticsearch '/_search' feature.
+
+        We use a fragmented index called a type. The type is the last index
+        while the real index becomes part of the host url.
+
+        :param dict body: query dict.
+        :return: nested elasticsearch dict where hits are in ['hits']['hits']
+
+        >>> search(body={ 'query': { 'match_all': {} } })
+ """
+ return self.es.search(index=self.type, body=body, **kwargs)
+
+    def scroll(self, body: dict, size: int, **kwargs) -> list:
+        """ Page through up to `size` hits in chunks of 10000. """
+        body['size'] = 10000
+        hits = []
+        for step in range(0, size, 10000):
+            body['from'] = step
+            hits += self.es.search(index=self.type, body=body, **kwargs)['hits']['hits']
+        return hits
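+
+    # Usage sketch (hypothetical query): collect the first 20000 hits in two pages
+    #   hits = es_tools.scroll(body={'query': {'match_all': {}}}, size=20000)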
+
+ def all_matches(self, sorting: str, size, start) -> dict:
+ """First or last set of entities.
+
+ :param str sorting: asc for head or desc for tail.
+        :param int size: number of entities you want from head or tail.
+ :param int start: position of index you want to start from.
+ :return: elasticsearch _search dict
+ """
+ if sorting.lower().strip() not in ['asc', 'desc']:
+ raise ValueError('sorting can only be asc or desc.')
+ body = {
+ 'query': { 'match_all': {} },
+ 'sort': [ { '_id': sorting } ],
+ 'size': size,
+ 'from': start,
+ }
+ return self.search(body)
+
+ def head(self, size=10, start=0):
+        """ See all_matches doc. """
+ return self.all_matches(sorting='asc', size=size, start=start)
+
+ def tail(self, size=10, start=0):
+        """ See all_matches doc. """
+ return self.all_matches(sorting='desc', size=size, start=start)
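+
+    # Usage sketch (hypothetical host and credentials):
+    #   est = ElasticSearchTools(host='elastic.example.org:9243', index='interlex',
+    #                            type='term', user='me', password='secret')
+    #   first_ten = est.head()['hits']['hits']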
+
+
+class InterLexES(ElasticSearchTools):
+
+ def __init__(self, beta=True):
+ super().__init__(
+ host = BASHRC('SCICRUNCH_ELASTIC_URL'),
+ # index = 'Interlex_old',
+ index = 'interlex',
+ type = 'term',
+ user = BASHRC('INTERLEX_ELASTIC_USER'),
+ password = BASHRC('INTERLEX_ELASTIC_PASSWORD'),
+ )
+ self.beta = beta
+
+ def filter_tmp(self):
+ prefix = 'tmp_' if self.beta else 'ilx_'
+ return { 'prefix': { 'ilx' : { 'value': prefix } } }
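+
+    # e.g. beta=True  -> { 'prefix': { 'ilx': { 'value': 'tmp_' } } }
+    #      beta=False -> { 'prefix': { 'ilx': { 'value': 'ilx_' } } }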
+
+ def all_matches(self, sorting: str, size, start) -> dict:
+ """First or last set of entities.
+
+ :param str sorting: asc for head or desc for tail.
+        :param int size: number of entities you want from head or tail.
+ :param int start: position of index you want to start from.
+ :return: elasticsearch _search dict
+ """
+ if sorting.lower().strip() not in ['asc', 'desc']:
+ raise ValueError('sorting can only be asc or desc.')
+ body = {
+ 'query': self.filter_tmp(),
+ 'sort': [ { '_id': sorting } ],
+ 'size': size,
+ 'from': start,
+ }
+ return self.search(body)
+
+
+def main():
+ ilxes = InterLexES(beta=False)
+ print(ilxes.tail(1))
+
+
+if __name__ == '__main__':
+ main()
diff --git a/ilxutils/ilxutils/interlex_sanity_checks.py b/ilxutils/ilxutils/interlex_sanity_checks.py
new file mode 100644
index 00000000..d953953b
--- /dev/null
+++ b/ilxutils/ilxutils/interlex_sanity_checks.py
@@ -0,0 +1,4 @@
+from .sql import production_sql
+
+ilx_sql = production_sql(from_backup=True)
+ex = ilx_sql.get_existing_ids()
diff --git a/ilxutils/ilxutils/interlex_sql.py b/ilxutils/ilxutils/interlex_sql.py
index 256a0f83..522fc1e5 100755
--- a/ilxutils/ilxutils/interlex_sql.py
+++ b/ilxutils/ilxutils/interlex_sql.py
@@ -1,39 +1,79 @@
from pathlib import Path
+import numpy as np
import pandas as pd
from sqlalchemy import create_engine, inspect, Table, Column
from collections import defaultdict
-from ilxutils.tools import light_degrade, open_pickle, create_pickle
+from .tools import light_degrade, open_pickle, create_pickle
import os
-#ELASTIC = 'https://5f86098ac2b28a982cebf64e82db4ea2.us-west-2.aws.found.io:9243/interlex/term/'
-TERMS_COMPLETE_BACKUP_PATH = Path.home()/'Dropbox/interlex_backups/ilx_db_terms_complete_backup.pickle'
-TERMS_BACKUP_PATH = Path.home()/'Dropbox/interlex_backups/ilx_db_terms_backup.pickle'
-ANNOS_BACKUP_PATH = Path.home()/'Dropbox/interlex_backups/ilx_db_annotations_backup.pickle'
-RELAS_BACKUP_PATH = Path.home()/'Dropbox/interlex_backups/ilx_db_relationships_backup.pickle'
-SUPER_BACKUP_PATH = Path.home()/'Dropbox/interlex_backups/ilx_db_superclasses_backup.pickle'
-SYNOS_BACKUP_PATH = Path.home()/'Dropbox/interlex_backups/ilx_db_synonyms_backup.pickle'
-EXIDS_BACKUP_PATH = Path.home()/'Dropbox/interlex_backups/ilx_db_ex_backup.pickle'
+HOME = Path.home() / "DropboxPersonal"
+# ELASTIC = 'https://5f86098ac2b28a982cebf64e82db4ea2.us-west-2.aws.found.io:9243/interlex/term/'
+TERMS_COMPLETE_BACKUP_PATH = HOME / ".interlex_backups/ilx_db_terms_complete_backup.pickle"
+USERS_BACKUP_PATH = HOME / ".interlex_backups/ilx_db_users_backup.pickle"
+TERMS_BACKUP_PATH = HOME / ".interlex_backups/ilx_db_terms_backup.pickle"
+ANNOS_BACKUP_PATH = HOME / ".interlex_backups/ilx_db_annotations_backup.pickle"
+RELAS_BACKUP_PATH = HOME / ".interlex_backups/ilx_db_relationships_backup.pickle"
+SUPER_BACKUP_PATH = HOME / ".interlex_backups/ilx_db_superclasses_backup.pickle"
+SYNOS_BACKUP_PATH = HOME / ".interlex_backups/ilx_db_synonyms_backup.pickle"
+EXIDS_BACKUP_PATH = HOME / ".interlex_backups/ilx_db_ex_backup.pickle"
-class IlxSql():
+class IlxSql:
def __init__(self, db_url, pre_load=False, from_backup=False):
self.db_url = db_url
self.engine = create_engine(self.db_url)
- self.local_degrade = lambda string: string.lower().strip() # current degrade of choice for sql
+ # current degrade of choice for sql
+ self.local_degrade = lambda string: string.lower().strip()
self.from_backup = from_backup
- self.terms_complete = self.get_terms_complete() if pre_load else pd.DataFrame
- self.terms = self.get_terms() if pre_load else pd.DataFrame
- self.superclasses = self.get_superclasses if pre_load else pd.DataFrame
- self.annotations = self.get_annotations() if pre_load else pd.DataFrame
- self.existing_ids = self.get_existing_ids() if pre_load else pd.DataFrame
- self.relationships = self.get_relationships() if pre_load else pd.DataFrame
- self.synonyms = self.get_synonyms() if pre_load else pd.DataFrame
+ # self.terms_complete = self.get_terms_complete() if pre_load else pd.DataFrame
+ self.users = pd.DataFrame
+ self.terms = pd.DataFrame
+ self.superclasses = pd.DataFrame
+ self.annotations = pd.DataFrame
+ self.existing_ids = pd.DataFrame
+ self.relationships = pd.DataFrame
+ self.synonyms = pd.DataFrame
+ if pre_load:
+ self.__pre_load_db()
+
+ def __pre_load_db(self):
+ self.users = self.get_users()
+ self.terms = self.get_terms()
+ self.superclasses = self.get_superclasses()
+ self.annotations = self.get_annotations()
+ self.existing_ids = self.get_existing_ids()
+ self.relationships = self.get_relationships()
+ self.synonyms = self.get_synonyms()
+
+ def fix_df_types(self, df: pd.DataFrame) -> pd.DataFrame:
+ """
+ Convert Float Columns to Int with 0 for NaN while converting the rest of the NaNs to None.
+
+ Parameters
+ ----------
+ df : dataframe
+
+ Returns
+ -------
+ dataframe
+ 0 and None in place of nans
+ """
+ float_col = df.select_dtypes(include=["float64"])
+ for col in float_col:
+ df[col] = df[col].replace({np.nan: 0}).astype(int)
+ df = df.replace({np.nan: None})
+ return df
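+
+    # e.g. a float column [1.0, NaN] becomes int [1, 0]; NaNs left in object
+    # columns become None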
def fetch_terms_complete(self):
if self.terms_complete.empty:
return self.get_terms_complete()
return self.terms_complete
+ def fetch_users(self):
+ if self.users.empty:
+ return self.get_users()
+ return self.users
+
def fetch_terms(self):
if self.terms.empty:
return self.get_terms()
@@ -64,180 +104,222 @@ def fetch_superclasses(self):
return self.get_superclasses()
return self.superclasses
- def get_terms(self):
- ''' GROUP BY is a shortcut to only getting the first in every list of group '''
- if not self.terms.empty:
+ def get_users(self, refresh=False) -> pd.DataFrame:
+        """Return the users table as a DataFrame."""
+ if not self.users.empty and not refresh:
+ return self.users
+ if self.from_backup:
+ self.users = open_pickle(USERS_BACKUP_PATH)
+ self.users.name = "users"
+ return self.users
+ engine = create_engine(self.db_url)
+ data = f"""
+ SELECT u.*
+ FROM users u
+ """
+ self.users = pd.read_sql(data, engine)
+ self.users = self.fix_df_types(self.users)
+ self.users.name = "users"
+ create_pickle(self.users, USERS_BACKUP_PATH)
+ return self.users
+
+ def get_terms(self, status=0, refresh=False):
+        """Return terms joined with their creating user, filtered by status."""
+ if not self.terms.empty and not refresh:
return self.terms
if self.from_backup:
self.terms = open_pickle(TERMS_BACKUP_PATH)
+ self.terms.name = "terms"
return self.terms
engine = create_engine(self.db_url)
- data = """
- SELECT t.id as tid, t.ilx, t.label, t.definition, t.type, t.comment, t.version, t.uid, t.time
+ data = f"""
+ SELECT t.*, u.*
FROM terms t
- GROUP BY t.ilx
+ JOIN users u
+ ON t.orig_uid = u.guid
+ WHERE t.status = '{status}'
"""
self.terms = pd.read_sql(data, engine)
+ self.terms = self.fix_df_types(self.terms)
+ self.terms.name = "terms"
create_pickle(self.terms, TERMS_BACKUP_PATH)
return self.terms
- def get_annotations(self):
- if not self.annotations:
+ def get_annotations(self, status=0, withdrawn=0, refresh=False):
+ if not self.annotations.empty and not refresh:
return self.fetch_annotations()
if self.from_backup:
self.annotations = open_pickle(ANNOS_BACKUP_PATH)
+ self.annotations.name = "annotations"
return self.annotations
engine = create_engine(self.db_url)
- data = """
+ data = f"""
SELECT
- ta.tid, ta.annotation_tid as annotation_type_tid,
- t1.ilx as term_ilx, t2.ilx as annotation_type_ilx,
- t2.label as annotation_type_label,
- ta.value
+ t1.ilx as term_ilx, t1.label as term_label, t1.type as term_type, t1.orig_cid as term_orig_cid,
+ t2.ilx as annotation_type_ilx, t2.label as annotation_type_label,
+ ta.*
FROM term_annotations AS ta
JOIN (
SELECT *
FROM terms
- GROUP BY terms.ilx
+ WHERE status = '{status}'
) AS t1 ON ta.tid=t1.id
JOIN (
SELECT *
FROM terms
- GROUP BY terms.ilx
+ WHERE status = '{status}'
) AS t2 ON ta.annotation_tid=t2.id
+ WHERE ta.withdrawn = '{withdrawn}'
"""
self.annotations = pd.read_sql(data, engine)
+ self.annotations = self.fix_df_types(self.annotations)
+ self.annotations.name = "annotations"
create_pickle(self.annotations, ANNOS_BACKUP_PATH)
return self.annotations
- def get_existing_ids(self):
- if not self.existing_ids.empty:
+ def get_existing_ids(self, status=0, refresh=False):
+ if not self.existing_ids.empty and not refresh:
return self.existing_ids
if self.from_backup:
self.existing_ids = open_pickle(EXIDS_BACKUP_PATH)
+ self.existing_ids.name = "existing_ids"
return self.existing_ids
engine = create_engine(self.db_url)
- data = """
- SELECT tei.tid, tei.curie, tei.iri, tei.preferred, t.ilx, t.label, t.definition
+ data = f"""
+ SELECT tei.*, t.ilx, t.label, t.type, t.orig_cid
FROM (
SELECT *
FROM terms
- GROUP BY terms.ilx
+ WHERE status = '{status}'
) as t
JOIN term_existing_ids AS tei
ON t.id = tei.tid
"""
self.existing_ids = pd.read_sql(data, engine)
+ self.existing_ids = self.fix_df_types(self.existing_ids)
+ self.existing_ids.name = "existing_ids"
create_pickle(self.existing_ids, EXIDS_BACKUP_PATH)
return self.existing_ids
- def get_relationships(self):
- if not self.relationships.empty:
+ def get_relationships(self, status=0, withdrawn=0, refresh=False):
+ if not self.relationships.empty and not refresh:
return self.relationships
if self.from_backup:
self.relationships = open_pickle(RELAS_BACKUP_PATH)
+ self.relationships.name = "relationships"
return self.relationships
engine = create_engine(self.db_url)
- data = """
+ data = f"""
SELECT
- t1.id as term1_tid, t1.ilx AS term1_ilx, t1.type as term1_type,
- t2.id as term2_tid, t2.ilx AS term2_ilx, t2.type as term2_type,
- t3.id as relationship_tid, t3.ilx AS relationship_ilx, t3.label as relationship_label
+ t1.ilx AS term1_ilx, t1.type as term1_type, t1.label as term1_label, t1.orig_cid as term1_orig_cid,
+ t2.ilx AS term2_ilx, t2.type as term2_type, t2.label as term2_label, t2.orig_cid as term2_orig_cid,
+ t3.ilx AS relationship_ilx, t3.label as relationship_label,
+ tr.*
FROM term_relationships AS tr
JOIN (
SELECT *
FROM terms
- GROUP BY terms.ilx
+ WHERE status = '{status}'
) t1 ON t1.id = tr.term1_id
JOIN (
SELECT *
FROM terms
- GROUP BY terms.ilx
+ WHERE status = '{status}'
) AS t2 ON t2.id = tr.term2_id
JOIN (
SELECT *
FROM terms
- GROUP BY terms.ilx
+ WHERE status = '{status}'
) AS t3 ON t3.id = tr.relationship_tid
+ WHERE tr.withdrawn = '{withdrawn}'
"""
self.relationships = pd.read_sql(data, engine)
+ self.relationships = self.fix_df_types(self.relationships)
+ self.relationships.name = "relationships"
create_pickle(self.relationships, RELAS_BACKUP_PATH)
return self.relationships
- def get_superclasses(self):
- if not self.superclasses.empty:
+ def get_superclasses(self, status=0, withdrawn=0, refresh=False):
+ if not self.superclasses.empty and not refresh:
return self.superclasses
if self.from_backup:
self.superclasses = open_pickle(SUPER_BACKUP_PATH)
+ self.superclasses.name = "superclasses"
return self.superclasses
engine = create_engine(self.db_url)
- data = """
+ data = f"""
SELECT
- ts.tid, ts.superclass_tid,
- t1.label as term_label, t1.ilx as term_ilx,
- t2.label as superclass_label, t2.ilx as superclass_ilx
+ t1.ilx as term_ilx, t1.label as term_label,
+ t2.ilx as superclass_ilx, t2.label as superclass_label,
+ ts.*
FROM term_superclasses AS ts
JOIN (
SELECT *
FROM terms
- GROUP BY terms.ilx
+ WHERE status = '{status}'
) as t1
ON t1.id = ts.tid
JOIN (
SELECT *
FROM terms
- GROUP BY terms.ilx
+ WHERE status = '{status}'
) AS t2
ON t2.id = ts.superclass_tid
"""
self.superclasses = pd.read_sql(data, engine)
+ self.superclasses = self.fix_df_types(self.superclasses)
+ self.superclasses.name = "superclasses"
create_pickle(self.superclasses, SUPER_BACKUP_PATH)
return self.superclasses
- def get_synonyms(self):
- if not self.synonyms.empty:
+ def get_synonyms(self, status=0, refresh=False):
+ if not self.synonyms.empty and not refresh:
return self.synonyms
if self.from_backup:
self.synonyms = open_pickle(SYNOS_BACKUP_PATH)
+ self.synonyms.name = "synonyms"
return self.synonyms
engine = create_engine(self.db_url)
- data = """
- SELECT ts.tid as tid, t.ilx, ts.literal, ts.type
+ data = f"""
+ SELECT t.ilx, t.type as term_type, t.label, ts.*
FROM term_synonyms AS ts
JOIN (
SELECT *
FROM terms
- GROUP BY terms.ilx
+ WHERE status = '{status}'
) AS t
WHERE ts.tid=t.id
"""
self.synonyms = pd.read_sql(data, engine)
+ self.synonyms = self.fix_df_types(self.synonyms)
+ self.synonyms.name = "synonyms"
create_pickle(self.synonyms, SYNOS_BACKUP_PATH)
return self.synonyms
- def get_terms_complete(self) -> pd.DataFrame:
- ''' Gets complete entity data like term/view '''
- if not self.terms_complete.empty:
- return self.terms_complete
- if self.from_backup:
- self.terms_complete = open_pickle(TERMS_COMPLETE_BACKUP_PATH)
- return self.terms_complete
- ilx2synonyms = self.get_ilx2synonyms()
- ilx2existing_ids = self.get_ilx2existing_ids()
- ilx2annotations = self.get_ilx2annotations()
- ilx2superclass = self.get_ilx2superclass()
- ilx_complete = []
- header = ['Index'] + list(self.fetch_terms().columns)
- for row in self.fetch_terms().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
- row['synonyms'] = ilx2synonyms.get(row['ilx'])
- row['existing_ids'] = ilx2existing_ids[row['ilx']] # if breaks we have worse problems
- row['annotations'] = ilx2annotations.get(row['ilx'])
- row['superclass'] = ilx2superclass.get(row['ilx'])
- ilx_complete.append(row)
- terms_complete = pd.DataFrame(ilx_complete)
- create_pickle(terms_complete, TERMS_COMPLETE_BACKUP_PATH)
- return terms_complete
+ # def get_terms_complete(self, status=0, withdrawn=0, refresh=False) -> pd.DataFrame:
+ # ''' Gets complete entity data like term/view '''
+ # if not self.terms_complete.empty and not refresh:
+ # return self.terms_complete
+ # if self.from_backup:
+ # self.terms_complete = open_pickle(TERMS_COMPLETE_BACKUP_PATH)
+ # return self.terms_complete
+ # ilx2synonyms = self.get_ilx2synonyms()
+ # ilx2existing_ids = self.get_ilx2existing_ids()
+ # ilx2annotations = self.get_ilx2annotations()
+ # ilx2superclass = self.get_ilx2superclass()
+ # ilx_complete = []
+ # header = ['Index'] + list(self.fetch_terms().columns)
+ # for row in self.fetch_terms().itertuples():
+ # row = {header[i]: val for i, val in enumerate(row)}
+ # row['synonyms'] = ilx2synonyms.get(row['ilx'])
+ # # if breaks we have worse problems
+ # row['existing_ids'] = ilx2existing_ids[row['ilx']]
+ # row['annotations'] = ilx2annotations.get(row['ilx'])
+ # row['superclass'] = ilx2superclass.get(row['ilx'])
+ # ilx_complete.append(row)
+ # terms_complete = pd.DataFrame(ilx_complete)
+ # create_pickle(terms_complete, TERMS_COMPLETE_BACKUP_PATH)
+ # return terms_complete
def get_label2id(self):
self.terms = self.fetch_terms()
@@ -246,14 +328,14 @@ def get_label2id(self):
for row in self.terms.itertuples():
label = self.local_degrade(row.label)
if not visited.get((label, row.type, row.ilx)):
- if row.type == 'term':
- label_to_id[label]['term'].append(int(row.id))
+ if row.type == "term":
+ label_to_id[label]["term"].append(int(row.id))
visited[(label, row.type, row.ilx)] = True
- elif row.type == 'cde':
- label_to_id[label]['cde'].append(int(row.id))
+ elif row.type == "cde":
+ label_to_id[label]["cde"].append(int(row.id))
visited[(label, row.type, row.ilx)] = True
- elif row.type == 'fde':
- label_to_id[label]['fde'].append(int(row.id))
+ elif row.type == "fde":
+ label_to_id[label]["fde"].append(int(row.id))
visited[(label, row.type, row.ilx)] = True
return label_to_id
@@ -272,173 +354,172 @@ def get_label2rows(self):
self.terms_complete = self.fetch_terms_complete()
visited = {}
label2rows = defaultdict(list)
- header = ['Index'] + list(self.terms_complete.columns)
+ header = ["Index"] + list(self.terms_complete.columns)
for row in self.terms_complete.itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
- label = self.local_degrade(row['label'])
- if not visited.get((label, row['type'], row['ilx'])):
+ row = {header[i]: val for i, val in enumerate(row)}
+ label = self.local_degrade(row["label"])
+ if not visited.get((label, row["type"], row["ilx"])):
label2rows[label].append(row)
- visited[(label, row['type'], row['ilx'])] = True
+ visited[(label, row["type"], row["ilx"])] = True
return label2rows
def get_definition2rows(self):
self.terms = self.fetch_terms()
visited = {}
definition2rows = defaultdict(list)
- header = ['Index'] + list(self.terms.columns)
+ header = ["Index"] + list(self.terms.columns)
for row in self.terms.itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
- definition = self.local_degrade(row['definition'])
- if not definition or definition == ' ':
+ row = {header[i]: val for i, val in enumerate(row)}
+ definition = self.local_degrade(row["definition"])
+ if not definition or definition == " ":
continue
- if not visited.get((definition, row['type'], row['ilx'])):
+ if not visited.get((definition, row["type"], row["ilx"])):
definition2rows[definition].append(row)
- visited[(definition, row['type'], row['ilx'])] = True
+ visited[(definition, row["type"], row["ilx"])] = True
return definition2rows
def get_tid2row(self):
tid2row = {}
- header = ['Index'] + list(self.fetch_terms().columns)
+ header = ["Index"] + list(self.fetch_terms().columns)
for row in self.fetch_terms().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
- tid2row[row['tid']] = row
+ row = {header[i]: val for i, val in enumerate(row)}
+ tid2row[row["tid"]] = row
return tid2row
def get_ilx2row(self):
ilx2row = {}
- header = ['Index'] + list(self.fetch_terms().columns)
+ header = ["Index"] + list(self.fetch_terms().columns)
for row in self.fetch_terms().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
- ilx2row[row['ilx']] = row
+ row = {header[i]: val for i, val in enumerate(row)}
+ ilx2row[row["ilx"]] = row
return ilx2row
- def get_ilx2superclass(self, clean:bool=True):
- ''' clean: for list of literals only '''
+ def get_ilx2superclass(self, clean: bool = True):
+ """clean: for list of literals only"""
ilx2superclass = defaultdict(list)
- header = ['Index'] + list(self.fetch_superclasses().columns)
+ header = ["Index"] + list(self.fetch_superclasses().columns)
for row in self.fetch_superclasses().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
+ row = {header[i]: val for i, val in enumerate(row)}
if clean:
superclass = {
- 'tid': row['superclass_tid'],
- 'ilx': row['superclass_ilx'],
+ "tid": row["superclass_tid"],
+ "ilx": row["superclass_ilx"],
}
- ilx2superclass[row['term_ilx']].append(superclass)
+ ilx2superclass[row["term_ilx"]].append(superclass)
elif not clean:
- ilx2superclass[row['term_ilx']].append(row)
+ ilx2superclass[row["term_ilx"]].append(row)
return ilx2superclass
- def get_tid2annotations(self, clean:bool=True):
- ''' clean: for list of literals only '''
+ def get_tid2annotations(self, clean: bool = True):
+ """clean: for list of literals only"""
tid2annotations = defaultdict(list)
- header = ['Index'] + list(self.fetch_annotations().columns)
+ header = ["Index"] + list(self.fetch_annotations().columns)
for row in self.fetch_annotations().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
+ row = {header[i]: val for i, val in enumerate(row)}
if clean:
annotation = {
- 'tid': row['tid'],
- 'annotation_type_tid': row['annotation_type_tid'],
- 'value': row['value'],
- 'annotation_type_label': row['annotation_type_label'],
+ "tid": row["tid"],
+ "annotation_type_tid": row["annotation_type_tid"],
+ "value": row["value"],
+ "annotation_type_label": row["annotation_type_label"],
}
- tid2annotations[row['tid']].append(annotation)
+ tid2annotations[row["tid"]].append(annotation)
elif not clean:
- tid2annotations[row['tid']].append(row)
+ tid2annotations[row["tid"]].append(row)
return tid2annotations
- def get_ilx2annotations(self, clean:bool=True):
- ''' clean: for list of literals only '''
+ def get_ilx2annotations(self, clean: bool = True):
+ """clean: for list of literals only"""
ilx2annotations = defaultdict(list)
- header = ['Index'] + list(self.fetch_annotations().columns)
+ header = ["Index"] + list(self.fetch_annotations().columns)
for row in self.fetch_annotations().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
+ row = {header[i]: val for i, val in enumerate(row)}
if clean:
annotation = {
- 'tid': row['tid'],
- 'annotation_type_tid': row['annotation_type_tid'],
- 'value': row['value'],
- 'annotation_type_label': row['annotation_type_label'],
+ "tid": row["tid"],
+ "annotation_type_tid": row["annotation_type_tid"],
+ "value": row["value"],
+ "annotation_type_label": row["annotation_type_label"],
}
- ilx2annotations[row['term_ilx']].append(annotation)
+ ilx2annotations[row["term_ilx"]].append(annotation)
elif not clean:
- ilx2annotations[row['term_ilx']].append(row)
+ ilx2annotations[row["term_ilx"]].append(row)
return ilx2annotations
-
- def get_tid2synonyms(self, clean:bool=True):
- ''' clean: for list of literals only '''
+ def get_tid2synonyms(self, clean: bool = True):
+ """clean: for list of literals only"""
tid2synonyms = {}
- header = ['Index'] + list(self.fetch_synonyms().columns)
+ header = ["Index"] + list(self.fetch_synonyms().columns)
for row in self.fetch_synonyms().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
+ row = {header[i]: val for i, val in enumerate(row)}
if clean:
- synonym = {'literal':row['literal'], 'type':row['type']}
- tid2synonyms[row['tid']].append(synonym)
+ synonym = {"literal": row["literal"], "type": row["type"]}
+ tid2synonyms[row["tid"]].append(synonym)
elif not clean:
- tid2synonyms[row['tid']].append(row)
+ tid2synonyms[row["tid"]].append(row)
return tid2synonyms
- def get_ilx2synonyms(self, clean:bool=True):
- ''' clean: for list of literals only '''
+ def get_ilx2synonyms(self, clean: bool = True):
+ """clean: for list of literals only"""
ilx2synonyms = defaultdict(list)
- header = ['Index'] + list(self.fetch_synonyms().columns)
+ header = ["Index"] + list(self.fetch_synonyms().columns)
for row in self.fetch_synonyms().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
+ row = {header[i]: val for i, val in enumerate(row)}
if clean:
- synonym = {'literal':row['literal'], 'type':row['type']}
- ilx2synonyms[row['ilx']].append(synonym)
+ synonym = {"literal": row["literal"], "type": row["type"]}
+ ilx2synonyms[row["ilx"]].append(synonym)
elif not clean:
- ilx2synonyms[row['ilx']].append(row)
+ ilx2synonyms[row["ilx"]].append(row)
return ilx2synonyms
def get_iri2row(self):
iri2row = {}
- header = ['Index'] + list(self.fetch_existing_ids().columns)
+ header = ["Index"] + list(self.fetch_existing_ids().columns)
for row in self.fetch_existing_ids().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
- iri2row[row['iri']] = row
+ row = {header[i]: val for i, val in enumerate(row)}
+ iri2row[row["iri"]] = row
return iri2row
def get_tid2existing_ids(self, clean=True):
tid2existing_ids = defaultdict(list)
- header = ['Index'] + list(self.fetch_existing_ids().columns)
+ header = ["Index"] + list(self.fetch_existing_ids().columns)
for row in self.fetch_existing_ids().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
+ row = {header[i]: val for i, val in enumerate(row)}
if clean:
- existing_id = {'iri':row['iri'], 'curie':row['curie']}
- tid2existing_ids[row['tid']].append(existing_id)
+ existing_id = {"iri": row["iri"], "curie": row["curie"]}
+ tid2existing_ids[row["tid"]].append(existing_id)
elif not clean:
- tid2existing_ids[row['tid']].append(row)
+ tid2existing_ids[row["tid"]].append(row)
return tid2existing_ids
def get_ilx2existing_ids(self, clean=True):
ilx2existing_ids = defaultdict(list)
- header = ['Index'] + list(self.fetch_existing_ids().columns)
+ header = ["Index"] + list(self.fetch_existing_ids().columns)
for row in self.fetch_existing_ids().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
+ row = {header[i]: val for i, val in enumerate(row)}
if clean:
- existing_id = {'iri':row['iri'], 'curie':row['curie']}
- ilx2existing_ids[row['ilx']].append(existing_id)
+ existing_id = {"iri": row["iri"], "curie": row["curie"]}
+ ilx2existing_ids[row["ilx"]].append(existing_id)
elif not clean:
- ilx2existing_ids[row['ilx']].append(row)
+ ilx2existing_ids[row["ilx"]].append(row)
return ilx2existing_ids
def get_curie2row(self):
curie2row = {}
- header = ['Index'] + list(self.fetch_existing_ids().columns)
+ header = ["Index"] + list(self.fetch_existing_ids().columns)
for row in self.fetch_existing_ids().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
- curie2row[row['curie']] = row
+ row = {header[i]: val for i, val in enumerate(row)}
+ curie2row[row["curie"]] = row
return curie2row
def get_fragment2rows(self):
fragement2rows = defaultdict(list)
- header = ['Index'] + list(self.fetch_existing_ids().columns)
+ header = ["Index"] + list(self.fetch_existing_ids().columns)
for row in self.fetch_existing_ids().itertuples():
- row = {header[i]:val for i, val in enumerate(row)}
- if not row['curie']: # there are a few with no curies that will cause a false positive
+ row = {header[i]: val for i, val in enumerate(row)}
+ if not row["curie"]: # there are a few with no curies that will cause a false positive
continue
- fragment = row['curie'].split(':')[-1]
+ fragment = row["curie"].split(":")[-1]
fragement2rows[fragment].append(row)
return fragement2rows
@@ -451,7 +532,9 @@ def get_table(self, tablename, limit=5):
SELECT *
FROM {tablename}
LIMIT {limit}
- """.format(tablename=tablename, limit=limit)
+ """.format(
+ tablename=tablename, limit=limit
+ )
return pd.read_sql(data, self.engine)
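+    # e.g. sql.get_table('terms', limit=3) -> first 3 rows of the terms table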
def get_custom(self, data):
@@ -459,11 +542,11 @@ def get_custom(self, data):
def main():
- db_url = os.environ.get('SCICRUNCH_DB_URL_PRODUCTION')
+ db_url = os.environ.get("SCICRUNCH_DB_URL_PRODUCTION")
sql = IlxSql(db_url)
rels = sql.get_relationships()
print(rels.head())
-if __name__ == '__main__':
+if __name__ == "__main__":
main()
diff --git a/ilxutils/ilxutils/nltklib.py b/ilxutils/ilxutils/nltklib.py
index da5c9918..27acae74 100644
--- a/ilxutils/ilxutils/nltklib.py
+++ b/ilxutils/ilxutils/nltklib.py
@@ -137,7 +137,8 @@ def sentence_similarity(sentence1, sentence2, ignore_integers=False):
tokens2 = word_tokenize(sentence2)
tokens1 = clean_tokens(tokens1, ignore_integers)
tokens2 = clean_tokens(tokens2, ignore_integers)
-
+ print(tokens1)
+ print(tokens2)
# tag
sentence1 = pos_tag(tokens1)
sentence2 = pos_tag(tokens2)
diff --git a/ilxutils/ilxutils/ontopandas.py b/ilxutils/ilxutils/ontopandas.py
index faf2581b..f777863b 100644
--- a/ilxutils/ilxutils/ontopandas.py
+++ b/ilxutils/ilxutils/ontopandas.py
@@ -43,10 +43,12 @@ class OntoPandas:
def __init__(self,
obj: Union[rdflib.graph.Graph, str],
query:str=defaultquery,
+ curie:bool=True,
qnamed:bool=False,
str_vals:bool=False,) -> None:
self.query = query
self.qnamed = qnamed
+ self.curie = curie
self.str_vals = str_vals
self.g = obj # could be path
self.path = obj # could be graph
@@ -270,6 +272,10 @@ def get_sparql_dataframe( self ):
df = df.where((pd.notnull(df)), None) # default Null is fricken Float NaN
df = df.reset_index().rename(columns={'index':'iri'})
+
+ if self.curie:
+ df['curie'] = df.apply(lambda row: self.qname(row.iri), axis = 1)
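+            # e.g. qname maps 'http://purl.obolibrary.org/obo/UBERON_0000955'
+            # to the curie 'UBERON:0000955'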
+
return df
diff --git a/ilxutils/ilxutils/remotes.py b/ilxutils/ilxutils/remotes.py
index 4137adb1..979ae58f 100644
--- a/ilxutils/ilxutils/remotes.py
+++ b/ilxutils/ilxutils/remotes.py
@@ -2,19 +2,20 @@
from pyontutils.core import OntTerm
import os
-TEST = 'https://test3.scicrunch.org/api/1/'
-PRODUCTION = 'https://scicrunch.org/api/1/'
+def remote(server=''):
-InterLexRemote = oq.plugin.get('InterLex')
-interlex_remote_production = InterLexRemote(
- # When ready, should be changed to 'https://scicrunch.org/api/1/' for production
- apiEndpoint = PRODUCTION
-)
-interlex_remote_production.setup(instrumented=OntTerm)
+ # Request interlex remote (scigraph is also an option for plugins)
+ InterLexRemote = oq.plugin.get('InterLex')
-# InterLexRemote = oq.plugin.get('InterLex')
-# interlex_remote_test = InterLexRemote(
-# # When ready, should be changed to 'https://scicrunch.org/api/1/' for production
-# apiEndpoint = TEST
-# )
-# interlex_remote_test.setup(instrumented=OntTerm)
+    if server:
+        server = server if server.endswith('.') else server + '.'
+    # an empty server gives the production endpoint https://scicrunch.org/api/1/
+    endpoint = f'https://{server}scicrunch.org/api/1/'
+
+    interlex_remote = InterLexRemote()
+
+    # point the remote at the chosen endpoint and instrument OntTerm
+    interlex_remote.apiEndpoint = endpoint
+ interlex_remote.setup(instrumented=OntTerm)
+
+ return interlex_remote
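+
+
+# Usage sketch (matches the notebook above):
+#   ixr = remote()                 # production -> https://scicrunch.org/api/1/
+#   ixrt = remote(server='test3')  # test -> https://test3.scicrunch.org/api/1/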
diff --git a/ilxutils/ilxutils/scicrunch_session.py b/ilxutils/ilxutils/scicrunch_session.py
new file mode 100644
index 00000000..1c68a422
--- /dev/null
+++ b/ilxutils/ilxutils/scicrunch_session.py
@@ -0,0 +1,187 @@
+import json
+from typing import Union, Dict, List, Tuple
+from urllib.parse import urljoin, urlparse
+
+import requests
+from requests.adapters import HTTPAdapter
+from requests.packages.urllib3.util.retry import Retry
+
+from pyontutils.utils import Async, deferred
+
+
+class SciCrunchSession:
+    """ Boilerplate for SciCrunch server responses. """
+
+ class Error(Exception):
+ """Script could not complete."""
+
+ class NoApiKeyError(Error):
+ """ No api key has been set """
+
+ class IncorrectAPIKeyError(Error):
+ """Incorrect API key for scicrunch website used."""
+
+ def __init__(self,
+ key: str,
+ host: str = 'test3.scicrunch.org', # MAIN TEST -> test3.scicrunch.org
+ auth: tuple = ('', ''), # user, password for authentication
+ retries: int = 3, # retries if code in status_forcelist
+ backoff_factor: float = 1.0, # delay factor for reties
+ status_forcelist: tuple = (500, 502, 504), # flagged codes for retry
+ ) -> None:
+ """ Initialize Session with SciCrunch Server.
+
+ :param str key: API key for SciCrunch [should work for test hosts].
+ :param str host: Base url for hosting server [can take localhost:8080].
+ """
+ self.key = key
+ self.host = ''
+ self.api = ''
+
+ # Pull host for potential url
+ if host.startswith('http'):
+ host = urlparse(host).netloc
+
+        # Use host to create the api url; localhost is assumed to be http
+        if host.startswith('localhost'):
+            self.host = "http://" + host
+        else:
+            self.host = "https://" + host
+        self.api = self.host + '/api/1/'
+
+ # Api key check
+ if self.key is None: # injected by orthauth
+            # Error here because viewing without a key is handled in InterLexRemote, not here
+ raise self.NoApiKeyError('You have not set an API key for the SciCrunch API!')
+        if requests.get(self.api + 'user/info', params={'key': self.key}).status_code not in [200, 201]:
+            raise self.IncorrectAPIKeyError('the api_key given is incorrect.')
+
+ self.session = requests.Session()
+ self.session.auth = auth
+ self.session.headers.update({'Content-type': 'application/json'})
+ retry = Retry(
+ total=retries,
+ read=retries,
+ connect=retries,
+ backoff_factor=backoff_factor,
+ status_forcelist=status_forcelist, # 400 for no ILX ID generated.
+ )
+ adapter = HTTPAdapter(max_retries=retry)
+ self.session.mount('http://', adapter)
+ self.session.mount('https://', adapter)
+
+ def __session_shortcut(self, endpoint: str, data: dict, session_type: str = 'GET') -> dict:
+ """ Short for both GET and POST.
+
+ Will only crash if success is False or if there a 400+ error.
+ """
+ def _prepare_data(data: dict) -> dict:
+ """ Check if request data inputed has key and proper format. """
+ if data is None:
+ data = {'key': self.key}
+ elif isinstance(data, dict):
+ data.update({'key': self.key})
+ else:
+ raise ValueError('request session data must be of type dictionary')
+ return json.dumps(data)
+
+        # urljoin bug; .com/api/1/ + /test/ != .com/api/1/test/ but .com/test/
+        # HOWEVER .com/api/1/ + test/ == .com/api/1/test/
+ endpoint = endpoint[1:] if endpoint.startswith('/') else endpoint
+ url = urljoin(self.api, endpoint)
+ if data:
+ for key, value in data.items():
+ url = url.format(**{key:value})
+ data = _prepare_data(data)
+ # TODO: Could use a Request here to shorten code.
+ if session_type == 'GET':
+ response = self.session.get(url, data=data)
+ else:
+ response = self.session.post(url, data=data)
+        try:
+            # crashes if success on the server side is False
+            if response.json()['success'] is False:
+                # Need to retry if the server fails to create the ILX ID.
+                raise ValueError(response.text + f' -> STATUS CODE: {response.status_code} @ URL: {response.url}')
+            response.raise_for_status()
+        # crashes if the server couldn't use the request or it never arrived
+        except Exception:
+            raise requests.exceptions.HTTPError(f'{response.text} {response.status_code}')
+
+ # response.json() == {'data':{}, 'success':bool}
+ return response.json()['data']
+
+ def _get(self, endpoint: str, data: dict = None) -> dict:
+ """ Quick GET for SciCrunch. """
+ return self.__session_shortcut(endpoint, data, 'GET')
+
+ def _post(self, endpoint: str , data: dict = None) -> dict:
+ """ Quick POST for SciCrunch. """
+ return self.__session_shortcut(endpoint, data, 'POST')
+
+    def get(self, endpoints: list, data_list: list, tag: str = None) -> List[Tuple[str, dict]]:
+        # worker
+        gin = lambda endpoint, data: (tag, self._get(endpoint, data))
+        # Builds futures dynamically; endpoints and data_list are paired element-wise
+        return Async()(deferred(gin)(endpoint, data) for endpoint, data in zip(endpoints, data_list))
+
+ def post(self, endpoint: object, data_list: list) -> List[Tuple[str, dict]]:
+ # worker; return server_response first then initial data input
+ gin = lambda data: (data, self._post(endpoint, data))
+
+ # Builds futures dynamically
+ responses = Async()(deferred(gin)(data) for data in data_list)
+ return responses
+
+ # def post(self, func: object, data_list: list) -> List[Tuple[str, dict]]:
+ # # worker; return server_response first then initial data input
+ # gin = lambda data: (data, func(data))
+ #
+ # # Builds futures dynamically
+ # responses = Async()(deferred(gin)(data) for data in data_list)
+ #
+ # # BUG: ilx_ids are created on the PHP side and are slow. Duplicates
+ # # are known to be created "func hit at same time" so we need to a new
+ # # session and try again.
+ # number_of_batch_retries = 0
+ # while number_of_batch_retries < 10:
+ # data_queue = []
+ # for response in responses:
+ # data, server_response = response
+ # print(server_response)
+ # if server_response.get('errormsg') == 'could not generate ILX identifier':
+ # data_queue.append(data)
+ # if data_queue == []:
+ # break
+ # responses = Async()(deferred(gin)(data) for data in data_queue)
+ # number_of_batch_retries += 1
+ # return
+
+ # def get(self, urls, limit=5):
+ #
+ # async def get_single(url, session, auth):
+ # async with session.get(url) as response:
+ # try:
+ # output = await response.json()
+ # except:
+ # output = await response.text()
+ # ValueError(f'{output} with status code [{response.status}]')
+ # return output
+ #
+ # async def get_all(urls, connector, loop):
+ # tasks = []
+ # async with ClientSession(connector=connector, loop=loop,
+ # auth=self.auth, raise_for_status=True) as session:
+ # for i, url in enumerate(urls):
+ # task = asyncio.ensure_future(get_single(url, session, self.auth))
+ # tasks.append(task)
+ # return (await asyncio.gather(*tasks))
+ #
+ # # rate limiter; should be between 20 and 80; 100 maxed out server
+ # connector = TCPConnector(limit=limit)
+ # loop = asyncio.get_event_loop() # event loop initialize
+ # # tasks to do; data is in json format [{},]
+ # future = asyncio.ensure_future(get_all(urls, connector, loop))
+ # outputs = loop.run_until_complete(future) # loop until done
+ # return {k: v for keyval in outputs for k, v in keyval.items()}
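
A minimal SciCrunchSession sketch (not part of the diff; the env var name and the 'term/add' endpoint are illustrative, while 'user/info' is the endpoint the class itself uses for the key check):

    import os
    from ilxutils.scicrunch_session import SciCrunchSession

    session = SciCrunchSession(key=os.environ['SCICRUNCH_API_KEY'])  # hypothetical env var
    # single GET against <host>/api/1/user/info; the api key is injected into the payload
    info = session._get('user/info')
    # batch POST: each dict in data_list is sent concurrently via Async/deferred
    responses = session.post('term/add', data_list=[{'label': 'example term'}])
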
diff --git a/ilxutils/ilxutils/sparql.ipynb b/ilxutils/ilxutils/sparql.ipynb
new file mode 100644
index 00000000..fb11bab8
--- /dev/null
+++ b/ilxutils/ilxutils/sparql.ipynb
@@ -0,0 +1,296 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pyontutils.core import OntResIri\n",
+ "g = OntResIri('https://cassava.ucsd.edu/sparc/exports/curation-export.ttl').graph"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 9,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "q2=\"\"\"\n",
+ "SELECT ?subj ?pred ?obj\n",
+ "WHERE {\n",
+ " TEMP:hasDerivedInformationAsParticipant ?obj .\n",
+ "?subj TEMP:hasDerivedInformationAsParticipant ?obj .\n",
+ "}\n",
+ "\"\"\"\n",
+ "\n",
+ "templates = SparqlQueryTemplates(g.namespace_manager)\n",
+ "q = templates.dataset_group(\n",
+ " subject='https://api.blackfynn.io/datasets/N:dataset:bc4071fd-aba1-4fe5-a59e-3da5affbc5fb/subjects/10653',\n",
+ ")\n",
+ "# print(q)\n",
+ "ts = []\n",
+ "sp = g.query(q2)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 211,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "True"
+ ]
+ },
+ "execution_count": 211,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from typing import Union, Dict, List, Tuple\n",
+ "import unittest\n",
+ "\n",
+ "import rdflib\n",
+ "from rdflib.plugins import sparql\n",
+ "import pytest\n",
+ "\n",
+ "from pyontutils.core import OntResIri\n",
+ "\n",
+ "Semantic = Union[rdflib.URIRef, rdflib.Literal, rdflib.BNode]\n",
+ "\n",
+ "\n",
+ "class TestCurationExportTtl:\n",
+ "\n",
+ " def __init__(self):\n",
+ " self.ori = OntResIri('https://cassava.ucsd.edu/sparc/exports/curation-export.ttl')\n",
+ " self.graph = self.ori.graph\n",
+ " self.spaql_templates = SparqlQueryTemplates(self.graph)\n",
+ "\n",
+ " def test_dataset_group(self):\n",
+ " \"\"\" sparql queries here \"\"\"\n",
+ " subj = rdflib.URIRef('https://api.blackfynn.io/datasets/N:dataset:c2a014b8-2c15-4269-b10a-3345420e3d56/subjects/53')\n",
+ " query = self.spaql_templates.dataset_group()\n",
+ " assert len(list(self.graph.query(query, initBindings={'target': subj}))) > 0\n",
+ "\n",
+ " def test_related_datasets(self):\n",
+ " subj = rdflib.util.from_n3('dataset:bec4d335-9377-4863-9017-ecd01170f354', nsm=self.graph)\n",
+ " query = self.spaql_templates.related_datasets()\n",
+ " assert len(list(self.graph.query(query, initBindings={'target': subj}))) > 0\n",
+ "\n",
+ "\n",
+ "class SparqlQueryTemplates:\n",
+ " \"\"\" Creates SPARQL query templates. \"\"\"\n",
+ "\n",
+ " def __init__(self, nsm=None):\n",
+ " self.nsm = nsm if nsm else rdflib.Graph().namespace_manager\n",
+ " self.prefixes = {p:ns for p, ns in self.nsm.namespaces() if p}\n",
+ "\n",
+ " def sparql_iri(self, iri: Union[rdflib.URIRef, str]) -> str:\n",
+ " \"\"\" Converts IRIs and curies to a usable format for SPARQL queries. \"\"\"\n",
+ " if iri.startswith('http') or isinstance(iri, rdflib.URIRef):\n",
+ " return '<'+str(iri)+'>'\n",
+ " return iri\n",
+ "\n",
+ " def dataset_group(self) -> str:\n",
+ " \"\"\" Get all subject groups and dataset associated with subject input.\n",
+ "\n",
+ " :returns: list of tuples containing: subject, subjects group, and subjects dataset.\n",
+ " \"\"\"\n",
+ " query = \"\"\"\n",
+ " SELECT ?subj ?group ?dataset\n",
+ " WHERE {\n",
+ " ?target TEMP:hasAssignedGroup ?group .\n",
+ " ?subj TEMP:hasAssignedGroup ?group .\n",
+ " ?subj TEMP:hasDerivedInformationAsParticipant ?dataset .\n",
+ " }\n",
+ " \"\"\"\n",
+ " return sparql.prepareQuery(query, initNs=self.prefixes)\n",
+ "\n",
+ " def related_datasets(self) -> str:\n",
+ " \"\"\" Get all related datasets of subject.\n",
+ "\n",
+ " :returns: list of tuples containing: subject & subjects shared dataset.\n",
+ " \"\"\"\n",
+ " query = \"\"\"\n",
+ " SELECT ?subj ?dataset\n",
+ " WHERE {\n",
+ " ?target TEMP:collectionTitle ?dataset .\n",
+ " ?subj TEMP:collectionTitle ?dataset .\n",
+ " }\n",
+ " \"\"\"\n",
+ " return sparql.prepareQuery(query, initNs=self.prefixes)\n",
+ " \n",
+ "TestCurationExportTtl().test_dataset_group()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 205,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 205,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "t.test_dataset_group()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 40,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/plain": [
+ "rdflib.term.URIRef('http://www.w3.org/2000/01/rdf-schema#label')"
+ ]
+ },
+ "execution_count": 40,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "from rdflib import RDFS\n",
+ "from_n3('')"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "AttributeError",
+ "evalue": "module 'rdflib.plugins' has no attribute 'sparql'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mrdflib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplugins\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msparql\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprepareQuery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mquery\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0minitN\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mAttributeError\u001b[0m: module 'rdflib.plugins' has no attribute 'sparql'"
+ ]
+ }
+ ],
+ "source": [
+ "rdflib.plugins.sparql.prepareQuery(query, initN)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from rdflib.plugins import sparql"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "rdflib==5.0.0.dev0\n"
+ ]
+ }
+ ],
+ "source": [
+ "!pip3 freeze | grep rdflib"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "SyntaxError",
+ "evalue": "invalid syntax (, line 1)",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;36m File \u001b[0;32m\"\"\u001b[0;36m, line \u001b[0;32m1\u001b[0m\n\u001b[0;31m from rdflib.plugins\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n"
+ ]
+ }
+ ],
+ "source": [
+ "from rdflib.plugins.sp"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "ename": "AttributeError",
+ "evalue": "module 'rdflib' has no attribute 'plugins'",
+ "output_type": "error",
+ "traceback": [
+ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+ "\u001b[0;31mAttributeError\u001b[0m Traceback (most recent call last)",
+ "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[0;32mimport\u001b[0m \u001b[0mrdflib\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0;32m----> 2\u001b[0;31m \u001b[0mrdflib\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mplugins\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0msparql\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mprepareQuery\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+ "\u001b[0;31mAttributeError\u001b[0m: module 'rdflib' has no attribute 'plugins'"
+ ]
+ }
+ ],
+ "source": [
+ "import rdflib\n",
+ "rdflib.plugins.sparql.prepareQuery"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 10,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from pyontutils.core import OntResIri\n",
+ "from rdflib.plugins.sparql import prepareQuery"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.7"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 4
+}
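
Distilled from the notebook above, the working pattern is roughly (a sketch; it assumes the TEMP prefix is registered in the export graph's namespace manager):

    import rdflib
    from rdflib.plugins.sparql import prepareQuery
    from pyontutils.core import OntResIri

    g = OntResIri('https://cassava.ucsd.edu/sparc/exports/curation-export.ttl').graph
    prefixes = {p: ns for p, ns in g.namespace_manager.namespaces() if p}

    # prepare once with the graph's prefixes, then bind ?target per call
    query = prepareQuery("""
        SELECT ?subj ?dataset
        WHERE {
            ?target TEMP:collectionTitle ?dataset .
            ?subj TEMP:collectionTitle ?dataset .
        }
    """, initNs=prefixes)

    target = rdflib.util.from_n3('dataset:bec4d335-9377-4863-9017-ecd01170f354', nsm=g.namespace_manager)
    for row in g.query(query, initBindings={'target': target}):
        print(row)
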
diff --git a/ilxutils/ilxutils/sql.py b/ilxutils/ilxutils/sql.py
new file mode 100644
index 00000000..a895a4db
--- /dev/null
+++ b/ilxutils/ilxutils/sql.py
@@ -0,0 +1,12 @@
+from .interlex_sql import IlxSql
+import os
+
+def production_sql(from_backup=True):
+ return IlxSql(db_url=os.environ.get('SCICRUNCH_DB_URL_PRODUCTION'), from_backup=from_backup)
+
+def test_sql(db_url=None, schema=None, from_backup=True):
+ db_url = os.environ.get('SCICRUNCH_DB_URL_TEST') if not db_url else db_url
+ # Default test schema is nif_test3
+ if schema:
+ db_url = '/'.join(db_url.split('/')[:-1] + [schema])
+ return IlxSql(db_url=db_url, from_backup=from_backup)
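
Usage sketch (not part of the diff; assumes the SCICRUNCH_DB_URL_* environment variables hold database URLs whose last path segment is the schema):

    from ilxutils.sql import production_sql, test_sql

    prod = production_sql()               # reads SCICRUNCH_DB_URL_PRODUCTION
    test = test_sql(schema='nif_test3')   # swaps the trailing schema of SCICRUNCH_DB_URL_TEST
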
diff --git a/ilxutils/tools.py b/ilxutils/ilxutils/tools.py
similarity index 100%
rename from ilxutils/tools.py
rename to ilxutils/ilxutils/tools.py
diff --git a/ilxutils/setup.py b/ilxutils/setup.py
index 623b1c01..5335342a 100755
--- a/ilxutils/setup.py
+++ b/ilxutils/setup.py
@@ -23,13 +23,16 @@
'progressbar2',
'aiohttp',
'asyncio',
+ 'mysql-connector-python',
+ 'elasticsearch',
'sqlalchemy',
'pathlib',
+ 'networkx',
],
# TODO: add a get functionality thats more specific query
- # entry_points={
- # 'console_scripts': [
- # 'ilxutils = ilxutils.cli: main',
- # ],
- # },
+ entry_points={
+ 'console_scripts': [
+ 'backup_ilx = ilxutils.backup_ilx:main',
+ ],
+ },
)
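
The enabled entry point makes the backup runnable as a console script; programmatically the equivalent is (a sketch based only on the ilxutils.backup_ilx:main path declared above):

    from ilxutils.backup_ilx import main

    main()  # same callable the `backup_ilx` script invokes
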
diff --git a/ilxutils/tutorials/interlex_remotes_tutorial.ipynb b/ilxutils/tutorials/interlex_remotes_tutorial.ipynb
deleted file mode 100644
index f596fda2..00000000
--- a/ilxutils/tutorials/interlex_remotes_tutorial.ipynb
+++ /dev/null
@@ -1,742 +0,0 @@
-{
- "cells": [
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# INSTALL\n",
- "- WARNING ::: ONLY DO ONCE \n",
- " - update devconfig in ~/.config/pyontutils/devonfig.yaml\n",
- " - scigraph_api: http://scigraph.scicrunch.io:9000/scigraph\n",
- " - Install both pyontutils and ilxutils with pyontutils\n",
- " - cd ~/git/pyontutils\n",
- " - pip3 install --user --editable .\n",
- " - cd ~/git/pyontutils/ilxutils/\n",
- " - pip3 install --user --editable .\n",
- " - Clone ontquery and install\n",
- " - cd ~/git\n",
- " - git clone https://github.com/tgbugs/ontquery.git\n",
- " - cd ~/git/ontquery\n",
- " - pip3 install --user --editable ."
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Maintainance\n",
- "- update repos\n",
- " - cd ~/git/pyontutils\n",
- " - git pull \n",
- " - cd ~/git/ontquery\n",
- " - git pull"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# Google Sheets Import\n",
- "### Need pyontutils secrets.yaml setup first!"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "\n",
- "\n",
- "
\n",
- " \n",
- " \n",
- " | \n",
- " term | \n",
- " curie | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 1 | \n",
- " abdominal cavity | \n",
- " UBERON:0003684 | \n",
- "
\n",
- " \n",
- " 2 | \n",
- " abdominal wall | \n",
- " UBERON:0003697 | \n",
- "
\n",
- " \n",
- " 3 | \n",
- " adipose tissue | \n",
- " UBERON:0001013 | \n",
- "
\n",
- " \n",
- " 4 | \n",
- " adult organism | \n",
- " UBERON:0007023 | \n",
- "
\n",
- " \n",
- " 5 | \n",
- " alimentary part of gastrointestinal system | \n",
- " UBERON:0005409 | \n",
- "
\n",
- " \n",
- "
\n",
- "
"
- ],
- "text/plain": [
- "0 term curie\n",
- "1 abdominal cavity UBERON:0003684\n",
- "2 abdominal wall UBERON:0003697\n",
- "3 adipose tissue UBERON:0001013\n",
- "4 adult organism UBERON:0007023\n",
- "5 alimentary part of gastrointestinal system UBERON:0005409"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "from pyontutils.sheets import Sheet\n",
- "import pandas as pd\n",
- "KEY_NAME = 'sparc-terms'\n",
- "SHEET_NAME = 'Minimal information model(MIS)'\n",
- "\n",
- "class Brainstem(Sheet):\n",
- " name = KEY_NAME # key name you gave the google sheet id value in secrets.yaml\n",
- " sheet_name = SHEET_NAME # the actual sheet name on the google sheet\n",
- " fetch_grid = True # meta data in self.grid that has detials like bolding\n",
- "\n",
- "brainstem = Brainstem()\n",
- "df = pd.DataFrame(brainstem.raw_values)\n",
- "df.columns = df.iloc[0]\n",
- "df.drop(df.index[0], inplace=True)\n",
- "df.head()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "['abdominal cavity',\n",
- " 'abdominal wall',\n",
- " 'adipose tissue',\n",
- " 'adult organism',\n",
- " 'alimentary part of gastrointestinal system',\n",
- " 'arterial blood',\n",
- " 'biceps femoris',\n",
- " 'blood',\n",
- " 'bolus of food',\n",
- " 'brainstem']"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "list(df.term)[:10]"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# CSV or TSV EXAMPLE"
- ]
- },
- {
- "cell_type": "raw",
- "metadata": {},
- "source": [
- "import pandas as pd\n",
- "\n",
- "csv_df = pd.DataFrame('/path/to/csv')\n",
- "tsv_df = pd.DataFrame('/path/to/tsv', delimiter='\\t')\n",
- "\n",
- "csv_df.head() # returns top 5 rows\n",
- "csv_df.column_name # specific column name will return a Series which will act like a list"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# QUERY DATABASES "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 3,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[OntTerm('HBA:3999', label='brain (hba)'),\n",
- " OntTerm('FMA:50801', label='Brain'),\n",
- " OntTerm('UBERON:0000955', label='brain'),\n",
- " OntTerm('UBERON:6110636', label='adult cerebral ganglion'),\n",
- " OntTerm('ILX:0101431', label='Brain'),\n",
- " OntTerm('ILX:0101433', label='Brain Infarction'),\n",
- " OntTerm('ILX:0506386', label='Brain Aneurysm'),\n",
- " OntTerm('ILX:0433050', label='Brain Chemistry'),\n",
- " OntTerm('ILX:0641746', label='alpha BRAIN'),\n",
- " OntTerm('ILX:0726394', label='brain meninx'),\n",
- " OntTerm('ILX:0729002', label='brain commissure'),\n",
- " OntTerm('ILX:0101434', label='Brain Ischemia'),\n",
- " OntTerm('ILX:0461406', label='Brain Death'),\n",
- " OntTerm('ILX:0733041', label='brain endothelium')]"
- ]
- },
- "execution_count": 3,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Give \"query\" a usable parameter to query the databases \n",
- "from pyontutils.core import query # OntTerm\n",
- "query(term='brain')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 4,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[OntTerm('ILX:0103358', label='DN1 neuron'),\n",
- " OntTerm('ILX:0109525', label='Pupal DN1 period neuron')]"
- ]
- },
- "execution_count": 4,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# similar entities will show\n",
- "# default limit is 10\n",
- "query(term='DN1 neuron', limit=2) "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 5,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "[OntTerm('UBERON:0000955', label='brain')]"
- ]
- },
- "execution_count": 5,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "# Faster and more accurate with curie/iri\n",
- "query(curie='UBERON:0000955')"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 6,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{'prefix': 'UBERON',\n",
- " 'suffix': '0000955',\n",
- " 'orig_kwargs': {'iri': 'http://purl.obolibrary.org/obo/UBERON_0000955',\n",
- " 'curie_or_iri': None,\n",
- " 'label': None,\n",
- " 'term': None,\n",
- " 'search': None,\n",
- " 'validated': None,\n",
- " 'query': None},\n",
- " 'kwargs': {'iri': 'http://purl.obolibrary.org/obo/UBERON_0000955',\n",
- " 'curie_or_iri': None,\n",
- " 'label': None,\n",
- " 'term': None,\n",
- " 'search': None,\n",
- " 'validated': None,\n",
- " 'query': None},\n",
- " 'label': 'brain',\n",
- " 'labels': ['brain'],\n",
- " 'definition': 'The brain is the center of the nervous system in all vertebrate, and most invertebrate, animals. Some primitive animals such as jellyfish and starfish have a decentralized nervous system without a brain, while sponges lack any nervous system at all. In vertebrates, the brain is located in the head, protected by the skull and close to the primary sensory apparatus of vision, hearing, balance, taste, and smell[WP].',\n",
- " 'synonyms': ['the brain',\n",
- " 'synganglion',\n",
- " 'suprasegmental structures',\n",
- " 'suprasegmental levels of nervous system',\n",
- " 'encephalon'],\n",
- " 'deprecated': False,\n",
- " 'predicates': {},\n",
- " '_type': OntId('owl:Class'),\n",
- " '_types': (OntId('owl:Class'),),\n",
- " '_graph': None,\n",
- " '_source': ,\n",
- " 'validated': True,\n",
- " '_query_result': QueryResult({'iri': 'http://purl.obolibrary.org/obo/UBERON_0000955', 'curie': 'UBERON:0000955', 'label': 'brain', 'labels': ['brain'], 'definition': 'The brain is the center of the nervous system in all vertebrate, and most invertebrate, animals. Some primitive animals such as jellyfish and starfish have a decentralized nervous system without a brain, while sponges lack any nervous system at all. In vertebrates, the brain is located in the head, protected by the skull and close to the primary sensory apparatus of vision, hearing, balance, taste, and smell[WP].', 'synonyms': ['the brain', 'synganglion', 'suprasegmental structures', 'suprasegmental levels of nervous system', 'encephalon'], 'deprecated': False, 'predicates': {}, 'type': OntId('owl:Class'), 'types': (OntId('owl:Class'),), '_graph': None, 'source': })}"
- ]
- },
- "execution_count": 6,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "entity = query(curie='UBERON:0000955')[0]\n",
- "# Full result attribute\n",
- "vars(entity)"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# DEBUGGING HINT\n",
- "- 1 \"?\" at the end of a function or class will return its params, docstring, and pathing. \n",
- "- 2 \"??\" returns the ENTIRE class/functions "
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 7,
- "metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "\u001b[0;31mSignature:\u001b[0m \u001b[0mquery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m*\u001b[0m\u001b[0margs\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m**\u001b[0m\u001b[0mkwargs\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mCall signature:\u001b[0m\n",
- "\u001b[0mquery\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mterm\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mprefix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mcategory\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mlabel\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mabbrev\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0msearch\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0msuffix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mcurie\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0miri\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mNone\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mpredicates\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mexclude_prefix\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mdepth\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m1\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mdirection\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;34m'OUTGOING'\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0mlimit\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;36m10\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m \u001b[0minclude_deprecated\u001b[0m\u001b[0;34m=\u001b[0m\u001b[0;32mFalse\u001b[0m\u001b[0;34m,\u001b[0m\u001b[0;34m\u001b[0m\n",
- "\u001b[0;34m\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n",
- "\u001b[0;31mType:\u001b[0m OntQueryCli\n",
- "\u001b[0;31mString form:\u001b[0m \n",
- "\u001b[0;31mFile:\u001b[0m ~/Dropbox/git/ontquery/ontquery/query.py\n",
- "\u001b[0;31mDocstring:\u001b[0m \n"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
- "source": [
- "query?"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "# BONUS!"
- ]
- },
- {
- "cell_type": "markdown",
- "metadata": {},
- "source": [
- "### Concurrently search! (Run multiple query functions at the same time)"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 8,
- "metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "Futures compiled\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "[({'curie': 'UBERON:0000955'}, [OntTerm('UBERON:0000955', label='brain')]),\n",
- " ({'curie': 'UBERON:6110636'},\n",
- " [OntTerm('UBERON:6110636', label='adult cerebral ganglion')])]"
- ]
- },
- "execution_count": 8,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
- "source": [
- "from pyontutils.utils import Async, deferred\n",
- "from pyontutils.core import OntTerm, ixr, query\n",
- "from typing import List, Tuple\n",
- "\n",
- "# query.setup()\n",
- "\n",
- "def queries(kwargs_list:List[dict]) -> List[Tuple[str, dict]]:\n",
- " '''Asynchronously query databases to dramatically increase runtime un users end \n",
- " \n",
- " Examples:\n",
- " >>> queries([{'term':'Brain'},])\n",
- " [({'term': 'Brain'},\n",
- " [OntTerm('HBA:3999', label='brain (hba)'),\n",
- " OntTerm('FMA:50801', label='Brain'),\n",
- " OntTerm('UBERON:0000955', label='brain'),\n",
- " OntTerm('UBERON:6110636', label='adult cerebral ganglion')])]\n",
- " >>> queries([{'curie':'UBERON:0000955'},])\n",
- " [({'curie': 'UBERON:0000955'}, [OntTerm('UBERON:0000955', label='brain')])]\n",
- " \n",
- " Definitions:\n",
- " kwargs == common name given to dictionary input for function\n",
- " tuple == a list that you cannot update. \n",
- " lambda == short-hand for single line function creation (func = lambda :