Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

SQL Upgrade #94

Open
wants to merge 38 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
38 commits
Select commit Hold shift + click to select a range
0bb8f9f
removing case conflict
tmsincomb Oct 31, 2019
125b51c
new elastic search wrapper
tmsincomb Nov 5, 2019
80e4753
Merge remote-tracking branch 'upstream/master'
tmsincomb Nov 5, 2019
ff55c49
Merge remote-tracking branch 'upstream/master'
tmsincomb Nov 7, 2019
c7c71df
shortcut to sql and update print on ontopandas
tmsincomb Dec 13, 2019
1825020
Merge remote-tracking branch 'upstream/master'
tmsincomb Dec 13, 2019
72901e4
Merge remote-tracking branch 'upstream/master'
Jan 7, 2020
4559c51
this was lost
Jan 7, 2020
a27a253
update ilx references
Jan 7, 2020
3c72023
updates
tmsincomb Feb 25, 2020
33c738b
Merge remote-tracking branch 'upstream/master'
tmsincomb Feb 25, 2020
0a56d4c
Merge remote-tracking branch 'upstream/master'
tmsincomb Feb 28, 2020
dbb9293
Merge remote-tracking branch 'upstream/master'
tmsincomb Feb 28, 2020
b7627b7
Merge remote-tracking branch 'upstream/master'
tmsincomb Feb 28, 2020
d7d935a
Merge remote-tracking branch 'upstream/master'
tmsincomb Feb 28, 2020
5d4f786
Merge remote-tracking branch 'upstream/master'
tmsincomb Mar 9, 2020
7bac845
Merge remote-tracking branch 'upstream/master'
tmsincomb Mar 24, 2020
53d71c4
updated tutorials
tmsincomb Apr 10, 2020
3e5e5c9
fixed bugs
tmsincomb Apr 10, 2020
8f8c722
Merge remote-tracking branch 'upstream/master'
tmsincomb Apr 10, 2020
ede39d7
Merge remote-tracking branch 'upstream/master'
tmsincomb Apr 29, 2020
665ea4c
Merge remote-tracking branch 'upstream/master'
tmsincomb May 4, 2020
9091b0b
Merge remote-tracking branch 'upstream/master'
tmsincomb May 8, 2020
e75362f
bug fixes
tmsincomb May 21, 2020
4a866b9
Merge remote-tracking branch 'upstream/master'
tmsincomb May 21, 2020
ebf0a3c
Merge remote-tracking branch 'upstream/master'
tmsincomb Jul 14, 2020
c0c1692
gradual updates
tmsincomb Oct 1, 2020
c81ef18
Merge remote-tracking branch 'upstream/master'
tmsincomb Oct 1, 2020
8b87424
moved tutorial to docs
tmsincomb Oct 21, 2020
a6c9c77
added proper test port
tmsincomb Dec 10, 2020
21de3e6
pax-spine reference files
tmsincomb Jan 3, 2021
abf6546
ilxutils quality of life updates
tmsincomb May 26, 2021
ad0f391
Merge remote-tracking branch 'upstream/master'
tmsincomb May 26, 2021
0eb349e
sql update
tmsincomb Aug 18, 2021
83da68f
Merge remote-tracking branch 'upstream/master'
tmsincomb Aug 18, 2021
dad24e7
-
tmsincomb Aug 19, 2021
6cdcad3
Merge remote-tracking branch 'upstream/master'
tmsincomb Aug 26, 2021
4444ec1
sql update
tmsincomb Mar 6, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view

Large diffs are not rendered by default.

16 changes: 7 additions & 9 deletions ilxutils/ilx-playground.ipynb
Original file line number Diff line number Diff line change
@@ -1,12 +1,5 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": 33,
Expand Down Expand Up @@ -681,7 +674,12 @@
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"metadata": {
"collapsed": true,
"jupyter": {
"outputs_hidden": true
}
},
"outputs": [
{
"name": "stderr",
Expand Down Expand Up @@ -6477,7 +6475,7 @@
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.7.3"
"version": "3.7.5"
}
},
"nbformat": 4,
Expand Down
71 changes: 44 additions & 27 deletions ilxutils/ilxutils/backup_ilx.py
Original file line number Diff line number Diff line change
@@ -1,42 +1,59 @@
from pathlib import Path as p
from ilxutils.interlex_sql import IlxSql
from ilxutils.tools import create_pickle
from .interlex_sql import IlxSql

# from tools import create_pickle
import pickle
import os


sql = IlxSql(db_url=os.environ.get('SCICRUNCH_DB_URL_PRODUCTION'))
# Destination directory for all InterLex backup pickles.
HOME = p.home() / "DropboxPersonal/.interlex_backups"
# BUG FIX: Path.exists is a method — the previous `HOME.exists is False`
# compared the bound-method object to False (always falsy comparison), so the
# directory was never created.  Call it, and create missing parents so the
# backup script works on a fresh machine.
if not HOME.exists():
    HOME.mkdir(parents=True, exist_ok=True)


def create_pickle(data, outfilename):
    """Serialize *data* with pickle and write it to *outfilename*."""
    payload = pickle.dumps(data)
    with open(outfilename, "wb") as handle:
        handle.write(payload)


def main():
    """Back up the core InterLex tables as pickles under HOME.

    Each table is fetched, pickled to ``ilx_db_<stem>_backup.pickle``, and
    released before the next fetch so only one table is held in memory at a
    time.  Requires SCICRUNCH_DB_URL_PRODUCTION in the environment.
    """
    sql = IlxSql(db_url=os.environ.get("SCICRUNCH_DB_URL_PRODUCTION"))

    # (progress label, fetcher, backup-file stem) — one entry per table.
    jobs = [
        ("Users", sql.get_users, "users"),
        ("terms", sql.get_terms, "terms"),
        ("annotations", sql.get_annotations, "annos"),
        ("existing ids", sql.get_existing_ids, "ex"),
        ("synonyms", sql.get_synonyms, "synonyms"),
        ("superclasses", sql.get_superclasses, "superclasses"),
        ("relationships", sql.get_relationships, "relationships"),
    ]
    for label, fetch, stem in jobs:
        table = fetch()
        create_pickle(table, HOME / f"ilx_db_{stem}_backup.pickle")
        print(f"=== {label} backup complete ===")
        del table  # drop the reference before fetching the next table


if __name__ == "__main__":
    main()
119 changes: 119 additions & 0 deletions ilxutils/ilxutils/elasticsearch_wrapper.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
from functools import wraps
import json
import os
import subprocess
import docopt
from elasticsearch import Elasticsearch
def BASHRC(s):
    """Return environment variable *s*, or None when unset (PEP 8: def over named lambda)."""
    return os.environ.get(s)


class ElasticSearchTools:
    """ Shortcuts for common elasticsearch queries. """

    def __init__(self,
                 host: str, index: str, type: str,
                 user: str, password: str,
                 size: int = 10, start: int = 0,
                 scheme: str = 'https',) -> None:
        """
        :param str host: ElasticSearch host name (no scheme).
        :param str index: real index; becomes part of the endpoint url.
        :param str type: fragmented sub-index actually queried (see search()).
        :param str user: basic-auth user.
        :param str password: basic-auth password.
        :param int size: default page size (currently unused).
        :param int start: default offset (currently unused).
        :param str scheme: 'http' or 'https'.
        """
        self.url = f'{scheme}://{host}/{index}'
        self.host, self.index, self.type = host, index, type
        self.es = Elasticsearch(self.url, http_auth=(user, password))

    def search(self, body: dict, **kwargs) -> dict:
        """ Elasticsearch '/_search' feature.

        We use a fragmented index called a type. The type is the last index
        while the real index becomes part of the host url.

        :param dict body: query dict.
        :return: nested elasticsearch dict where hits are in ['hits']['hits']

        >>>search(body={ 'query': { 'match_all': {} } })
        """
        return self.es.search(index=self.type, body=body, **kwargs)

    def scroll(self, body: dict, size: int, **kwargs) -> list:
        """ Collect up to *size* hits by paging '/_search' 10000 at a time.

        :param dict body: query dict; its 'size' and 'from' keys are overwritten.
        :param int size: total number of hits wanted.
        :return: flat list of hit dicts accumulated across pages.
        """
        body['size'] = 10000
        hits = []
        for offset in range(0, size, 10000):
            # BUG FIX: 'from' must be set *before* the request.  The previous
            # code updated it after searching, so the first page was fetched
            # twice and the final offset was never queried.
            body['from'] = offset
            hits += self.es.search(index=self.type, body=body, **kwargs)['hits']['hits']
        return hits

    def all_matches(self, sorting: str, size, start) -> dict:
        """First or last set of entities.

        :param str sorting: asc for head or desc for tail.
        :param int size: number of entities you want from head or tails.
        :param int start: position of index you want to start from.
        :return: elasticsearch _search dict
        :raises ValueError: if sorting is not asc or desc.
        """
        if sorting.lower().strip() not in ['asc', 'desc']:
            raise ValueError('sorting can only be asc or desc.')
        body = {
            'query': { 'match_all': {} },
            'sort': [ { '_id': sorting } ],
            'size': size,
            'from': start,
        }
        return self.search(body)

    def head(self, size=10, start=0):
        """ First *size* entities by ascending _id; see all_matches. """
        return self.all_matches(sorting='asc', size=size, start=start)

    def tail(self, size=10, start=0):
        """ Last *size* entities by descending _id; see all_matches. """
        return self.all_matches(sorting='desc', size=size, start=start)


class InterLexES(ElasticSearchTools):
    """ ElasticSearchTools preconfigured for the InterLex 'interlex' index. """

    def __init__(self, beta=True):
        """ :param bool beta: restrict matches to beta (tmp_) entities. """
        super().__init__(
            host = BASHRC('SCICRUNCH_ELASTIC_URL'),
            # index = 'Interlex_old',
            index = 'interlex',
            type = 'term',
            user = BASHRC('INTERLEX_ELASTIC_USER'),
            password = BASHRC('INTERLEX_ELASTIC_PASSWORD'),
        )
        self.beta = beta

    def filter_tmp(self):
        """ Prefix query on the ilx field: tmp_ when beta, else ilx_. """
        if self.beta:
            ilx_prefix = 'tmp_'
        else:
            ilx_prefix = 'ilx_'
        return { 'prefix': { 'ilx' : { 'value': ilx_prefix } } }

    def all_matches(self, sorting: str, size, start) -> dict:
        """First or last set of entities.

        :param str sorting: asc for head or desc for tail.
        :param int size: number of entities you want from head or tails.
        :param int start: position of index you want to start from.
        :return: elasticsearch _search dict
        """
        normalized = sorting.lower().strip()
        if normalized not in ('asc', 'desc'):
            raise ValueError('sorting can only be asc or desc.')
        body = {
            'query': self.filter_tmp(),
            'sort': [ { '_id': sorting } ],
            'size': size,
        }
        body['from'] = start
        return self.search(body)


def main():
    """ Print the single most recent non-beta InterLex entity. """
    print(InterLexES(beta=False).tail(1))


if __name__ == '__main__':
    main()
4 changes: 4 additions & 0 deletions ilxutils/ilxutils/interlex_sanity_checks.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,4 @@
"""Scratch sanity checks against the InterLex production SQL data."""
from .sql import production_sql

# from_backup=True: read from the local backup rather than the live
# production database.
ilx_sql = production_sql(from_backup=True)
# Existing external identifier records — presumably one row per mapped
# curie/iri; TODO(review): confirm shape against the get_existing_ids impl.
ex = ilx_sql.get_existing_ids()
Loading