Skip to content

Commit

Permalink
Update OBO Prefix handling and generate OBO Context JSON-LD (#51)
Browse files Browse the repository at this point in the history
* Add code for generating OBO context JSON LD

Closes #50

Still to do: fix the capitalization

* Update interface for generating OBO Foundry format URLs

* Update prefix_maps.py

* Add tentative update for getting OBO Foundry prefixes

* Skip BILA - it's on a dead server

* Update bioregistry.json

* Update __init__.py

* Rename download

* Update CLI automation

* Fix inclusion of preferred prefix in obofoundry and make output

* Update bioregistry.json
  • Loading branch information
cthoyt authored May 25, 2021
1 parent cb4f918 commit 4ff7610
Show file tree
Hide file tree
Showing 10 changed files with 378 additions and 40 deletions.
216 changes: 216 additions & 0 deletions docs/_data/contexts/obo_context.jsonld
Original file line number Diff line number Diff line change
@@ -0,0 +1,216 @@
{
"@context": {
"AAO": "http://purl.obolibrary.org/obo/AAO_",
"ADW": "http://purl.obolibrary.org/obo/ADW_",
"AEO": "http://purl.obolibrary.org/obo/AEO_",
"AGRO": "http://purl.obolibrary.org/obo/AGRO_",
"AISM": "http://purl.obolibrary.org/obo/AISM_",
"AMPHX": "http://purl.obolibrary.org/obo/AMPHX_",
"APO": "http://purl.obolibrary.org/obo/APO_",
"APOLLO_SV": "http://purl.obolibrary.org/obo/APOLLO_SV_",
"ARO": "http://purl.obolibrary.org/obo/ARO_",
"BCO": "http://purl.obolibrary.org/obo/BCO_",
"BFO": "http://purl.obolibrary.org/obo/BFO_",
"BILA": "http://purl.obolibrary.org/obo/BILA_",
"BSPO": "http://purl.obolibrary.org/obo/BSPO_",
"BTO": "http://purl.obolibrary.org/obo/BTO_",
"CARO": "http://purl.obolibrary.org/obo/CARO_",
"CDAO": "http://purl.obolibrary.org/obo/CDAO_",
"CDNO": "http://purl.obolibrary.org/obo/CDNO_",
"CEPH": "http://purl.obolibrary.org/obo/CEPH_",
"CHEBI": "http://purl.obolibrary.org/obo/CHEBI_",
"CHEMINF": "http://purl.obolibrary.org/obo/CHEMINF_",
"CHIRO": "http://purl.obolibrary.org/obo/CHIRO_",
"CHMO": "http://purl.obolibrary.org/obo/CHMO_",
"CIDO": "http://purl.obolibrary.org/obo/CIDO_",
"CIO": "http://purl.obolibrary.org/obo/CIO_",
"CL": "http://purl.obolibrary.org/obo/CL_",
"CLAO": "http://purl.obolibrary.org/obo/CLAO_",
"CLO": "http://purl.obolibrary.org/obo/CLO_",
"CLYH": "http://purl.obolibrary.org/obo/CLYH_",
"CMF": "http://purl.obolibrary.org/obo/CMF_",
"CMO": "http://purl.obolibrary.org/obo/CMO_",
"COB": "http://purl.obolibrary.org/obo/COB_",
"CRO": "http://purl.obolibrary.org/obo/CRO_",
"CTENO": "http://purl.obolibrary.org/obo/CTENO_",
"CTO": "http://purl.obolibrary.org/obo/CTO_",
"CVDO": "http://purl.obolibrary.org/obo/CVDO_",
"DDANAT": "http://purl.obolibrary.org/obo/DDANAT_",
"DDPHENO": "http://purl.obolibrary.org/obo/DDPHENO_",
"DIDEO": "http://purl.obolibrary.org/obo/DIDEO_",
"DOID": "http://purl.obolibrary.org/obo/DOID_",
"DRON": "http://purl.obolibrary.org/obo/DRON_",
"DUO": "http://purl.obolibrary.org/obo/DUO_",
"ECAO": "http://purl.obolibrary.org/obo/ECAO_",
"ECO": "http://purl.obolibrary.org/obo/ECO_",
"ECOCORE": "http://purl.obolibrary.org/obo/ECOCORE_",
"ECTO": "http://purl.obolibrary.org/obo/ECTO_",
"EFO": "http://purl.obolibrary.org/obo/EFO_",
"EHDA": "http://purl.obolibrary.org/obo/EHDA_",
"EHDAA": "http://purl.obolibrary.org/obo/EHDAA_",
"EHDAA2": "http://purl.obolibrary.org/obo/EHDAA2_",
"EMAPA": "http://purl.obolibrary.org/obo/EMAPA_",
"ENVO": "http://purl.obolibrary.org/obo/ENVO_",
"ERO": "http://purl.obolibrary.org/obo/ERO_",
"EUPATH": "http://purl.obolibrary.org/obo/EUPATH_",
"EV": "http://purl.obolibrary.org/obo/EV_",
"ExO": "http://purl.obolibrary.org/obo/ExO_",
"FAO": "http://purl.obolibrary.org/obo/FAO_",
"FBbi": "http://purl.obolibrary.org/obo/FBbi_",
"FBbt": "http://purl.obolibrary.org/obo/FBbt_",
"FBcv": "http://purl.obolibrary.org/obo/FBcv_",
"FBdv": "http://purl.obolibrary.org/obo/FBdv_",
"FIDEO": "http://purl.obolibrary.org/obo/FIDEO_",
"FIX": "http://purl.obolibrary.org/obo/FIX_",
"FLOPO": "http://purl.obolibrary.org/obo/FLOPO_",
"FMA": "http://purl.obolibrary.org/obo/FMA_",
"FOBI": "http://purl.obolibrary.org/obo/FOBI_",
"FOODON": "http://purl.obolibrary.org/obo/FOODON_",
"FOVT": "http://purl.obolibrary.org/obo/FOVT_",
"FYPO": "http://purl.obolibrary.org/obo/FYPO_",
"GAZ": "http://purl.obolibrary.org/obo/GAZ_",
"GECKO": "http://purl.obolibrary.org/obo/GECKO_",
"GENEPIO": "http://purl.obolibrary.org/obo/GENEPIO_",
"GENO": "http://purl.obolibrary.org/obo/GENO_",
"GEO": "http://purl.obolibrary.org/obo/GEO_",
"GNO": "http://purl.obolibrary.org/obo/GNO_",
"GO": "http://purl.obolibrary.org/obo/GO_",
"GSSO": "http://purl.obolibrary.org/obo/GSSO_",
"HANCESTRO": "http://purl.obolibrary.org/obo/HANCESTRO_",
"HAO": "http://purl.obolibrary.org/obo/HAO_",
"HOM": "http://purl.obolibrary.org/obo/HOM_",
"HP": "http://purl.obolibrary.org/obo/HP_",
"HSO": "http://purl.obolibrary.org/obo/HSO_",
"HTN": "http://purl.obolibrary.org/obo/HTN_",
"HsapDv": "http://purl.obolibrary.org/obo/HsapDv_",
"IAO": "http://purl.obolibrary.org/obo/IAO_",
"ICEO": "http://purl.obolibrary.org/obo/ICEO_",
"ICO": "http://purl.obolibrary.org/obo/ICO_",
"IDO": "http://purl.obolibrary.org/obo/IDO_",
"IDOMAL": "http://purl.obolibrary.org/obo/IDOMAL_",
"INO": "http://purl.obolibrary.org/obo/INO_",
"KISAO": "http://purl.obolibrary.org/obo/KISAO_",
"LABO": "http://purl.obolibrary.org/obo/LABO_",
"MA": "http://purl.obolibrary.org/obo/MA_",
"MAMO": "http://purl.obolibrary.org/obo/MAMO_",
"MAT": "http://purl.obolibrary.org/obo/MAT_",
"MAXO": "http://purl.obolibrary.org/obo/MAXO_",
"MCO": "http://purl.obolibrary.org/obo/MCO_",
"MF": "http://purl.obolibrary.org/obo/MF_",
"MFMO": "http://purl.obolibrary.org/obo/MFMO_",
"MFO": "http://purl.obolibrary.org/obo/MFO_",
"MFOEM": "http://purl.obolibrary.org/obo/MFOEM_",
"MFOMD": "http://purl.obolibrary.org/obo/MFOMD_",
"MI": "http://purl.obolibrary.org/obo/MI_",
"MIAPA": "http://purl.obolibrary.org/obo/MIAPA_",
"MICRO": "http://purl.obolibrary.org/obo/MICRO_",
"MIRO": "http://purl.obolibrary.org/obo/MIRO_",
"MMO": "http://purl.obolibrary.org/obo/MMO_",
"MO": "http://purl.obolibrary.org/obo/MO_",
"MOD": "http://purl.obolibrary.org/obo/MOD_",
"MONDO": "http://purl.obolibrary.org/obo/MONDO_",
"MOP": "http://purl.obolibrary.org/obo/MOP_",
"MP": "http://purl.obolibrary.org/obo/MP_",
"MPATH": "http://purl.obolibrary.org/obo/MPATH_",
"MPIO": "http://purl.obolibrary.org/obo/MPIO_",
"MRO": "http://purl.obolibrary.org/obo/MRO_",
"MS": "http://purl.obolibrary.org/obo/MS_",
"MmusDv": "http://purl.obolibrary.org/obo/MmusDv_",
"NBO": "http://purl.obolibrary.org/obo/NBO_",
"NCBITaxon": "http://purl.obolibrary.org/obo/NCBITaxon_",
"NCIT": "http://purl.obolibrary.org/obo/NCIT_",
"NCRO": "http://purl.obolibrary.org/obo/NCRO_",
"NMR": "http://purl.obolibrary.org/obo/NMR_",
"NOMEN": "http://purl.obolibrary.org/obo/NOMEN_",
"OAE": "http://purl.obolibrary.org/obo/OAE_",
"OARCS": "http://purl.obolibrary.org/obo/OARCS_",
"OBA": "http://purl.obolibrary.org/obo/OBA_",
"OBCS": "http://purl.obolibrary.org/obo/OBCS_",
"OBI": "http://purl.obolibrary.org/obo/OBI_",
"OBIB": "http://purl.obolibrary.org/obo/OBIB_",
"OGG": "http://purl.obolibrary.org/obo/OGG_",
"OGI": "http://purl.obolibrary.org/obo/OGI_",
"OGMS": "http://purl.obolibrary.org/obo/OGMS_",
"OGSF": "http://purl.obolibrary.org/obo/OGSF_",
"OHD": "http://purl.obolibrary.org/obo/OHD_",
"OHMI": "http://purl.obolibrary.org/obo/OHMI_",
"OHPI": "http://purl.obolibrary.org/obo/OHPI_",
"OMIABIS": "http://purl.obolibrary.org/obo/OMIABIS_",
"OMIT": "http://purl.obolibrary.org/obo/OMIT_",
"OMO": "http://purl.obolibrary.org/obo/OMO_",
"OMP": "http://purl.obolibrary.org/obo/OMP_",
"OMRSE": "http://purl.obolibrary.org/obo/OMRSE_",
"ONE": "http://purl.obolibrary.org/obo/ONE_",
"ONS": "http://purl.obolibrary.org/obo/ONS_",
"ONTONEO": "http://purl.obolibrary.org/obo/ONTONEO_",
"OOSTT": "http://purl.obolibrary.org/obo/OOSTT_",
"OPL": "http://purl.obolibrary.org/obo/OPL_",
"OPMI": "http://purl.obolibrary.org/obo/OPMI_",
"ORNASEQ": "http://purl.obolibrary.org/obo/ORNASEQ_",
"OVAE": "http://purl.obolibrary.org/obo/OVAE_",
"OlatDv": "http://purl.obolibrary.org/obo/OlatDv_",
"PATO": "http://purl.obolibrary.org/obo/PATO_",
"PCO": "http://purl.obolibrary.org/obo/PCO_",
"PDRO": "http://purl.obolibrary.org/obo/PDRO_",
"PECO": "http://purl.obolibrary.org/obo/PECO_",
"PHIPO": "http://purl.obolibrary.org/obo/PHIPO_",
"PLANA": "http://purl.obolibrary.org/obo/PLANA_",
"PLANP": "http://purl.obolibrary.org/obo/PLANP_",
"PLO": "http://purl.obolibrary.org/obo/PLO_",
"PO": "http://purl.obolibrary.org/obo/PO_",
"PORO": "http://purl.obolibrary.org/obo/PORO_",
"PPO": "http://purl.obolibrary.org/obo/PPO_",
"PR": "http://purl.obolibrary.org/obo/PR_",
"PSDO": "http://purl.obolibrary.org/obo/PSDO_",
"PSO": "http://purl.obolibrary.org/obo/PSO_",
"PW": "http://purl.obolibrary.org/obo/PW_",
"PdumDv": "http://purl.obolibrary.org/obo/PdumDv_",
"RBO": "http://purl.obolibrary.org/obo/RBO_",
"RESID": "http://purl.obolibrary.org/obo/RESID_",
"REX": "http://purl.obolibrary.org/obo/REX_",
"RNAO": "http://purl.obolibrary.org/obo/RNAO_",
"RO": "http://purl.obolibrary.org/obo/RO_",
"RS": "http://purl.obolibrary.org/obo/RS_",
"RXNO": "http://purl.obolibrary.org/obo/RXNO_",
"SAO": "http://purl.obolibrary.org/obo/SAO_",
"SBO": "http://purl.obolibrary.org/obo/SBO_",
"SCDO": "http://purl.obolibrary.org/obo/SCDO_",
"SEPIO": "http://purl.obolibrary.org/obo/SEPIO_",
"SIBO": "http://purl.obolibrary.org/obo/SIBO_",
"SO": "http://purl.obolibrary.org/obo/SO_",
"SPD": "http://purl.obolibrary.org/obo/SPD_",
"STATO": "http://purl.obolibrary.org/obo/STATO_",
"SWO": "http://purl.obolibrary.org/obo/SWO_",
"SYMP": "http://purl.obolibrary.org/obo/SYMP_",
"TADS": "http://purl.obolibrary.org/obo/TADS_",
"TAO": "http://purl.obolibrary.org/obo/TAO_",
"TAXRANK": "http://purl.obolibrary.org/obo/TAXRANK_",
"TGMA": "http://purl.obolibrary.org/obo/TGMA_",
"TO": "http://purl.obolibrary.org/obo/TO_",
"TRANS": "http://purl.obolibrary.org/obo/TRANS_",
"TTO": "http://purl.obolibrary.org/obo/TTO_",
"TXPO": "http://purl.obolibrary.org/obo/TXPO_",
"UBERON": "http://purl.obolibrary.org/obo/UBERON_",
"UO": "http://purl.obolibrary.org/obo/UO_",
"UPA": "http://purl.obolibrary.org/obo/UPA_",
"UPHENO": "http://purl.obolibrary.org/obo/UPHENO_",
"VARIO": "http://purl.obolibrary.org/obo/VARIO_",
"VHOG": "http://purl.obolibrary.org/obo/VHOG_",
"VO": "http://purl.obolibrary.org/obo/VO_",
"VSAO": "http://purl.obolibrary.org/obo/VSAO_",
"VT": "http://purl.obolibrary.org/obo/VT_",
"VTO": "http://purl.obolibrary.org/obo/VTO_",
"WBPhenotype": "http://purl.obolibrary.org/obo/WBPhenotype_",
"WBbt": "http://purl.obolibrary.org/obo/WBbt_",
"WBls": "http://purl.obolibrary.org/obo/WBls_",
"XAO": "http://purl.obolibrary.org/obo/XAO_",
"XCO": "http://purl.obolibrary.org/obo/XCO_",
"XLMOD": "http://purl.obolibrary.org/obo/XLMOD_",
"XPO": "http://purl.obolibrary.org/obo/XPO_",
"ZEA": "http://purl.obolibrary.org/obo/ZEA_",
"ZECO": "http://purl.obolibrary.org/obo/ZECO_",
"ZFA": "http://purl.obolibrary.org/obo/ZFA_",
"ZFS": "http://purl.obolibrary.org/obo/ZFS_",
"ZP": "http://purl.obolibrary.org/obo/ZP_"
}
}
8 changes: 4 additions & 4 deletions src/bioregistry/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,10 +5,10 @@
from .resolve import ( # noqa
get, get_banana, get_bioportal_prefix, get_collection, get_description, get_email, get_example,
get_fairsharing_prefix, get_format, get_homepage, get_identifiers_org_prefix, get_mappings, get_n2t_prefix,
get_name, get_obo_download, get_obofoundry_prefix, get_ols_prefix, get_owl_download, get_pattern, get_pattern_re,
get_provides_for, get_registry, get_registry_description, get_registry_example, get_registry_homepage,
get_registry_name, get_registry_url, get_synonyms, get_version, get_versions, has_terms, is_deprecated,
namespace_in_lui, normalize_prefix, parse_curie,
get_name, get_obo_download, get_obofoundry_format, get_obofoundry_prefix, get_ols_prefix, get_owl_download,
get_pattern, get_pattern_re, get_provides_for, get_registry, get_registry_description, get_registry_example,
get_registry_homepage, get_registry_name, get_registry_url, get_synonyms, get_version, get_versions, has_terms,
is_deprecated, namespace_in_lui, normalize_prefix, parse_curie,
)
from .resolve_identifier import ( # noqa
get_bioportal_url, get_identifiers_org_curie, get_identifiers_org_url, get_link, get_n2t_url, get_obofoundry_link,
Expand Down
41 changes: 26 additions & 15 deletions src/bioregistry/align/obofoundry.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,17 @@
# Field names associated with OBO Foundry entries. How this set is consumed is
# not visible in this part of the file — TODO confirm against the call sites.
# NOTE(review): 'preferred_prefix' is snake_case here, whereas _prepare_obo()
# stores the value under the camelCase key 'preferredPrefix' — confirm which
# spelling the consumers of this set expect.
OBO_KEYS = {
    'deprecated',
    'description',
    'homepage',
    'id',
    'name',
    'namespaceEmbeddedInLui',
    'pattern',
    'preferred_prefix',
    'prefix',
}
# OBO Foundry prefixes that align_obofoundry() passes over without processing.
# 'bila' is listed because its ontology is hosted on a dead server.
SKIP = {'bila'}


def _prepare_obo(obofoundry_entry): # noqa:C901
Expand All @@ -40,6 +44,10 @@ def _prepare_obo(obofoundry_entry): # noqa:C901
if homepage is not None:
rv['homepage'] = homepage

preferred_prefix = obofoundry_entry.get('preferredPrefix')
if preferred_prefix is not None:
rv['preferredPrefix'] = preferred_prefix

contact_dict = obofoundry_entry.get('contact')
if contact_dict is not None and contact_dict.get('email'):
rv.update({
Expand Down Expand Up @@ -78,11 +86,11 @@ def _prepare_obo(obofoundry_entry): # noqa:C901
# TODO maybe try recovering if this doesn't work
obo_url = f'https://raw.githubusercontent.com/{owner}/{repo}/master/{prefix}.obo'

res = session.get(obo_url)
if res.status_code == 200:
rv['download.obo'] = obo_url
else:
secho(f"[{prefix}] [http {res.status_code}] see {rv['repo']} [{path}]", bold=True, fg='red')
with session.get(obo_url, stream=True) as res:
if res.status_code == 200:
rv['download.obo'] = obo_url
else:
secho(f"[{prefix}] [http {res.status_code}] see {rv['repo']} [{path}]", bold=True, fg='red')

elif method == 'owl2obo':
source_url = build['source_url']
Expand All @@ -98,22 +106,22 @@ def _prepare_obo(obofoundry_entry): # noqa:C901
rv['download.obo'] = source_url
elif source_url.endswith('.owl'):
obo_url = source_url.removesuffix('.owl') + '.obo'
res = session.get(obo_url)
if res.status_code == 200:
rv['download.obo'] = source_url
else:
secho(f'[{prefix}] [http {res.status_code}] problem with {obo_url}', bold=True, fg='red')
with session.get(obo_url) as res:
if res.status_code == 200:
rv['download.obo'] = source_url
else:
secho(f'[{prefix}] [http {res.status_code}] problem with {obo_url}', bold=True, fg='red')
else:
secho(f'[{prefix}] unhandled build.source_url: {source_url}', fg='red')

elif method == 'obo2owl':
source_url = build['source_url']
if source_url.endswith('.obo'):
res = session.get(source_url)
if res.status_code == 200:
rv['download.obo'] = source_url
else:
secho(f'[{prefix}] [http {res.status_code}] problem with {source_url}', bold=True, fg='red')
with session.get(source_url, stream=True) as res:
if res.status_code == 200:
rv['download.obo'] = source_url
else:
secho(f'[{prefix}] [http {res.status_code}] problem with {source_url}', bold=True, fg='red')
else:
secho(f'[{prefix}] unhandled extension {source_url}', bold=True, fg='red')
else:
Expand Down Expand Up @@ -145,6 +153,9 @@ def align_obofoundry(registry):
obofoundry_id_to_bioregistry_id[obofoundry_id] = bioregistry_id

for obofoundry_prefix, obofoundry_entry in obofoundry_registry.items():
if obofoundry_prefix in SKIP:
secho(f'[{obofoundry_prefix}] skipping', fg='yellow')
continue
# Get key by checking the miriam.id key
bioregistry_id = obofoundry_id_to_bioregistry_id.get(obofoundry_prefix)
if bioregistry_id is None:
Expand Down
2 changes: 2 additions & 0 deletions src/bioregistry/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
from .generate_warnings_file import warnings
from .lint import lint
from .make_curation_list import curation
from .prefix_maps import generate_context_json_ld


@click.group()
Expand Down Expand Up @@ -74,6 +75,7 @@ def update(ctx: click.Context):
ctx.invoke(compare)
ctx.invoke(curation)
ctx.invoke(warnings)
ctx.invoke(generate_context_json_ld)


if __name__ == '__main__':
Expand Down
Loading

0 comments on commit 4ff7610

Please sign in to comment.