From 38dd62f32af35913b911664b33c183f0d721372d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Hampus=20N=C3=A4sstr=C3=B6m?= Date: Thu, 25 Apr 2024 10:29:20 +0200 Subject: [PATCH] Added ucumvert and support for custom keys --- src/ontopint/__init__.py | 54 +++++++++++++++++++++++++++---------- tests/data/test_data.jsonld | 33 +++++++++++++++++++++++ 2 files changed, 73 insertions(+), 14 deletions(-) create mode 100644 tests/data/test_data.jsonld diff --git a/src/ontopint/__init__.py b/src/ontopint/__init__.py index bd3dc05..a842e1a 100644 --- a/src/ontopint/__init__.py +++ b/src/ontopint/__init__.py @@ -2,9 +2,24 @@ import rdflib from pyld import jsonld -from pint import UnitRegistry -ureg = UnitRegistry() +# from pint import UnitRegistry +from ucumvert import PintUcumRegistry + +# ureg = UnitRegistry() +ureg = PintUcumRegistry() + +processing_context = { + 'qudt': 'http://qudt.org/schema/qudt/', + 'qunit': 'http://qudt.org/vocab/unit/', + 'qkind': 'http://qudt.org/vocab/quantkind/', + 'unit': {'@id': 'qudt:hasUnit', '@type': '@id'}, + 'quantity': {'@id': 'qudt:hasQuantityKind', '@type': '@id'}, + 'value': 'qudt:value', +} + +HAS_UNIT = 'http://qudt.org/schema/qudt/hasUnit' +VALUE = 'http://qudt.org/schema/qudt/value' class UnitDecoder(json.JSONDecoder): @@ -34,31 +49,42 @@ def object_hook(self, obj): return obj -def _replace_units(obj, context): +def _replace_units(obj, context, original_key_lookup_dict): if isinstance(obj, dict): - if 'unit' in obj and 'value' in obj: - expanded_obj = jsonld.expand({**obj, '@context': context}, context) - unit_iri = expanded_obj[0]['http://qudt.org/schema/qudt/hasUnit'][0]['@id'] - obj.pop('unit') + expanded_obj = jsonld.expand({**obj, '@context': context}, context) + if HAS_UNIT in expanded_obj[0] and VALUE in expanded_obj[0]: + unit_iri = expanded_obj[0][HAS_UNIT][0]['@id'] + obj.pop(original_key_lookup_dict['unit']) graph = rdflib.Graph() graph.parse(unit_iri) result = graph.query( - f'SELECT * WHERE {{<{unit_iri}> ?symbol}}' + f'SELECT * WHERE {{<{unit_iri}> ?ucumCode}}' + ) + unit = result.bindings[0]['ucumCode'] + obj[original_key_lookup_dict['value']] = ureg.Quantity( + obj[original_key_lookup_dict['value']], ureg.from_ucum(unit) ) - unit = result.bindings[0]['symbol'] - obj['value'] = ureg.Quantity(obj['value'], unit) - for key, value in obj.items(): - obj[key] = _replace_units(value, context) + obj[key] = _replace_units(value, context, original_key_lookup_dict) return obj elif isinstance(obj, list): - return [_replace_units(value, context) for value in obj] + return [ + _replace_units(value, context, original_key_lookup_dict) for value in obj + ] else: return obj def parse_units(json_ld: dict) -> dict: original_context = json_ld.pop('@context') - parsed_json = _replace_units(json_ld, original_context) + key_dict = {'@context': processing_context, 'unit': 'unit', 'value': 'value'} + # inverse expand-reverse cycle + expanded = jsonld.expand(key_dict, processing_context) + compacted = jsonld.compact(expanded, original_context) + # remove the context + del compacted['@context'] + # reverse the dict + original_key_lookup_dict = {v: k for k, v in compacted.items()} + parsed_json = _replace_units(json_ld, original_context, original_key_lookup_dict) parsed_json['@context'] = original_context return parsed_json diff --git a/tests/data/test_data.jsonld b/tests/data/test_data.jsonld new file mode 100644 index 0000000..277be4e --- /dev/null +++ b/tests/data/test_data.jsonld @@ -0,0 +1,33 @@ +{ + "@context": { + "qudt": "http://qudt.org/schema/qudt/", + "qunit": "http://qudt.org/vocab/unit/", + "qkind": "http://qudt.org/vocab/quantkind/", + "my_unit": { + "@id": "qudt:hasUnit", + "@type": "@id" + }, + "quantity": { + "@id": "qudt:hasQuantityKind", + "@type": "@id" + }, + "my_value": "qudt:value", + "band_gap": "http://example.org/BandGap", + "mass": "http://example.org/Mass", + "type": "@type", + "sample_id": "@id" + }, + "sample_id": "HUB_HaNa_20240424_Sample-1", + "band_gap" : { + "type": "qudt:Quantity", + "quantity": "qkind:GapEnergy", + "my_value": 1.9, + "my_unit": "qunit:EV" + }, + "mass": { + "type": "qudt:Quantity", + "quantity": "qkind:Mass", + "my_value": 3201.3, + "my_unit": "qunit:MilliGM" + } + } \ No newline at end of file