From ca506e9a57d97077fb847d0f2b2eb4aed4b88c53 Mon Sep 17 00:00:00 2001 From: Sarthak Kapoor Date: Wed, 14 Aug 2024 17:12:47 +0200 Subject: [PATCH] Rename and add doc string for utils: generate_archive_from_json --- src/nomad_polymerization_reactions/utils.py | 37 ++++++++++++++++++- .../processed_reactions/empty.archive.yaml | 2 + tests/data/processed_reactions/empty.json | 1 + tests/test_utils.py | 8 +++- 4 files changed, 45 insertions(+), 3 deletions(-) create mode 100644 tests/data/processed_reactions/empty.archive.yaml create mode 100644 tests/data/processed_reactions/empty.json diff --git a/src/nomad_polymerization_reactions/utils.py b/src/nomad_polymerization_reactions/utils.py index 1ce7c40..764e70a 100644 --- a/src/nomad_polymerization_reactions/utils.py +++ b/src/nomad_polymerization_reactions/utils.py @@ -9,7 +9,42 @@ from structlog.stdlib import BoundLogger -def generate_archive_from_llm_output(filepath: str, logger: 'BoundLogger' = None): # noqa: PLR0912 +def generate_archive_from_json(filepath: str, logger: 'BoundLogger' = None): # noqa: PLR0912 + """ + Generate an archive.yaml file from a JSON file coming from the LLM output. + Function expects a JSON of the following format: + ```json + { + "file": "paper_0.json", + "monomer1_s": "C=C", + "monomer2_s": "C=O", + "monomer1": "ethylene", + "monomer2": "carbon monoxide", + "r_values": { + "constant_1": 22.0, + "constant_2": 0.0 + }, + "conf_intervals": { + "constant_conf_1": null, + "constant_conf_2": null + }, + "temperature": 20.0, + "temperature_unit": "\u00b0C", + "solvent": null, + "method": "bulk", + "r-product": null, + "source": "https://doi.org/10.1002/pol.1963.110010415" + } + ``` + + Args: + filepath (str): Path to the JSON file. + logger (BoundLogger): A structlog logger. + + Returns: + dict: The dict used to generate archive.yaml file. + """ + class OrderedDumper(yaml.Dumper): def represent_dict(self, data): return self.represent_mapping('tag:yaml.org,2002:map', data.items()) diff --git a/tests/data/processed_reactions/empty.archive.yaml b/tests/data/processed_reactions/empty.archive.yaml new file mode 100644 index 0000000..5c7228e --- /dev/null +++ b/tests/data/processed_reactions/empty.archive.yaml @@ -0,0 +1,2 @@ +data: + m_def: nomad_polymerization_reactions.schema_packages.mypackage.PolymerizationReaction diff --git a/tests/data/processed_reactions/empty.json b/tests/data/processed_reactions/empty.json new file mode 100644 index 0000000..9e26dfe --- /dev/null +++ b/tests/data/processed_reactions/empty.json @@ -0,0 +1 @@ +{} \ No newline at end of file diff --git a/tests/test_utils.py b/tests/test_utils.py index 0e0c1f7..e48b2a4 100644 --- a/tests/test_utils.py +++ b/tests/test_utils.py @@ -2,7 +2,7 @@ import pytest import yaml -from nomad_polymerization_reactions.utils import generate_archive_from_llm_output +from nomad_polymerization_reactions.utils import generate_archive_from_json @pytest.mark.parametrize( @@ -20,10 +20,14 @@ 'tests/data/processed_reactions/paper_5_reaction_1.archive.yaml' ), }, + { + 'filepath': 'tests/data/processed_reactions/empty.json', + 'reference': ('tests/data/processed_reactions/empty.archive.yaml'), + }, ], ) def test_generate_archive_from_llm_output(params): - output = generate_archive_from_llm_output(params['filepath']) + output = generate_archive_from_json(params['filepath']) with open(params['reference']) as f: reference = yaml.load(f, Loader=yaml.FullLoader) assert output == reference