From 772d958fb5c0694d1d4ecf80fb506552d01e1bf7 Mon Sep 17 00:00:00 2001 From: Jordan Byers <21129425+ItIsJordan@users.noreply.github.com> Date: Fri, 25 Aug 2023 13:20:03 +0100 Subject: [PATCH] Related records (#232) * Add related table/doi changes Adds changes to accommodate creation of related record ID and DOI values into submissions. * Add test functions Adds test functions for the new add functions in the Table and Submission classes. * Update add_related functions Updates the new add_related functions in Submission and Table to have better checks. Also adds tests for new functions. * Update hepdata-validator version Updates the hepdata-validator version in requirements.txt (>=0.3.2 -> 0.3.4) * Rename related_to_hepdata_recids Renames the related_to_hepdata_recids value to related_to_hepdata_records to match hepdata-validator change. * Update documentation (usage.rst) Updates the documentation in usage.rst to describe use of the bidirectional linking feature. * Drop Python 3.6 Drops support for python 3.6 as the new hepdata-validator version no longer supports this. * Update usage.rst Adds examples of adding multiple linking data entries into the usage documentation. * Pylint import fix Fixes the pylint errors caused in the previous commit. * Revert "Drop Python 3.6" This reverts commit 009478b4c048a58ab447b90982a0d16c9ce03a62. * Update hepdata-validator for python 3.6 support Updates hepdata-validator which again adds support for python 3.6 * Update comment in add_related_doi Updates the commenting in add_related_doi to be more clear as it was not updated alongside the code. * Update usage.rst Updates the usage documentation page to use existing DOI values and a link. 
* Remove parentheses --------- Co-authored-by: Clemens Lange --- docs/usage.rst | 37 +++++++++++++++++++++++++++++++++++++ hepdata_lib/__init__.py | 38 ++++++++++++++++++++++++++++++++++++++ requirements.txt | 2 +- tests/test_submission.py | 39 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 113 insertions(+), 3 deletions(-) diff --git a/docs/usage.rst b/docs/usage.rst index 82224b6a..3bd3ecba 100644 --- a/docs/usage.rst +++ b/docs/usage.rst @@ -112,6 +112,25 @@ The ``create_files`` function writes all the YAML output files you need and pack **Please note**: creating the output files also creates a ``submission`` folder containing the individual files going into the tarball. This folder exists merely for convenience, in order to make it easy to inspect each individual file. It is not recommended to attempt to manually manage or edit the files in the folder, and there is no guarantee that ``hepdata_lib`` will handle any of the changes you make in a graceful manner. As far as we are aware, there is no use case where manual editing of the files is necessary. If you have such a use case, please report it in a Github issue. +Adding links to related records +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To add a link to a related record object, you can use the `add_related_recid` function of the Submission object. + +**Please note**: Values must be entered as integers. + +:: + + sub.add_related_recid(1) + sub.add_related_recid(2) + sub.add_related_recid(3) + +In this example, we are adding a link to the submission with the record ID value of `"1"`. + +The documentation for this feature can be found here: (`Linking Records`_). + +.. _`Linking Records` : https://hepdata-submission.readthedocs.io/en/latest/bidirectional.html#linking-records + .. _sec-usage-tab-var: @@ -239,6 +258,24 @@ Lists of recognized keywords are available from the hepdata documentation for `O .. 
_`Particles`: https://hepdata-submission.readthedocs.io/en/latest/keywords/partlist.html +Adding links to related tables +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ + +To add a link to a related table object, you can use the `add_related_doi` function of the Table class. + +**Please note**: Your DOIs must match the format: `10.17182/hepdata.[RecordID].v[Version]/t[Table]` + +:: + + table.add_related_doi("10.17182/hepdata.72886.v2/t3") + table.add_related_doi("10.17182/hepdata.12882.v1/t2") + +In this example, we are adding a link to the table with a DOI value of: `10.17182/hepdata.12882.v1/t2 <https://doi.org/10.17182/hepdata.12882.v1/t2>`__. + + +The documentation for this feature can be found here: `Linking Tables`_. + +.. _`Linking Tables` : https://hepdata-submission.readthedocs.io/en/latest/bidirectional.html#linking-tables Uncertainties ++++++++++++++++++++++++++++++++ diff --git a/hepdata_lib/__init__.py b/hepdata_lib/__init__.py index d9103252..3b0a51e7 100644 --- a/hepdata_lib/__init__.py +++ b/hepdata_lib/__init__.py @@ -6,9 +6,11 @@ import warnings from collections import defaultdict from decimal import Decimal +from re import match as rematch import numpy as np import yaml from future.utils import raise_from + # try to use LibYAML bindings if possible try: from yaml import CLoader as Loader, CSafeDumper as Dumper @@ -293,6 +295,7 @@ def __init__(self, name): self._name = None self.name = name self.variables = [] + self.related_tables = [] self.description = "Example description" self.location = "Example location" self.keywords = {} @@ -338,6 +341,22 @@ def add_image(self, file_path, outdir=None): else: raise RuntimeError(f"Cannot find image file: {file_path}") + def add_related_doi(self, doi): + """ + Appends a DOI string to the related_tables list. + + :param doi: The table DOI. + :type doi: string + """ + # Checking against the regex, this also happens in the validator. 
+ pattern = r"^10\.17182\/hepdata\.\d+\.v\d+\/t\d+$" + match = rematch(pattern, doi) + if match: + to_string = str(doi) + self.related_tables.append(to_string) + else: + raise ValueError(f"DOI does not match the correct pattern: {pattern}.") + def write_output(self, outdir): """ Write the table files into the output directory. @@ -444,6 +463,7 @@ def write_yaml(self, outdir="."): submission["name"] = self.name submission["description"] = self.description submission["location"] = self.location + submission["related_to_table_dois"] = self.related_tables submission["data_file"] = f'{shortname}.yaml' submission["keywords"] = [] if self.additional_resources: @@ -472,6 +492,7 @@ def __init__(self): self.tables = [] self.comment = "" self.record_ids = [] + self.related_records = [] self.add_additional_resource( "Created with hepdata_lib " + __version__, "https://zenodo.org/record/4946277") @@ -522,6 +543,22 @@ def add_record_id(self, r_id, r_type): record_id["type"] = r_type self.record_ids.append(record_id) + def add_related_recid(self, r_id): + """ + Appends a record ID to the related_records list. + :param r_id: The record's ID + :type r_id: integer + """ + + try: + recid = int(r_id) + except Exception as exc: + raise TypeError(f"Expected 'Integer', instead got '{type(r_id)}'.") from exc + if recid > 0: + self.related_records.append(recid) + else: + raise ValueError("Please enter a valid integer above 0.") + def read_abstract(self, filepath): """ Read in the abstracts file. 
@@ -567,6 +604,7 @@ def create_files(self, outdir=".", validate=True, remove_old=False): submission = {} submission["data_license"] = self.get_license() submission["comment"] = self.comment + submission["related_to_hepdata_records"] = self.related_records if self.additional_resources: submission["additional_resources"] = self.additional_resources diff --git a/requirements.txt b/requirements.txt index 22e21009..eaf80f96 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ numpy PyYAML>=4.0 future -hepdata-validator>=0.3.2 +hepdata-validator>=0.3.5 diff --git a/tests/test_submission.py b/tests/test_submission.py index e9f044d4..3fa0275a 100644 --- a/tests/test_submission.py +++ b/tests/test_submission.py @@ -107,8 +107,6 @@ def test_create_files_with_removal(self): self.assertFalse(os.path.isfile(testfile)) - - def test_read_abstract(self): """Test read_abstract function.""" some_string = string.ascii_lowercase @@ -154,3 +152,40 @@ def test_nested_files_to_copy(self): tar.getmember(testfile) except KeyError: self.fail("Submission.create_files failed to write all files to tar ball.") + + def test_add_related_doi(self): + """Test insertion and retrieval of recid values in the Table object""" + # Possibly unneccessary boundary testing + test_data = [ + {"doi": "10.17182/hepdata.1.v1/t1", "error": False}, + {"doi": "10.17182/hepdata.1", "error": ValueError}, + {"doi": "10.17182/hepdata.1.v1", "error": ValueError}, + {"doi": "10.17182/hepdata.1.v1/a2", "error": ValueError}, + {"doi": "not_valid", "error": ValueError}, + {"doi": 1, "error": TypeError}, + ] + table = Table("Table") + for test in test_data: + if test["error"]: + self.assertRaises(test["error"], table.add_related_doi, test["doi"]) + else: + table.add_related_doi(test["doi"]) + assert test["doi"] == table.related_tables[-1] + assert len(table.related_tables) == 1 + + def test_add_related_recid(self): + """Test insertion and retrieval of recid values in the Submission object""" + test_data = [ + 
{"recid": 1, "error": False}, + {"recid": "1", "error": False}, + {"recid": -1, "error": ValueError}, + {"recid": "a", "error": TypeError} + ] + sub = Submission() + for test in test_data: + if test["error"]: + self.assertRaises(test["error"], sub.add_related_recid, test["recid"]) + else: + sub.add_related_recid(test["recid"]) + assert int(test["recid"]) == sub.related_records[-1] + assert len(sub.related_records) == 2