diff --git a/CHANGELOG.md b/CHANGELOG.md index e42887d..39f73d4 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -4,6 +4,15 @@ All notable changes to this project will be documented in this file. The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/), and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html). +## [5.0.1] + +### [Fixed] + - An existing bug in constructing a JSON representation for a Pydantic model + when the plate_number attribute is 2 or larger resulted in incorrect IDs + being generated. The fix includes a return to using Pydantic models' built-in + model_dump_json method in order to mitigate the risk that comes with manual + generation of JSON strings. + ## [5.0.0] ### [Changed] diff --git a/npg_id_generation/pac_bio.py b/npg_id_generation/pac_bio.py index 82e3e54..33d2b0c 100644 --- a/npg_id_generation/pac_bio.py +++ b/npg_id_generation/pac_bio.py @@ -20,7 +20,6 @@ # You should have received a copy of the GNU General Public License along with # this program. If not, see . -import io import re from hashlib import sha256 from typing import Optional @@ -57,8 +56,7 @@ class PacBioEntity(BaseModel): execution time. Order the attributes alphabetically to maintain order in the output - of model_to_json(). The hash_product_id() method does not depend on - the Pydantic ordering, however (it uses a custom JSON serializer). + of model_dump_json(). """ model_config = ConfigDict(extra="forbid") @@ -112,27 +110,9 @@ def tags_have_correct_characters(cls, v): def hash_product_id(self): """Generate a sha256sum for the PacBio Entity""" - # Avoid using Pydantic's model_to_json() method as it requires somewhat - # complicated setup with decorators to create our backwards-compatible JSON - # serialization. - - # The JSON is built with StringIO to ensure the order of the attributes and - # that it's faster than using json.dumps() with sort_keys=True (timeit - # estimates that this is ~4x faster. - json = io.StringIO() - json.write('{"run_name":"') - json.write(self.run_name) - json.write('","well_label":"') - json.write(self.well_label) - json.write('"') if self.plate_number is not None and self.plate_number > 1: - json.write(',"plate_number":') - json.write(str(self.plate_number)) - json.write('"') - if self.tags is not None: - json.write(',"tags":"') - json.write(self.tags) - json.write('"') - json.write("}") - - return sha256(json.getvalue().encode("utf-8")).hexdigest() + json = self.model_dump_json(exclude_none=True) + else: + json = self.model_dump_json(exclude_none=True, exclude=["plate_number"]) + + return sha256(json.encode()).hexdigest() diff --git a/tests/test_hashing.py b/tests/test_hashing.py index d579d6d..d5dd486 100644 --- a/tests/test_hashing.py +++ b/tests/test_hashing.py @@ -196,14 +196,24 @@ def test_multiple_plates_make_difference(): def test_expected_hashes(): """Test against expected hashes.""" # plate_number absent (historical runs) or plate_number == 1 - p1_sha256 = "cda15311f706217e31e32d42d524bc35662a6fc15623ce5abe6a31ed741801ae" + id1_sha256 = "cda15311f706217e31e32d42d524bc35662a6fc15623ce5abe6a31ed741801ae" # plate_number == 2 - p2_sha256 = "7ca9d350c9b14f0883ac568220b8e5d97148a4eeb41d9de00b5733299d7bcd89" + id2_sha256 = "9d883922744395bccb226069cc2b77eac867778e2e31c8c1ac3651ed2a83c059" + id3_sha256 = "dc77c4a7f34d84afbb895fcaee72fc8bead9dac20e8d3a9614091d9dd4519acd" + id4_sha256 = "e40d98a697a5ffcb09f7ed62597216094198c0d804be100bbb9f6050482182e3" test_cases = [ - ('{"run_name": "MARATHON", "well_label": "A1"}', p1_sha256), - ('{"run_name": "MARATHON", "well_label": "A1", "plate_number": 1}', p1_sha256), - ('{"run_name": "MARATHON", "well_label": "A1", "plate_number": 2}', p2_sha256), + ('{"run_name": "MARATHON", "well_label": "A1"}', id1_sha256), + ('{"run_name": "MARATHON", "well_label": "A1", "plate_number": 1}', id1_sha256), + ('{"run_name": "MARATHON", "well_label": "A1", "plate_number": 2}', id2_sha256), + ( + '{"run_name": "TRACTION_RUN_16", "well_label": "A1", "plate_number": 2}', + id3_sha256, + ), + ( + '{"run_name": "TRACTION_RUN_16", "well_label": "A1", "plate_number": 3}', + id4_sha256, + ), ( '{"run_name": "SEMI-MARATHON", "well_label": "D1"}', "b55417615e458c23049cc84822531a435c0c4069142f0e1d5e4378d48d9f7bd2",