From 2129071b308c5f258cb3989fc2d74d79affdad65 Mon Sep 17 00:00:00 2001
From: mksanger
Date: Thu, 27 Oct 2022 17:41:01 +0100
Subject: [PATCH 1/4] Add tags to pac bio id generation script

---
 CHANGELOG.md                 | 12 ++++++++++++
 bin/generate_pac_bio_id      |  9 ++++++++-
 npg_id_generation/pac_bio.py |  8 ++++++++
 tests/test_hashing.py        | 15 +++++++++++++++
 4 files changed, 43 insertions(+), 1 deletion(-)
 create mode 100644 CHANGELOG.md

diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..f7533f7
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,12 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [Unreleased]
+
+### Changed
+
+ - Add tags argument to generate_pac_bio_id
+ - Sort tags on creation of PacBioEntity object
\ No newline at end of file
diff --git a/bin/generate_pac_bio_id b/bin/generate_pac_bio_id
index aeef03c..4dfc23e 100755
--- a/bin/generate_pac_bio_id
+++ b/bin/generate_pac_bio_id
@@ -31,12 +31,19 @@ parser.add_argument(
 
 parser.add_argument("well_label", type=str, help="The well label")
 
+parser.add_argument(
+    "tags",
+    type=str,
+    required=False,
+    help="A comma separated list of tag sequences to include in id generation",
+)
+
 args = parser.parse_args()
 
 
 def main():
     print(
-        f"{PacBioEntity(run_name=args.run_name, well_label=args.well_label).hash_product_id()}\n"
+        f"{PacBioEntity(run_name=args.run_name, well_label=args.well_label, tags=args.tags).hash_product_id()}\n"
     )
 
 
diff --git a/npg_id_generation/pac_bio.py b/npg_id_generation/pac_bio.py
index 13cadc5..c862837 100644
--- a/npg_id_generation/pac_bio.py
+++ b/npg_id_generation/pac_bio.py
@@ -45,6 +45,14 @@ def attributes_are_non_empty_strings(cls, v):
             raise ValueError("Cannot be an empty string")
         return v
 
+    @validator("tags")
+    def sort_tags(cls, v):
+        if v is None:
+            return v
+        tags = v.split(",")
+        tags.sort()
+        return ",".join(tags)
+
     def hash_product_id(self):
         """Generate a sha256sum for the PacBio Entity"""
 
diff --git a/tests/test_hashing.py b/tests/test_hashing.py
index 67a61c1..f9faeca 100644
--- a/tests/test_hashing.py
+++ b/tests/test_hashing.py
@@ -136,3 +136,18 @@ def test_expected_hashes():
             PacBioEntity.parse_raw(json_str, content_type="json").hash_product_id()
             == expected_hash
         )
+
+
+def test_tags_sorted():
+    """Test that tags are automatically sorted alphabetically before id generation"""
+
+    pb_entity_1 = PacBioEntity(
+        run_name="MARATHON", well_label="A1", tags="TCGA,ACGT,TGAC,AACG"
+    )
+    assert pb_entity_1.tags == "AACG,ACGT,TCGA,TGAC"
+
+    pb_entity_2 = PacBioEntity(
+        run_name="MARATHON", well_label="A1", tags="ACGT,AACG,TGAC,TCGA"
+    )
+    assert pb_entity_2.tags == pb_entity_1.tags
+    assert pb_entity_1.hash_product_id() == pb_entity_2.hash_product_id()
From 38a368696ac9ec9d2fc6d57f38f9c590115e2fee Mon Sep 17 00:00:00 2001
From: mksanger
Date: Fri, 28 Oct 2022 12:56:45 +0100
Subject: [PATCH 2/4] Change tags argument to non-positional, so that it can be optional

---
 bin/generate_pac_bio_id | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/bin/generate_pac_bio_id b/bin/generate_pac_bio_id
index 4dfc23e..726b481 100755
--- a/bin/generate_pac_bio_id
+++ b/bin/generate_pac_bio_id
@@ -32,7 +32,7 @@ parser.add_argument(
 parser.add_argument("well_label", type=str, help="The well label")
 
 parser.add_argument(
-    "tags",
+    "--tags",
     type=str,
     required=False,
     help="A comma separated list of tag sequences to include in id generation",
From 2fa28c9ffa39756a80cae9468b284c34ea5b5353 Mon Sep 17 00:00:00 2001
From: mksanger
Date: Mon, 7 Nov 2022 11:37:26 +0000
Subject: [PATCH 3/4] Add test with json input to tag sorting

---
Review note (ignored by `git am`): the chained hash assertion in
test_tags_sorted now starts from pb_entity_1 so that all three entities are
compared. Previously pb_entity_3 appeared on both ends of the chain and the
hash of pb_entity_1 was never checked. Hunk sizes are unchanged; the
post-image blob id on the `index` line predates this one-token edit.

 tests/test_hashing.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/tests/test_hashing.py b/tests/test_hashing.py
index f9faeca..874dac2 100644
--- a/tests/test_hashing.py
+++ b/tests/test_hashing.py
@@ -149,5 +149,14 @@ def test_tags_sorted():
     pb_entity_2 = PacBioEntity(
         run_name="MARATHON", well_label="A1", tags="ACGT,AACG,TGAC,TCGA"
     )
-    assert pb_entity_2.tags == pb_entity_1.tags
-    assert pb_entity_1.hash_product_id() == pb_entity_2.hash_product_id()
+
+    pb_entity_3 = PacBioEntity.parse_raw(
+        '{"run_name": "MARATHON", "well_label": "A1", "tags": "TGAC,TCGA,AACG,ACGT"}'
+    )
+
+    assert pb_entity_1.tags == pb_entity_2.tags == pb_entity_3.tags
+    assert (
+        pb_entity_1.hash_product_id()
+        == pb_entity_2.hash_product_id()
+        == pb_entity_3.hash_product_id()
+    )
From 0784cc5993703ddc3f8b2a795f12e4a4030acd95 Mon Sep 17 00:00:00 2001
From: mksanger
Date: Fri, 11 Nov 2022 11:38:29 +0000
Subject: [PATCH 4/4] Update changelog for release 1.0.1

---
 CHANGELOG.md | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/CHANGELOG.md b/CHANGELOG.md
index f7533f7..bf15247 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,9 +4,15 @@ All notable changes to this project will be documented in this file.
 The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 
-## [Unreleased]
+## [1.0.1]
 
 ### Changed
 
  - Add tags argument to generate_pac_bio_id
- - Sort tags on creation of PacBioEntity object
\ No newline at end of file
+ - Sort tags on creation of PacBioEntity object
+
+## [1.0.0]
+
+### Added
+
+ - Ability to generate a product id for a PacBio well