diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..bf15247
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,18 @@
+# Changelog
+All notable changes to this project will be documented in this file.
+
+The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
+and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
+
+## [1.0.1]
+
+### Changed
+
+ - Add tags argument to generate_pac_bio_id
+ - Sort tags on creation of PacBioEntity object
+
+## [1.0.0]
+
+### Added
+
+ - Ability to generate a product id for a PacBio well
diff --git a/bin/generate_pac_bio_id b/bin/generate_pac_bio_id
index aeef03c..726b481 100755
--- a/bin/generate_pac_bio_id
+++ b/bin/generate_pac_bio_id
@@ -31,12 +31,19 @@ parser.add_argument(
 
 parser.add_argument("well_label", type=str, help="The well label")
 
+parser.add_argument(
+    "--tags",
+    type=str,
+    required=False,
+    help="A comma separated list of tag sequences to include in id generation",
+)
+
 args = parser.parse_args()
 
 
 def main():
     print(
-        f"{PacBioEntity(run_name=args.run_name, well_label=args.well_label).hash_product_id()}\n"
+        f"{PacBioEntity(run_name=args.run_name, well_label=args.well_label, tags=args.tags).hash_product_id()}\n"
     )
 
 
diff --git a/npg_id_generation/pac_bio.py b/npg_id_generation/pac_bio.py
index 13cadc5..c862837 100644
--- a/npg_id_generation/pac_bio.py
+++ b/npg_id_generation/pac_bio.py
@@ -45,6 +45,14 @@ def attributes_are_non_empty_strings(cls, v):
             raise ValueError("Cannot be an empty string")
         return v
 
+    @validator("tags")
+    def sort_tags(cls, v):
+        """Return the comma-separated tags in sorted order; None passes through."""
+        if v is None:
+            return None
+        ordered = sorted(v.split(","))
+        return ",".join(ordered)
+
     def hash_product_id(self):
         """Generate a sha256sum for the PacBio Entity"""
 
diff --git a/tests/test_hashing.py b/tests/test_hashing.py
index 67a61c1..874dac2 100644
--- a/tests/test_hashing.py
+++ b/tests/test_hashing.py
@@ -136,3 +136,27 @@ def test_expected_hashes():
             PacBioEntity.parse_raw(json_str, 
content_type="json").hash_product_id()
             == expected_hash
         )
+
+
+def test_tags_sorted():
+    """Test that tags are sorted on creation so that tag order does not affect the id"""
+
+    pb_entity_1 = PacBioEntity(
+        run_name="MARATHON", well_label="A1", tags="TCGA,ACGT,TGAC,AACG"
+    )
+    assert pb_entity_1.tags == "AACG,ACGT,TCGA,TGAC"
+
+    pb_entity_2 = PacBioEntity(
+        run_name="MARATHON", well_label="A1", tags="ACGT,AACG,TGAC,TCGA"
+    )
+
+    pb_entity_3 = PacBioEntity.parse_raw(
+        '{"run_name": "MARATHON", "well_label": "A1", "tags": "TGAC,TCGA,AACG,ACGT"}'
+    )
+
+    assert pb_entity_1.tags == pb_entity_2.tags == pb_entity_3.tags
+    assert (
+        pb_entity_1.hash_product_id()
+        == pb_entity_2.hash_product_id()
+        == pb_entity_3.hash_product_id()
+    )