-
Notifications
You must be signed in to change notification settings - Fork 5
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #10 from wtsi-npg/devel
pull from devel to master to create release 2.0.0
- Loading branch information
Showing
3 changed files
with
28 additions
and
52 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -19,6 +19,7 @@ | |
# @author Michael Kubiak <[email protected]> | ||
|
||
import argparse | ||
|
||
from npg_id_generation.pac_bio import PacBioEntity | ||
|
||
parser = argparse.ArgumentParser( | ||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,6 +1,9 @@ | ||
# Copyright (c) 2022 Genome Research Ltd. | ||
# Copyright (c) 2022, 2023 Genome Research Ltd. | ||
# | ||
# Author: Adam Blanchet <[email protected]> | ||
# Authors: | ||
# Adam Blanchet <[email protected]> | ||
# Michael Kubiak <[email protected]> | ||
# Marina Gourtovaia <[email protected]> | ||
# | ||
# This file is part of npg_id_generation. | ||
# | ||
|
@@ -18,14 +21,15 @@ | |
# this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
from hashlib import sha256 | ||
|
||
from pydantic import BaseModel, Extra, Field, validator | ||
|
||
|
||
class PacBioEntity(BaseModel, extra=Extra.forbid): | ||
"""A PacBio entity class for ID generation.""" | ||
"""A PacBio class for product ID generation.""" | ||
|
||
# Order these alphabetically, to allow for interoperability with | ||
# a possible Perl API. | ||
# Order the attributes alphabetically, to allow for interoperability | ||
# with a possible Perl API. | ||
# Alternatively, the sorting could be achieved with the sort_keys | ||
# argument of json.dumps(). See https://docs.python.org/3/library/json.html#basic-usage | ||
run_name: str = Field(title="Pac Bio run name as in LIMS") | ||
|
@@ -34,8 +38,10 @@ class PacBioEntity(BaseModel, extra=Extra.forbid): | |
default=None, | ||
title="A string representing tag or tags", | ||
description=""" | ||
A string representing a single tag (index) sequence or a comma-separated | ||
list of multiple tags. It is important to order multiple tags consistently. | ||
A string representing a single barcode index sequence (tag) or | ||
a comma-separated list of multiple tags. The order of tags in | ||
the list is meaningful for the purpose of product identification, | ||
therefore it should not be changed by the code of this class. | ||
""", | ||
) | ||
|
||
|
@@ -45,14 +51,6 @@ def attributes_are_non_empty_strings(cls, v): | |
raise ValueError("Cannot be an empty string") | ||
return v | ||
|
||
@validator("tags") | ||
def sort_tags(cls, v): | ||
if v is None: | ||
return v | ||
tags = v.split(",") | ||
tags.sort() | ||
return ",".join(tags) | ||
|
||
def hash_product_id(self): | ||
"""Generate a sha256sum for the PacBio Entity""" | ||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,26 +1,8 @@ | ||
# Copyright (c) 2022 Genome Research Ltd. | ||
# | ||
# Author: Adam Blanchet <[email protected]> | ||
# | ||
# This file is part of npg_id_generation. | ||
# | ||
# npg_langqc is free software: you can redistribute it and/or modify it under | ||
# the terms of the GNU General Public License as published by the Free Software | ||
# Foundation; either version 3 of the License, or (at your option) any later | ||
# version. | ||
# | ||
# This program is distributed in the hope that it will be useful, but WITHOUT | ||
# ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS | ||
# FOR A PARTICULAR PURPOSE. See the GNU General Public License for more | ||
# details. | ||
# | ||
# You should have received a copy of the GNU General Public License along with | ||
# this program. If not, see <http://www.gnu.org/licenses/>. | ||
|
||
"""Tests checking the hashing behaviour of objects.""" | ||
|
||
import pytest | ||
from pydantic import ValidationError | ||
|
||
from npg_id_generation.pac_bio import PacBioEntity | ||
|
||
|
||
|
@@ -138,25 +120,20 @@ def test_expected_hashes(): | |
) | ||
|
||
|
||
def test_tags_sorted(): | ||
"""Test that tags are automatically sorted alphabetically before id generation""" | ||
|
||
pb_entity_1 = PacBioEntity( | ||
run_name="MARATHON", well_label="A1", tags="TCGA,ACGT,TGAC,AACG" | ||
) | ||
assert pb_entity_1.tags == "AACG,ACGT,TCGA,TGAC" | ||
def test_tags_not_sorted(): | ||
"""Test that tags are not changed prior to id generation""" | ||
|
||
pb_entity_2 = PacBioEntity( | ||
run_name="MARATHON", well_label="A1", tags="ACGT,AACG,TGAC,TCGA" | ||
) | ||
|
||
pb_entity_3 = PacBioEntity.parse_raw( | ||
'{"run_name": "MARATHON", "well_label": "A1", "tags": "TGAC,TCGA,AACG,ACGT"}' | ||
) | ||
run = "MARATHON" | ||
well = "A1" | ||
# The tags in these strings are the same; only their order | ||
# differs. | ||
tags_strings = ["TCGA,ACGT,TGAC,AACG", "ACGT,AACG,TGAC,TCGA", "TGAC,TCGA,AACG,ACGT"] | ||
pb_entities = [] | ||
for tag_string in tags_strings: | ||
pb_entities.append(PacBioEntity(run_name=run, well_label=well, tags=tag_string)) | ||
|
||
assert pb_entity_1.tags == pb_entity_2.tags == pb_entity_3.tags | ||
assert ( | ||
pb_entity_3.hash_product_id() | ||
== pb_entity_2.hash_product_id() | ||
== pb_entity_3.hash_product_id() | ||
pb_entities[0].hash_product_id() | ||
!= pb_entities[1].hash_product_id() | ||
!= pb_entities[2].hash_product_id() | ||
) |