Skip to content

Commit

Permalink
Merge pull request #12 from mgcam/improve_tag_validation
Browse files Browse the repository at this point in the history
Added validation for multiple tags.
  • Loading branch information
mksanger authored Apr 27, 2023
2 parents 607d5a2 + ffb5440 commit 8ae6d78
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 9 deletions.
2 changes: 1 addition & 1 deletion npg_id_generation/pac_bio.py
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ def well_label_conforms_to_pattern(cls, v):

# Validator registered for the "tags" field: raises ValueError for any
# non-None value that does not match the expected pattern, i.e. that is not
# a comma separated list of uppercase DNA sequences (letters A, C, G, T).
# NOTE(review): this span is a rendered diff hunk, so two versions of the
# `if` line appear back to back and the original indentation is lost. Only
# one of them is live in the real file; presumably the second (stricter) one.
@validator("tags")
def tags_have_correct_characters(cls, v):
# Looser pattern (presumably the line this commit removes): any non-empty mix
# of A/C/G/T and commas matches, so ",", "ACCTG," and "ACCTG,,GGTAC" pass.
if (v is not None) and (not re.match("^[ACGT,]+$", v)):
# Stricter pattern (presumably the line this commit adds): one or more
# non-empty A/C/G/T runs joined by single commas, so leading, trailing,
# doubled and lone commas are rejected.
# NOTE(review): in Python `$` also matches just before a trailing newline,
# so "ACGT\n" still passes; `\Z` or re.fullmatch would close that gap.
if (v is not None) and (not re.match("^[ACGT]+(,[ACGT]+)*$", v)):
raise ValueError(
"Tags should be a comma separated list of uppercase DNA sequences"
)
Expand Down
20 changes: 12 additions & 8 deletions tests/test_hashing.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,21 +108,25 @@ def test_well_label_conforms_to_pattern():


# Checks that constructing a PacBioEntity with a malformed "tags" string
# raises ValidationError and that the error text contains the validator's
# message.
# NOTE(review): this span is a rendered diff hunk with indentation lost. The
# one-line `bad_tags` list looks like the pre-change version and the
# multi-line list its replacement; only one exists in the real file.
def test_tags_have_correct_characters():
bad_tags = ["ABCD", "ACGT.AGTC", " ACGT", "ACGT ", "acgt"]
bad_tags = [
"ABCD",
"ACGT.AGTC",
" ACGT",
"ACGT ",
"acgt",
# The remaining cases target separator handling: a lone comma, whitespace on
# both sides, a trailing comma, a leading comma and a doubled comma.
",",
" ACCTG ",
"ACCTG,",
",ACCTG",
"ACCTG,,GGTAC",
]
# Every bad value must be rejected with the same error message.
for tag in bad_tags:
with pytest.raises(ValidationError) as excinfo:
PacBioEntity(run_name="MARATHON", well_label="A1", tags=tag)
assert (
"Tags should be a comma separated list of uppercase DNA sequences"
in str(excinfo.value)
)
# Same rejection exercised through JSON parsing instead of keyword arguments.
# NOTE(review): presumably this parse_raw check is among the lines the commit
# deletes (the hunk header reports 8 deletions) - confirm against the repo.
with pytest.raises(ValidationError) as excinfo:
PacBioEntity.parse_raw(
'{"run_name":"MARATHON", "well_label":"A1", "tags":"ABCD"}'
)
assert "Tags should be a comma separated list of uppercase DNA sequences" in str(
excinfo.value
)


def test_expected_hashes():
Expand Down

0 comments on commit 8ae6d78

Please sign in to comment.