diff --git a/alembic/manual_migrations/migrate_non_api_like_constants.py b/alembic/manual_migrations/migrate_non_api_like_constants.py new file mode 100644 index 00000000..e59d5f3c --- /dev/null +++ b/alembic/manual_migrations/migrate_non_api_like_constants.py @@ -0,0 +1,40 @@ +import sqlalchemy as sa +from sqlalchemy.orm import Session, configure_mappers + +from mavedb.models import * +from mavedb.models.enums.target_category import TargetCategory +from mavedb.models.target_gene import TargetGene + +from mavedb.db.session import SessionLocal + +configure_mappers() + +def api_like_target_gene_category(category: str): + if category == "Protein coding": + return TargetCategory.protein_coding + elif category == "Other noncoding": + return TargetCategory.other_noncoding + elif category == "Regulatory": + return TargetCategory.regulatory + else: + raise ValueError() + + +def do_migration(db: Session): + target_genes = db.scalars(sa.select(TargetGene)).all() + + for target in target_genes: + target.category = api_like_target_gene_category(target.category) + db.add(target) + + db.commit() + + +if __name__ == "__main__": + db = SessionLocal() + db.current_user = None # type: ignore + + do_migration(db) + + db.commit() + db.close() diff --git a/alembic/versions/03c7124c33e1_target_category_enum.py b/alembic/versions/03c7124c33e1_target_category_enum.py new file mode 100644 index 00000000..ce9720ac --- /dev/null +++ b/alembic/versions/03c7124c33e1_target_category_enum.py @@ -0,0 +1,54 @@ +"""Target category enum + +Revision ID: 03c7124c33e1 +Revises: 2b6f40ea2fb6 +Create Date: 2024-11-01 11:27:03.609116 + +""" + +from alembic import op +import sqlalchemy as sa + + +# revision identifiers, used by Alembic. +revision = "03c7124c33e1" +down_revision = "2b6f40ea2fb6" +branch_labels = None +depends_on = None + + +def upgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "target_genes", + "category", + type_=sa.Enum( + "protein_coding", + "other_noncoding", + "regulatory", + name="targetcategory", + native_enum=False, + create_constraint=True, + length=32, + ), + ) + # ### end Alembic commands ### + + +def downgrade(): + # ### commands auto generated by Alembic - please adjust! ### + op.alter_column( + "target_genes", + "category", + type_=sa.String(), + existing_type=sa.Enum( + "protein_coding", + "other_noncoding", + "regulatory", + name="targetcategory", + native_enum=False, + create_constraint=True, + length=32, + ), + ) + # ### end Alembic commands ### diff --git a/src/mavedb/lib/validation/constants/target.py b/src/mavedb/lib/validation/constants/target.py index 6bf8392e..f64b4bd4 100644 --- a/src/mavedb/lib/validation/constants/target.py +++ b/src/mavedb/lib/validation/constants/target.py @@ -1,2 +1 @@ -valid_categories = ["Protein coding", "Regulatory", "Other noncoding"] valid_sequence_types = ["infer", "dna", "protein"] diff --git a/src/mavedb/lib/validation/target.py b/src/mavedb/lib/validation/target.py index f22121ac..3d65c7b9 100644 --- a/src/mavedb/lib/validation/target.py +++ b/src/mavedb/lib/validation/target.py @@ -1,31 +1,10 @@ from fqfa import infer_sequence_type from fqfa.validator import amino_acids_validator, dna_bases_validator -from mavedb.lib.validation.constants.target import valid_categories, valid_sequence_types +from mavedb.lib.validation.constants.target import valid_sequence_types from mavedb.lib.validation.exceptions import ValidationError -def validate_target_category(category: str): - """ - If the target category provided does not fall within a pre-defined list of valid categories. - - Parameters - __________ - category: str - The target category to be validated. - - Raises - ______ - ValidationError - If the target category provided is not valid. - """ - if category not in valid_categories: - raise ValidationError( - "{} is not a valid target category. Valid categories are " - "Protein coding, Regulatory, and Other noncoding".format(category) - ) - - def validate_sequence_category(sequence_type: str): """ If the sequence type provided does not fall within a pre-defined list of valid sequence types. diff --git a/src/mavedb/models/enums/target_category.py b/src/mavedb/models/enums/target_category.py new file mode 100644 index 00000000..540ca847 --- /dev/null +++ b/src/mavedb/models/enums/target_category.py @@ -0,0 +1,7 @@ +from enum import Enum + + +class TargetCategory(str, Enum): + protein_coding = "protein_coding" + regulatory = "regulatory" + other_noncoding = "other_noncoding" diff --git a/src/mavedb/models/target_gene.py b/src/mavedb/models/target_gene.py index 36b88f85..19f3ea96 100644 --- a/src/mavedb/models/target_gene.py +++ b/src/mavedb/models/target_gene.py @@ -1,11 +1,12 @@ from datetime import date from typing import TYPE_CHECKING -from sqlalchemy import Column, Date, ForeignKey, Integer, String +from sqlalchemy import Column, Date, Enum, ForeignKey, Integer, String from sqlalchemy.dialects.postgresql import JSONB from sqlalchemy.orm import Mapped, backref, relationship from mavedb.db.base import Base +from mavedb.models.enums.target_category import TargetCategory from mavedb.models.score_set import ScoreSet from mavedb.models.target_accession import TargetAccession from mavedb.models.target_sequence import TargetSequence @@ -24,7 +25,10 @@ class TargetGene(Base): id = Column(Integer, primary_key=True) name = Column(String, nullable=False) - category = Column(String, nullable=False) + category = Column( + Enum(TargetCategory, create_constraint=True, length=32, native_enum=False, validate_strings=True), + nullable=False, + ) score_set_id = Column("scoreset_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=False) score_set: Mapped[ScoreSet] = relationship(back_populates="target_genes", single_parent=True, uselist=True) diff --git a/src/mavedb/view_models/target_gene.py b/src/mavedb/view_models/target_gene.py index c69f659f..51127f99 100644 --- a/src/mavedb/view_models/target_gene.py +++ b/src/mavedb/view_models/target_gene.py @@ -4,7 +4,7 @@ from pydantic import root_validator from pydantic.utils import GetterDict -from mavedb.lib.validation import target +from mavedb.models.enums.target_category import TargetCategory from mavedb.view_models import external_gene_identifier_offset from mavedb.view_models.base.base import BaseModel, validator from mavedb.view_models.target_accession import SavedTargetAccession, TargetAccession, TargetAccessionCreate @@ -40,7 +40,7 @@ class TargetGeneBase(BaseModel): """Base class for target gene view models.""" name: str - category: str + category: TargetCategory external_identifiers: Sequence[external_gene_identifier_offset.ExternalGeneIdentifierOffsetBase] class Config: @@ -48,10 +48,7 @@ class Config: class TargetGeneModify(TargetGeneBase): - @validator("category") - def validate_category(cls, v): - target.validate_target_category(v) - return v + pass class TargetGeneCreate(TargetGeneModify): diff --git a/tests/helpers/constants.py b/tests/helpers/constants.py index 580294b6..75052d43 100644 --- a/tests/helpers/constants.py +++ b/tests/helpers/constants.py @@ -301,7 +301,7 @@ "target_genes": [ { "name": "TEST1", - "category": "Protein coding", + "category": "protein_coding", "external_identifiers": [], "target_sequence": { "sequence_type": "dna", @@ -327,7 +327,7 @@ "targetGenes": [ { "name": "TEST1", - "category": "Protein coding", + "category": "protein_coding", "externalIdentifiers": [], "targetSequence": { "sequenceType": "dna", @@ -369,7 +369,7 @@ "targetGenes": [ { "name": "TEST1", - "category": "Protein coding", + "category": "protein_coding", "externalIdentifiers": [], "id": 1, "targetSequence": { @@ -413,7 +413,7 @@ "targetGenes": [ { "name": "TEST2", - "category": "Protein coding", + "category": "protein_coding", "externalIdentifiers": [], "targetAccession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE}, } @@ -428,7 +428,7 @@ "target_genes": [ { "name": "TEST2", - "category": "Protein coding", + "category": "protein_coding", "external_identifiers": [], "target_accession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE}, } @@ -457,7 +457,7 @@ "targetGenes": [ { "name": "TEST2", - "category": "Protein coding", + "category": "protein_coding", "externalIdentifiers": [], "targetAccession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE}, } diff --git a/tests/routers/test_score_set.py b/tests/routers/test_score_set.py index ad841cfc..25d13b9b 100644 --- a/tests/routers/test_score_set.py +++ b/tests/routers/test_score_set.py @@ -174,6 +174,17 @@ def test_cannot_create_score_set_without_email(client, setup_router_db): assert response_data["detail"] in "There must be an email address associated with your account to use this feature." +def test_cannot_create_score_set_with_invalid_target_gene_category(client, setup_router_db): + experiment = create_experiment(client) + score_set_post_payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET) + score_set_post_payload["experimentUrn"] = experiment["urn"] + score_set_post_payload["targetGenes"][0]["category"] = "some_invalid_target_category" + response = client.post("/api/v1/score-sets/", json=score_set_post_payload) + assert response.status_code == 422 + response_data = response.json() + assert "value is not a valid enumeration member;" in response_data["detail"][0]["msg"] + + def test_get_own_private_score_set(client, setup_router_db): experiment = create_experiment(client) score_set = create_seq_score_set(client, experiment["urn"]) diff --git a/tests/validation/test_target.py b/tests/validation/test_target.py index b1920bed..0ac2db12 100644 --- a/tests/validation/test_target.py +++ b/tests/validation/test_target.py @@ -1,22 +1,8 @@ from unittest import TestCase -from mavedb.lib.validation.constants.target import valid_categories, valid_sequence_types +from mavedb.lib.validation.constants.target import valid_sequence_types from mavedb.lib.validation.exceptions import ValidationError -from mavedb.lib.validation.target import validate_sequence_category, validate_target_category, validate_target_sequence - - -class TestValidateTargetCategory(TestCase): - def test_valid(self): - for category in valid_categories: - validate_target_category(category) - - def test_invalid_category(self): - with self.assertRaises(ValidationError): - validate_target_category("Protein") - - def test_invalid_case(self): - with self.assertRaises(ValidationError): - validate_target_category("protein coding") +from mavedb.lib.validation.target import validate_sequence_category, validate_target_sequence class TestValidateSequenceCategory(TestCase): diff --git a/tests/view_models/test_target_gene.py b/tests/view_models/test_target_gene.py index 564a27fd..13f8b78a 100644 --- a/tests/view_models/test_target_gene.py +++ b/tests/view_models/test_target_gene.py @@ -5,7 +5,7 @@ def test_create_target_gene_with_sequence(): name = "UBE2I" - category = "Regulatory" + category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}] target_sequence = { "sequenceType": "dna", @@ -37,12 +37,12 @@ def test_create_target_gene_with_sequence(): target_sequence=target_sequence, ) assert externalIdentifier.name == "UBE2I" - assert externalIdentifier.category == "Regulatory" + assert externalIdentifier.category == "regulatory" def test_create_target_gene_with_accession(): name = "BRCA1" - category = "Regulatory" + category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}] target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1"} externalIdentifier = TargetGeneCreate( @@ -52,7 +52,7 @@ def test_create_target_gene_with_accession(): target_accession=target_accession, ) assert externalIdentifier.name == "BRCA1" - assert externalIdentifier.category == "Regulatory" + assert externalIdentifier.category == "regulatory" def test_create_invalid_category(): @@ -91,14 +91,14 @@ def test_create_invalid_category(): target_sequence=target_sequence, ) assert ( - "invalid name is not a valid target category. Valid categories are Protein coding, Regulatory, and Other" - " noncoding" in str(exc_info.value) + "value is not a valid enumeration member; permitted: 'protein_coding', 'regulatory', 'other_noncoding'" + in str(exc_info.value) ) def test_create_invalid_sequence_type(): name = "UBE2I" - category = "Regulatory" + category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 0}] taxonomy = { "taxId": 9606, @@ -136,7 +136,7 @@ def test_create_invalid_sequence_type(): def test_create_not_match_sequence_and_type(): name = "UBE2I" - category = "Regulatory" + category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 0}] target_sequence = {"sequenceType": "dna", "sequence": "ARCG"} taxonomy = { @@ -163,7 +163,7 @@ def test_create_not_match_sequence_and_type(): def test_create_invalid_sequence(): name = "UBE2I" - category = "Regulatory" + category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 0}] target_sequence = {"sequenceType": "dna", "sequence": "AOCG%"} taxonomy = { @@ -190,7 +190,7 @@ def test_create_invalid_sequence(): def test_cant_create_target_gene_without_sequence_or_accession(): name = "UBE2I" - category = "Regulatory" + category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}] with pytest.raises(ValueError) as exc_info: TargetGeneCreate( @@ -204,7 +204,7 @@ def test_cant_create_target_gene_without_sequence_or_accession(): def test_cant_create_target_gene_with_both_sequence_and_accession(): name = "UBE2I" - category = "Regulatory" + category = "regulatory" external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}] target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1"} target_sequence = {