Skip to content

Commit

Permalink
Merge pull request #352 from VariantEffect/feature/bencap/35/api-like…
Browse files Browse the repository at this point in the history
…-target-category-constant

Target Category Constant
  • Loading branch information
bencap authored Nov 12, 2024
2 parents 1621dd0 + 5809ba4 commit e8e1c48
Show file tree
Hide file tree
Showing 11 changed files with 141 additions and 64 deletions.
40 changes: 40 additions & 0 deletions alembic/manual_migrations/migrate_non_api_like_constants.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
import sqlalchemy as sa
from sqlalchemy.orm import Session, configure_mappers

from mavedb.models import *
from mavedb.models.enums.target_category import TargetCategory
from mavedb.models.target_gene import TargetGene

from mavedb.db.session import SessionLocal

configure_mappers()

def api_like_target_gene_category(category: str):
    """Translate a legacy display-style target gene category into its enum member.

    Parameters
    ----------
    category : str
        One of the legacy strings "Protein coding", "Other noncoding" or
        "Regulatory" (exact, case-sensitive match).

    Returns
    -------
    TargetCategory
        The member corresponding to the legacy string.

    Raises
    ------
    ValueError
        If ``category`` is not one of the legacy strings. The message names the
        offending value so a failed migration run can be diagnosed from the
        traceback alone.
    """
    # Legacy string -> TargetCategory member name. The member is resolved only
    # after a successful lookup, so unknown input fails before the enum is used.
    legacy_to_member_name = {
        "Protein coding": "protein_coding",
        "Other noncoding": "other_noncoding",
        "Regulatory": "regulatory",
    }

    # Non-string input (e.g. None) is treated as unknown, like any other
    # value that matches none of the legacy strings.
    member_name = legacy_to_member_name.get(category) if isinstance(category, str) else None
    if member_name is None:
        expected = ", ".join(repr(legacy) for legacy in sorted(legacy_to_member_name))
        raise ValueError(f"Unrecognized target gene category {category!r}; expected one of: {expected}")

    return getattr(TargetCategory, member_name)


def do_migration(db: Session):
    """Rewrite the category of every target gene to its enum equivalent.

    Selects all ``TargetGene`` rows through ``db``, replaces each row's
    ``category`` with the result of ``api_like_target_gene_category`` and
    commits once after all rows have been processed. An unrecognized category
    raises ``ValueError`` from the converter before anything is committed.
    """
    every_target_gene = sa.select(TargetGene)

    for gene in db.scalars(every_target_gene).all():
        gene.category = api_like_target_gene_category(gene.category)
        db.add(gene)

    db.commit()


if __name__ == "__main__":
    # Script entry point for running this manual migration directly.
    db = SessionLocal()
    try:
        # NOTE(review): presumably read by session-level hooks elsewhere in the
        # project; nothing in this script uses it directly -- confirm.
        db.current_user = None  # type: ignore

        do_migration(db)

        # do_migration already commits; kept so the script's observable
        # behaviour is unchanged.
        db.commit()
    finally:
        # Always release the session, including when the migration raises.
        db.close()
54 changes: 54 additions & 0 deletions alembic/versions/03c7124c33e1_target_category_enum.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,54 @@
"""Target category enum
Revision ID: 03c7124c33e1
Revises: 2b6f40ea2fb6
Create Date: 2024-11-01 11:27:03.609116
"""

from alembic import op
import sqlalchemy as sa


# revision identifiers, used by Alembic.
revision = "03c7124c33e1"
down_revision = "2b6f40ea2fb6"
branch_labels = None
depends_on = None


def upgrade():
    """Retype ``target_genes.category`` as the ``targetcategory`` enum.

    The enum is declared with ``native_enum=False`` and ``length=32``, i.e. it
    is not a database-native ENUM type. Only the column type is altered here;
    stored values are not rewritten by this revision.

    NOTE(review): ``create_constraint=True`` is passed, but whether a CHECK
    constraint is actually emitted by an ALTER COLUMN should be confirmed
    against the generated SQL.
    """
    category_enum = sa.Enum(
        "protein_coding",
        "other_noncoding",
        "regulatory",
        name="targetcategory",
        native_enum=False,
        create_constraint=True,
        length=32,
    )

    op.alter_column("target_genes", "category", type_=category_enum)


def downgrade():
    """Revert ``target_genes.category`` from the ``targetcategory`` enum to a plain string.

    Only the column type is altered; stored values are left exactly as they
    are (this revision does not map them back to any earlier spelling).
    """
    category_enum = sa.Enum(
        "protein_coding",
        "other_noncoding",
        "regulatory",
        name="targetcategory",
        native_enum=False,
        create_constraint=True,
        length=32,
    )

    op.alter_column(
        "target_genes",
        "category",
        existing_type=category_enum,
        type_=sa.String(),
    )
1 change: 0 additions & 1 deletion src/mavedb/lib/validation/constants/target.py
Original file line number Diff line number Diff line change
@@ -1,2 +1 @@
valid_categories = ["Protein coding", "Regulatory", "Other noncoding"]
valid_sequence_types = ["infer", "dna", "protein"]
23 changes: 1 addition & 22 deletions src/mavedb/lib/validation/target.py
Original file line number Diff line number Diff line change
@@ -1,31 +1,10 @@
from fqfa import infer_sequence_type
from fqfa.validator import amino_acids_validator, dna_bases_validator

from mavedb.lib.validation.constants.target import valid_categories, valid_sequence_types
from mavedb.lib.validation.constants.target import valid_sequence_types
from mavedb.lib.validation.exceptions import ValidationError


def validate_target_category(category: str):
"""
If the target category provided does not fall within a pre-defined list of valid categories.
Parameters
__________
category: str
The target category to be validated.
Raises
______
ValidationError
If the target category provided is not valid.
"""
if category not in valid_categories:
raise ValidationError(
"{} is not a valid target category. Valid categories are "
"Protein coding, Regulatory, and Other noncoding".format(category)
)


def validate_sequence_category(sequence_type: str):
"""
If the sequence type provided does not fall within a pre-defined list of valid sequence types.
Expand Down
7 changes: 7 additions & 0 deletions src/mavedb/models/enums/target_category.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from enum import Enum


class TargetCategory(str, Enum):
    """Closed set of categories a target gene may be assigned.

    Members are also ``str`` instances, and each member's value is identical to
    its name.
    """

    # Declaration order is the enum's iteration order; keep it stable.
    protein_coding = "protein_coding"
    regulatory = "regulatory"
    other_noncoding = "other_noncoding"
8 changes: 6 additions & 2 deletions src/mavedb/models/target_gene.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from datetime import date
from typing import TYPE_CHECKING

from sqlalchemy import Column, Date, ForeignKey, Integer, String
from sqlalchemy import Column, Date, Enum, ForeignKey, Integer, String
from sqlalchemy.dialects.postgresql import JSONB
from sqlalchemy.orm import Mapped, backref, relationship

from mavedb.db.base import Base
from mavedb.models.enums.target_category import TargetCategory
from mavedb.models.score_set import ScoreSet
from mavedb.models.target_accession import TargetAccession
from mavedb.models.target_sequence import TargetSequence
Expand All @@ -24,7 +25,10 @@ class TargetGene(Base):
id = Column(Integer, primary_key=True)

name = Column(String, nullable=False)
category = Column(String, nullable=False)
category = Column(
Enum(TargetCategory, create_constraint=True, length=32, native_enum=False, validate_strings=True),
nullable=False,
)

score_set_id = Column("scoreset_id", Integer, ForeignKey("scoresets.id"), index=True, nullable=False)
score_set: Mapped[ScoreSet] = relationship(back_populates="target_genes", single_parent=True, uselist=True)
Expand Down
9 changes: 3 additions & 6 deletions src/mavedb/view_models/target_gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
from pydantic import root_validator
from pydantic.utils import GetterDict

from mavedb.lib.validation import target
from mavedb.models.enums.target_category import TargetCategory
from mavedb.view_models import external_gene_identifier_offset
from mavedb.view_models.base.base import BaseModel, validator
from mavedb.view_models.target_accession import SavedTargetAccession, TargetAccession, TargetAccessionCreate
Expand Down Expand Up @@ -40,18 +40,15 @@ class TargetGeneBase(BaseModel):
"""Base class for target gene view models."""

name: str
category: str
category: TargetCategory
external_identifiers: Sequence[external_gene_identifier_offset.ExternalGeneIdentifierOffsetBase]

class Config:
getter_dict: ExternalIdentifiersGetter


class TargetGeneModify(TargetGeneBase):
@validator("category")
def validate_category(cls, v):
target.validate_target_category(v)
return v
pass


class TargetGeneCreate(TargetGeneModify):
Expand Down
12 changes: 6 additions & 6 deletions tests/helpers/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -301,7 +301,7 @@
"target_genes": [
{
"name": "TEST1",
"category": "Protein coding",
"category": "protein_coding",
"external_identifiers": [],
"target_sequence": {
"sequence_type": "dna",
Expand All @@ -327,7 +327,7 @@
"targetGenes": [
{
"name": "TEST1",
"category": "Protein coding",
"category": "protein_coding",
"externalIdentifiers": [],
"targetSequence": {
"sequenceType": "dna",
Expand Down Expand Up @@ -369,7 +369,7 @@
"targetGenes": [
{
"name": "TEST1",
"category": "Protein coding",
"category": "protein_coding",
"externalIdentifiers": [],
"id": 1,
"targetSequence": {
Expand Down Expand Up @@ -413,7 +413,7 @@
"targetGenes": [
{
"name": "TEST2",
"category": "Protein coding",
"category": "protein_coding",
"externalIdentifiers": [],
"targetAccession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE},
}
Expand All @@ -428,7 +428,7 @@
"target_genes": [
{
"name": "TEST2",
"category": "Protein coding",
"category": "protein_coding",
"external_identifiers": [],
"target_accession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE},
}
Expand Down Expand Up @@ -457,7 +457,7 @@
"targetGenes": [
{
"name": "TEST2",
"category": "Protein coding",
"category": "protein_coding",
"externalIdentifiers": [],
"targetAccession": {"accession": VALID_ACCESSION, "assembly": "GRCh37", "gene": VALID_GENE},
}
Expand Down
11 changes: 11 additions & 0 deletions tests/routers/test_score_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,17 @@ def test_cannot_create_score_set_without_email(client, setup_router_db):
assert response_data["detail"] in "There must be an email address associated with your account to use this feature."


def test_cannot_create_score_set_with_invalid_target_gene_category(client, setup_router_db):
    """A score set whose target gene category is not an enum member is rejected with 422."""
    parent_experiment = create_experiment(client)

    # Start from the minimal valid payload and corrupt only the category.
    payload = deepcopy(TEST_MINIMAL_SEQ_SCORESET)
    payload["experimentUrn"] = parent_experiment["urn"]
    payload["targetGenes"][0]["category"] = "some_invalid_target_category"

    response = client.post("/api/v1/score-sets/", json=payload)

    assert response.status_code == 422
    first_error = response.json()["detail"][0]
    assert "value is not a valid enumeration member;" in first_error["msg"]


def test_get_own_private_score_set(client, setup_router_db):
experiment = create_experiment(client)
score_set = create_seq_score_set(client, experiment["urn"])
Expand Down
18 changes: 2 additions & 16 deletions tests/validation/test_target.py
Original file line number Diff line number Diff line change
@@ -1,22 +1,8 @@
from unittest import TestCase

from mavedb.lib.validation.constants.target import valid_categories, valid_sequence_types
from mavedb.lib.validation.constants.target import valid_sequence_types
from mavedb.lib.validation.exceptions import ValidationError
from mavedb.lib.validation.target import validate_sequence_category, validate_target_category, validate_target_sequence


class TestValidateTargetCategory(TestCase):
def test_valid(self):
for category in valid_categories:
validate_target_category(category)

def test_invalid_category(self):
with self.assertRaises(ValidationError):
validate_target_category("Protein")

def test_invalid_case(self):
with self.assertRaises(ValidationError):
validate_target_category("protein coding")
from mavedb.lib.validation.target import validate_sequence_category, validate_target_sequence


class TestValidateSequenceCategory(TestCase):
Expand Down
22 changes: 11 additions & 11 deletions tests/view_models/test_target_gene.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@

def test_create_target_gene_with_sequence():
name = "UBE2I"
category = "Regulatory"
category = "regulatory"
external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}]
target_sequence = {
"sequenceType": "dna",
Expand Down Expand Up @@ -37,12 +37,12 @@ def test_create_target_gene_with_sequence():
target_sequence=target_sequence,
)
assert externalIdentifier.name == "UBE2I"
assert externalIdentifier.category == "Regulatory"
assert externalIdentifier.category == "regulatory"


def test_create_target_gene_with_accession():
name = "BRCA1"
category = "Regulatory"
category = "regulatory"
external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}]
target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1"}
externalIdentifier = TargetGeneCreate(
Expand All @@ -52,7 +52,7 @@ def test_create_target_gene_with_accession():
target_accession=target_accession,
)
assert externalIdentifier.name == "BRCA1"
assert externalIdentifier.category == "Regulatory"
assert externalIdentifier.category == "regulatory"


def test_create_invalid_category():
Expand Down Expand Up @@ -91,14 +91,14 @@ def test_create_invalid_category():
target_sequence=target_sequence,
)
assert (
"invalid name is not a valid target category. Valid categories are Protein coding, Regulatory, and Other"
" noncoding" in str(exc_info.value)
"value is not a valid enumeration member; permitted: 'protein_coding', 'regulatory', 'other_noncoding'"
in str(exc_info.value)
)


def test_create_invalid_sequence_type():
name = "UBE2I"
category = "Regulatory"
category = "regulatory"
external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 0}]
taxonomy = {
"taxId": 9606,
Expand Down Expand Up @@ -136,7 +136,7 @@ def test_create_invalid_sequence_type():

def test_create_not_match_sequence_and_type():
name = "UBE2I"
category = "Regulatory"
category = "regulatory"
external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 0}]
target_sequence = {"sequenceType": "dna", "sequence": "ARCG"}
taxonomy = {
Expand All @@ -163,7 +163,7 @@ def test_create_not_match_sequence_and_type():

def test_create_invalid_sequence():
name = "UBE2I"
category = "Regulatory"
category = "regulatory"
external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 0}]
target_sequence = {"sequenceType": "dna", "sequence": "AOCG%"}
taxonomy = {
Expand All @@ -190,7 +190,7 @@ def test_create_invalid_sequence():

def test_cant_create_target_gene_without_sequence_or_accession():
name = "UBE2I"
category = "Regulatory"
category = "regulatory"
external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}]
with pytest.raises(ValueError) as exc_info:
TargetGeneCreate(
Expand All @@ -204,7 +204,7 @@ def test_cant_create_target_gene_without_sequence_or_accession():

def test_cant_create_target_gene_with_both_sequence_and_accession():
name = "UBE2I"
category = "Regulatory"
category = "regulatory"
external_identifiers = [{"identifier": {"dbName": "Ensembl", "identifier": "ENSG00000103275"}, "offset": 1}]
target_accession = {"accession": "NM_001637.3", "assembly": "GRCh37", "gene": "BRCA1"}
target_sequence = {
Expand Down

0 comments on commit e8e1c48

Please sign in to comment.