Add wt_scores; Literal classifications; Explicit label; Ranges as list
bencap committed Sep 17, 2024
1 parent ecf22e3 commit 30aa4b9
Showing 3 changed files with 207 additions and 71 deletions.
78 changes: 50 additions & 28 deletions src/mavedb/view_models/score_set.py
@@ -2,8 +2,8 @@
from __future__ import annotations

from datetime import date
from pydantic import root_validator
from typing import Collection, Dict, Optional, Any, Sequence
from pydantic import root_validator, conlist
from typing import Collection, Dict, Literal, Optional, Any, Sequence

from humps import camelize

@@ -44,14 +44,19 @@ class ExternalLink(BaseModel):


class ScoreRange(BaseModel):
    label: str
    description: Optional[str]
    classification: Literal["normal", "abnormal"]
    # Purposefully vague type hint because of some odd JSON Schema generation behavior.
    # Typing this as tuple[Union[float, None], Union[float, None]] will generate an invalid
    # jsonschema, and fail all tests that access the schema. This may be fixed in pydantic v2,
    # but it is unclear. Even just typing it as Tuple[Any, Any] will generate an invalid schema!
    #
    # tuple[Union[float, None], Union[float, None]]
    range: list[Any]
    # but it's unclear. Even just typing it as Tuple[Any, Any] will generate an invalid schema!
    range: list[Any]  # really: tuple[Union[float, None], Union[float, None]]


class ScoreRanges(BaseModel):
    wt_score: float
    ranges: conlist(ScoreRange, min_items=1)


class ScoreSetGetter(PublicationIdentifiersGetter):
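
Editor's note: the sketch below re-declares the new models as standalone pydantic v1 classes (the *Sketch names are illustrative, not the shipped module) to show the shape a score_ranges payload must take and what conlist(..., min_items=1) enforces. It is a minimal sketch under those assumptions, not the project's own code.

from typing import Any, Literal, Optional

from pydantic import BaseModel, ValidationError, conlist


class ScoreRangeSketch(BaseModel):
    label: str
    description: Optional[str]
    classification: Literal["normal", "abnormal"]
    range: list[Any]  # really a two-item [lower, upper] pair; None means unbounded


class ScoreRangesSketch(BaseModel):
    wt_score: float
    ranges: conlist(ScoreRangeSketch, min_items=1)  # at least one range is required


# A well-formed payload parses into the nested models...
ScoreRangesSketch(
    wt_score=0.5,
    ranges=[{"label": "range_1", "range": [-2, 2], "classification": "normal"}],
)

# ...while an empty `ranges` list or an unrecognized classification is rejected.
try:
    ScoreRangesSketch(wt_score=0.5, ranges=[])
except ValidationError as exc:
    print(exc)  # complains that `ranges` needs at least 1 item
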
@@ -83,7 +88,7 @@ class ScoreSetModify(ScoreSetBase):
    secondary_publication_identifiers: Optional[list[PublicationIdentifierCreate]]
    doi_identifiers: Optional[list[DoiIdentifierCreate]]
    target_genes: list[TargetGeneCreate]
    score_ranges: Optional[dict[str, ScoreRange]]
    score_ranges: Optional[ScoreRanges]

    @validator("title", "short_description", "abstract_text", "method_text")
    def validate_field_is_non_empty(cls, v):
@@ -139,21 +144,23 @@ def at_least_one_target_gene_exists(cls, field_value, values):
        return field_value

    @validator("score_ranges")
    def ranges_are_not_backwards(cls, field_value: dict[str, ScoreRange]):
        for k, v in field_value.items():
            if len(v.range) != 2:
    def ranges_are_not_backwards(cls, field_value: ScoreRanges):
        for range_model in field_value.ranges:
            if len(range_model.range) != 2:
                raise ValidationError("Only a lower and upper bound are allowed.")
            if inf_or_float(v.range[0], True) > inf_or_float(v.range[1], False):
            if inf_or_float(range_model.range[0], True) > inf_or_float(range_model.range[1], False):
                raise ValidationError(
                    f"The lower bound of the `{k}` score range may not be larger than the upper bound."
                    f"The lower bound of the `{range_model.label}` score range may not be larger than the upper bound."
                )
            elif inf_or_float(range_model.range[0], True) == inf_or_float(range_model.range[1], False):
                raise ValidationError(
                    f"The lower and upper bound of the `{range_model.label}` score range may not be the same."
                )
            elif inf_or_float(v.range[0], True) == inf_or_float(v.range[1], False):
                raise ValidationError(f"The lower and upper bound of the `{k}` score range may not be the same.")

        return field_value

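The bound comparisons above rely on an inf_or_float helper that is not part of this diff. A plausible stand-in, assuming its job is simply to map a missing (None) bound to the appropriate infinity, might look like this; inf_or_float_sketch is a hypothetical name used only for illustration.

import math
from typing import Optional, Union


def inf_or_float_sketch(value: Optional[Union[int, float]], lower: bool) -> float:
    # None stands in for an unbounded side of a range: -inf when it is a lower
    # bound, +inf when it is an upper bound; real numbers compare as floats.
    if value is None:
        return -math.inf if lower else math.inf
    return float(value)


# Under that assumption, (2, None) reads as "2 and above", so its lower bound
# is below its upper bound and `ranges_are_not_backwards` accepts it.
assert inf_or_float_sketch(2, lower=True) < inf_or_float_sketch(None, lower=False)
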
    @validator("score_ranges")
    def ranges_do_not_overlap(cls, field_value: dict[str, ScoreRange]):
    def ranges_do_not_overlap(cls, field_value: ScoreRanges):
        def test_overlap(tp1, tp2) -> bool:
            # Always check the tuple with the lowest lower bound. If we do not check
            # overlaps in this manner, checking the overlap of (0,1) and (1,2) will
@@ -172,28 +179,43 @@ def test_overlap(tp1, tp2) -> bool:

            return False

        for i, (k_test, v_test) in enumerate(field_value.items()):
            for k_check, v_check in list(field_value.items())[i + 1 :]:
                if test_overlap(v_test.range, v_check.range):
                    raise ValidationError(f"Score ranges may not overlap; `{k_test}` overlaps with `{k_check}`")
        for i, range_test in enumerate(field_value.ranges):
            for range_check in list(field_value.ranges)[i + 1 :]:
                if test_overlap(range_test.range, range_check.range):
                    raise ValidationError(
                        f"Score ranges may not overlap; `{range_test.label}` overlaps with `{range_check.label}`"
                    )

        return field_value

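The body of test_overlap is collapsed in this view. An equivalent check under the same assumptions (None bounds as infinities via the inf_or_float_sketch helper above, ranges treated as half-open [lower, upper) intervals) could look like the sketch below; overlaps_sketch is an illustrative stand-in, not the hidden implementation.

def overlaps_sketch(a, b) -> bool:
    # Per the comment in the diff, always start from the range with the lowest
    # lower bound; the later-starting range overlaps only if it begins strictly
    # before the earlier one ends.
    first, second = sorted((a, b), key=lambda tp: inf_or_float_sketch(tp[0], lower=True))
    return inf_or_float_sketch(second[0], lower=True) < inf_or_float_sketch(first[1], lower=False)


assert not overlaps_sketch((0, 1), (1, 2))  # adjacent ranges do not overlap
assert overlaps_sketch((-2, 2), (1, None))  # (1, +inf) starts inside (-2, 2)
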
    @validator("score_ranges")
    def ranges_contain_normal_and_abnormal(cls, field_value: dict[str, ScoreRange]):
        ranges = set(field_value.keys())
    def ranges_contain_normal_and_abnormal(cls, field_value: ScoreRanges):
        ranges = set([range_model.classification for range_model in field_value.ranges])
        if not set(default_ranges).issubset(ranges):
            raise ValidationError("Both `normal` and `abnormal` ranges must be provided.")

        return field_value

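default_ranges is defined elsewhere in the module and not shown in this diff; judging from the error message it is presumably the pair of required classifications, so a payload whose ranges are all "abnormal" would fail this validator. A tiny illustration under that assumption (default_ranges_sketch is a guess, not the real constant):

default_ranges_sketch = {"normal", "abnormal"}  # assumed contents, not from this diff
assert not default_ranges_sketch.issubset({"abnormal"})  # missing "normal" -> error raised
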
    @validator("score_ranges")
    def description_exists_for_all_ranges(cls, field_value: dict[str, ScoreRange]):
        for k, v in field_value.items():
            if k not in default_ranges and is_null(v.description):
                raise ValidationError(
                    f"A description must be present for each non-default score range (No description provided for range `{k}`)."
                )
    def wild_type_score_in_normal_range(cls, field_value: ScoreRanges):
        normal_ranges = [
            range_model.range for range_model in field_value.ranges if range_model.classification == "normal"
        ]
        for range in normal_ranges:
            if field_value.wt_score >= inf_or_float(range[0], lower=True) and field_value.wt_score < inf_or_float(
                range[1], lower=False
            ):
                return field_value

        raise ValidationError(
            f"The provided wild type score of {field_value.wt_score} is not within any of the provided normal ranges. This score should be within a normal range."
        )

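Reusing the inf_or_float_sketch helper from above, the wild-type check treats each normal range as inclusive of its lower bound and exclusive of its upper bound, so the wt_score of 0.5 used in the tests sits inside (-2, 2) while a score of exactly 2.0 would not:

wt_score, normal_range = 0.5, (-2, 2)
assert wt_score >= inf_or_float_sketch(normal_range[0], lower=True)
assert wt_score < inf_or_float_sketch(normal_range[1], lower=False)
assert not (2.0 < inf_or_float_sketch(normal_range[1], lower=False))  # 2.0 is excluded
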
    @validator("score_ranges")
    def score_range_labels_must_be_unique(cls, field_value: ScoreRanges):
        range_labels = set([range_model.label.strip() for range_model in field_value.ranges])
        if len(range_labels) != len(field_value.ranges):
            raise ValidationError("Detected repeated labels. Range labels must be unique.")

        return field_value

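Labels are stripped of surrounding whitespace before being compared, so names that differ only by padding collapse to a single label and trigger the uniqueness error:

labels = ["range_1", " range_1 "]
assert len({label.strip() for label in labels}) != len(labels)  # duplicate once stripped
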
@@ -312,7 +334,7 @@ class SavedScoreSet(ScoreSetBase):
    dataset_columns: Dict
    external_links: Dict[str, ExternalLink]
    contributors: list[Contributor]
    score_ranges: Optional[dict[str, ScoreRange]]
    score_ranges: Optional[ScoreRanges]

    class Config:
        orm_mode = True
29 changes: 22 additions & 7 deletions tests/routers/test_score_set.py
@@ -109,9 +109,17 @@ def test_create_score_set_with_score_range(client, setup_router_db):
    score_set.update(
        {
            "score_ranges": {
                "normal": {"range": (-2, 2)},
                "abnormal": {"range": (2, None)},
                "custom1": {"description": "A user provided custom range", "range": (None, -2)},
                "wt_score": 0.5,
                "ranges": [
                    {"label": "range_1", "range": (-2, 2), "classification": "normal"},
                    {"label": "range_2", "range": (2, None), "classification": "abnormal"},
                    {
                        "label": "custom_1",
                        "range": (None, -2),
                        "classification": "abnormal",
                        "description": "A user provided custom range",
                    },
                ],
            }
        }
    )
@@ -133,10 +141,17 @@
        }
    )
    expected_response["scoreRanges"] = {
        # Although the ranges are lists, the jsonschema should apply a min + max length to them
        "normal": {"range": [-2, 2]},
        "abnormal": {"range": [2, None]},
        "custom1": {"description": "A user provided custom range", "range": [None, -2]},
        "wtScore": 0.5,
        "ranges": [
            {"label": "range_1", "range": [-2, 2], "classification": "normal"},
            {"label": "range_2", "range": [2, None], "classification": "abnormal"},
            {
                "label": "custom_1",
                "range": [None, -2],
                "classification": "abnormal",
                "description": "A user provided custom range",
            },
        ],
    }

    assert sorted(expected_response.keys()) == sorted(response_data.keys())
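
The camelCase keys asserted in expected_response ("wtScore", "scoreRanges") line up with humps.camelize, which the view models import above; assuming it is wired up as the models' alias generator, the mapping is exactly:

from humps import camelize

assert camelize("wt_score") == "wtScore"
assert camelize("score_ranges") == "scoreRanges"
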
