Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: adding expression input schemas #212

Open
wants to merge 4 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
151 changes: 151 additions & 0 deletions pydantic_models/expression_input.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,151 @@
#!/usr/bin/env python
"""Generating json schema for expression data via pydantic models."""

from __future__ import annotations

from enum import Enum
import json
from typing import List, Optional

from pydantic import BaseModel, Extra, Field

class ExpressionAggregated(BaseModel):
    """Expression object for aggregated data."""

    # NOTE: pydantic copies the class docstring into the generated schema's
    # "description", so it stays a single line; field notes live in comments.

    # Assay group that the summary statistics below refer to.
    assayGroupId: str = Field(..., description="Identifier for the assay group.")

    # Summary statistics of the assay group, in ascending order:
    # min, q1, q2 (median), q3, max. All are required numbers.
    min: float = Field(..., description="Minimum value in the assay group.")
    q1: float = Field(
        ..., description="First quantile of values in the assay group."
    )
    q2: float = Field(..., description="Median of values in the assay group.")
    q3: float = Field(
        ..., description="third quantile of values in the assay group."
    )
    max: float = Field(
        ..., description="Maximum expression value in the assay group."
    )

    class Config:
        # Strip surrounding whitespace from incoming string values.
        anystr_strip_whitespace = True
        # Unknown keys are rejected ("additionalProperties": false).
        extra = Extra.forbid

class ExpressionAggregatedSchema(BaseModel):
    """Schema for aggregated expression data."""

    # NOTE: the one-line docstring above is emitted as the schema
    # "description"; keep additional explanation in comments.

    # Gene product that every entry of `expression` was measured for.
    geneProductId: str = Field(
        ...,
        description="Identifier of measured gene product, protein or transcript.",
        examples=["ENSG00000157764", "Q9HC10"],
    )
    # Unit shared by all expression values of this record.
    unit: str = Field(
        ...,
        description="Unit of the expression value.",
        examples=["tpms"],
    )
    # One summary-statistics object per assay group.
    expression: List[ExpressionAggregated]

    class Config:
        # Strip surrounding whitespace from incoming string values.
        anystr_strip_whitespace = True
        # Unknown keys are rejected ("additionalProperties": false).
        extra = Extra.forbid
        # Overrides the default schema title (the class name).
        title = "OpenTargets-gene-expression-aggregated"

class ExperimentalDesign(BaseModel):
    """Experimental design object."""

    # NOTE: the one-line docstring above is emitted as the schema
    # "description"; keep additional explanation in comments.
    # Every field below is a required string.

    # Identifiers tying an assay to its assay group.
    assayGroupId: str = Field(..., description="Identifier for the assay group.")
    assayId: str = Field(..., description="Identifier for the assay.")
    assayGroup: str = Field(..., description="Group of the assay.")

    # Descriptive attributes of the assayed sample.
    age: str = Field(..., description="Age of the organism.")
    cultivar: str = Field(..., description="Cultivar name.")
    genotype: str = Field(..., description="Genotype of the organism.")
    organismPart: str = Field(..., description="Part of the organism.")

    class Config:
        # Strip surrounding whitespace from incoming string values.
        anystr_strip_whitespace = True
        # Unknown keys are rejected ("additionalProperties": false).
        extra = Extra.forbid

class ExpressionUnAggregated(BaseModel):
    """Expression object for unaggregated data."""

    # NOTE: the one-line docstring above is emitted as the schema
    # "description"; keep additional explanation in comments.

    # A single measurement: the assay it came from and the measured value.
    assayId: str = Field(..., description="Identifier for the assay.")
    value: float = Field(..., description="Expression value in the assay.")

    class Config:
        # Strip surrounding whitespace from incoming string values.
        anystr_strip_whitespace = True
        # Unknown keys are rejected.
        extra = Extra.forbid

class ExpressionUnaggregatedSchema(BaseModel):
    """Schema for unaggregated expression data."""

    # NOTE: the one-line docstring above is emitted as the schema
    # "description"; keep additional explanation in comments.

    # Gene product that every entry of `expression` was measured for.
    geneProductId: str = Field(
        ...,
        description="Identifier of measured gene product, protein or transcript.",
        examples=["ENSG00000157764", "Q9HC10"],
    )
    # Unit shared by all expression values of this record.
    unit: str = Field(
        ...,
        description="Unit of the expression value.",
        examples=["tpms"],
    )
    # One (assay, value) object per individual measurement.
    expression: List[ExpressionUnAggregated]

    class Config:
        # Strip surrounding whitespace from incoming string values.
        anystr_strip_whitespace = True
        # Unknown keys are rejected.
        extra = Extra.forbid
        # Overrides the default schema title (the class name).
        title = "OpenTargets-gene-expression-unaggregated"


class StudyMetadataSchema(BaseModel):
    """Schema for expression metadata."""

    # NOTE: the one-line docstring above is emitted verbatim as the schema
    # "description", so longer explanations are kept in comments.
    # NOTE(review): unlike the aggregated/unaggregated schemas, this model
    # declares no Config (no custom title, extra keys are not forbidden) --
    # confirm whether that is intentional before tightening it.

    # Experiment-level identification.
    experimentId: str = Field(..., description="Identifier for the experiment.")
    experimentType: str = Field(..., description="Type of the experiment.")

    # Species as a human-readable name plus its ontology term.
    species: str = Field(
        ...,
        description="Species name.",
        examples=["Sorghum bicolor"],
    )
    speciesOntURI: str = Field(
        ...,
        description="Species ontology URI.",
        examples=["http://purl.obolibrary.org/obo/NCBITaxon_4558"],
    )

    # The property is an array of strings, so every entry of `examples` must
    # itself be a list of identifiers to be a valid example value.
    pubmedIds: List[str] = Field(
        ...,
        description="List of pubmed identifiers.",
        examples=[["28186631"]],
    )
    provider: str = Field(..., description="Provider of the data.")

    # One design record per assay of the experiment.
    experimentalDesigns: List[ExperimentalDesign]

def main():
    """Dump the JSON schema of each top-level model into the working directory."""
    # (output file name, model) pairs, written in this order.
    outputs = (
        ("expression_aggregated.json", ExpressionAggregatedSchema),
        ("expression_unaggregated.json", ExpressionUnaggregatedSchema),
        ("expression_study_metadata.json", StudyMetadataSchema),
    )
    for file_name, model in outputs:
        with open(file_name, "wt") as handle:
            handle.write(model.schema_json(indent=2))



# Regenerate the schema files only when run as a script, not on import.
if __name__ == "__main__":
    main()
85 changes: 85 additions & 0 deletions schemas/expression_aggregated.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,85 @@
{
"title": "OpenTargets-gene-expression-aggregated",
"description": "Schema for aggregated expression data.",
"type": "object",
"properties": {
"geneProductId": {
"title": "Geneproductid",
"description": "Identifier of measured gene product, protein or transcript.",
"examples": [
"ENSG00000157764",
"Q9HC10"
],
"type": "string"
},
"unit": {
"title": "Unit",
"description": "Unit of the expression value.",
"examples": [
"tpms"
],
"type": "string"
},
"expression": {
"title": "Expression",
"type": "array",
"items": {
"$ref": "#/definitions/ExpressionAggregated"
}
}
},
"required": [
"geneProductId",
"unit",
"expression"
],
"additionalProperties": false,
"definitions": {
"ExpressionAggregated": {
"title": "ExpressionAggregated",
"description": "Expression object for aggregated data.",
"type": "object",
"properties": {
"assayGroupId": {
"title": "Assaygroupid",
"description": "Identifier for the assay group.",
"type": "string"
},
"min": {
"title": "Min",
"description": "Minimum value in the assay group.",
"type": "number"
},
"q1": {
"title": "Q1",
"description": "First quantile of values in the assay group.",
"type": "number"
},
"q2": {
"title": "Q2",
"description": "Median of values in the assay group.",
"type": "number"
},
"q3": {
"title": "Q3",
"description": "third quantile of values in the assay group.",
"type": "number"
},
"max": {
"title": "Max",
"description": "Maximum expression value in the assay group.",
"type": "number"
}
},
"required": [
"assayGroupId",
"min",
"q1",
"q2",
"q3",
"max"
],
"additionalProperties": false
}
}
}
119 changes: 119 additions & 0 deletions schemas/expression_study_metadata.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,119 @@
{
"title": "StudyMetadataSchema",
"description": "Schema for expression metadata.",
"type": "object",
"properties": {
"experimentId": {
"title": "Experimentid",
"description": "Identifier for the experiment.",
"type": "string"
},
"experimentType": {
"title": "Experimenttype",
"description": "Type of the experiment.",
"type": "string"
},
"species": {
"title": "Species",
"description": "Species name.",
"examples": [
"Sorghum bicolor"
],
"type": "string"
},
"speciesOntURI": {
"title": "Speciesonturi",
"description": "Species ontology URI.",
"examples": [
"http://purl.obolibrary.org/obo/NCBITaxon_4558"
],
"type": "string"
},
"pubmedIds": {
"title": "Pubmedids",
"description": "List of pubmed identifiers.",
"examples": [
"28186631"
],
"type": "array",
"items": {
"type": "string"
}
},
"provider": {
"title": "Provider",
"description": "Provider of the data.",
"type": "string"
},
"experimentalDesigns": {
"title": "Experimentaldesigns",
"type": "array",
"items": {
"$ref": "#/definitions/ExperimentalDesign"
}
}
},
"required": [
"experimentId",
"experimentType",
"species",
"speciesOntURI",
"pubmedIds",
"provider",
"experimentalDesigns"
],
"definitions": {
"ExperimentalDesign": {
"title": "ExperimentalDesign",
"description": "Experimental design object.",
"type": "object",
"properties": {
"assayGroupId": {
"title": "Assaygroupid",
"description": "Identifier for the assay group.",
"type": "string"
},
"assayId": {
"title": "Assayid",
"description": "Identifier for the assay.",
"type": "string"
},
"assayGroup": {
"title": "Assaygroup",
"description": "Group of the assay.",
"type": "string"
},
"age": {
"title": "Age",
"description": "Age of the organism.",
"type": "string"
},
"cultivar": {
"title": "Cultivar",
"description": "Cultivar name.",
"type": "string"
},
"genotype": {
"title": "Genotype",
"description": "Genotype of the organism.",
"type": "string"
},
"organismPart": {
"title": "Organismpart",
"description": "Part of the organism.",
"type": "string"
}
},
"required": [
"assayGroupId",
"assayId",
"assayGroup",
"age",
"cultivar",
"genotype",
"organismPart"
],
"additionalProperties": false
}
}
}
Loading