From 174939f2ceceb7a0723a32e7cca7a9ca2ec5735e Mon Sep 17 00:00:00 2001
From: DSuveges
Date: Wed, 3 Jul 2024 10:31:37 +0100
Subject: [PATCH 1/2] feat: adding expression input schemas

---
NOTE(review): commentary only. Text between the "---" separator above and the
first "diff --git" line is not part of the commit message or of the change.
  * StudyMetadataSchema declares no inner Config, unlike the other top-level
    models in expression_input.py. The committed
    schemas/expression_study_metadata.json accordingly uses the class name as
    its title and has no top-level "additionalProperties": false.
    Confirm whether that asymmetry is intended.
  * main() opens 'expression_aggregated.json', 'expression_unaggregated.json'
    and 'expression_study_metadata.json' by bare relative name, while the
    committed copies live under schemas/. Presumably the script is run from
    that directory; verify.
  * Enum, json and Optional are imported in expression_input.py but are not
    referenced anywhere in the file.
  * The q1/q3 descriptions say "quantile" (and "third" is lower-case);
    "quartile" looks like the intended word. Changing it means regenerating
    schemas/expression_aggregated.json as well.
  * pubmedIds is typed List[str], but its examples entry is the bare string
    '28186631' rather than a list of identifiers; confirm which is meant.
  * Extra, the inner Config class and schema_json() look like the pydantic v1
    spellings; TODO confirm the pinned pydantic version before upgrading.

 pydantic_models/expression_input.py    | 151 +++++++++++++++++++++++++
 schemas/expression_aggregated.json     |  85 ++++++++++++++
 schemas/expression_study_metadata.json | 119 +++++++++++++++++++
 schemas/expression_unaggregated.json   |  61 ++++++++++
 4 files changed, 416 insertions(+)
 create mode 100644 pydantic_models/expression_input.py
 create mode 100644 schemas/expression_aggregated.json
 create mode 100644 schemas/expression_study_metadata.json
 create mode 100644 schemas/expression_unaggregated.json

diff --git a/pydantic_models/expression_input.py b/pydantic_models/expression_input.py
new file mode 100644
index 0000000..45ea205
--- /dev/null
+++ b/pydantic_models/expression_input.py
@@ -0,0 +1,151 @@
+#!/usr/bin/env python
+"""Generating json schema for expression data via pydantic models."""
+
+from __future__ import annotations
+
+from enum import Enum
+import json
+from typing import List, Optional
+
+from pydantic import BaseModel, Extra, Field
+
+class ExpressionAggregated(BaseModel):
+    """Expression object for aggregated data."""
+    assayGroupId: str = Field(
+        description="Identifier for the assay group.",
+    )
+    min: float = Field(
+        description='Minimum value in the assay group.',
+    )
+    q1: float = Field(
+        description='First quantile of values in the assay group.',
+    )
+    q2: float = Field(
+        description='Median of values in the assay group.',
+    )
+    q3: float = Field(
+        description='third quantile of values in the assay group.',
+    )
+    max: float = Field(
+        description='Maximum expression value in the assay group.',
+    )
+
+    class Config:
+        extra = Extra.forbid
+        anystr_strip_whitespace = True
+
+class ExpressionAggregatedSchema(BaseModel):
+    """Schema for aggregated expression data."""
+    geneProductId: str = Field(
+        description="Identifier of measured gene product, protein or transcript.",
+        examples=['ENSG00000157764', 'Q9HC10'],
+    )
+    unit: str = Field(
+        description='Unit of the expression value.',
+        examples=['tpms'],
+    )
+    expression: List[ExpressionAggregated]
+
+    class Config:
+        title = 'OpenTargets-gene-expression-aggregated'
+        extra = Extra.forbid
+        anystr_strip_whitespace = True
+
+class ExperimentalDesign(BaseModel):
+    """Experimental design object."""
+    assayGroupId: str = Field(
+        description="Identifier for the assay group.",
+    )
+    assayId: str = Field(
+        description="Identifier for the assay.",
+    )
+    assayGroup: str = Field(
+        description='Group of the assay.',
+    )
+    age: str = Field(
+        description='Age of the organism.',
+    )
+    cultivar: str = Field(
+        description='Cultivar name.',
+    )
+    genotype: str = Field(
+        description='Genotype of the organism.',
+    )
+    organismPart: str = Field(
+        description='Part of the organism.',
+    )
+
+    class Config:
+        extra = Extra.forbid
+        anystr_strip_whitespace = True
+
+class ExpressionUnAggregated(BaseModel):
+    """Expression object for unaggregated data."""
+    assayId: str = Field(
+        description="Identifier for the assay.",
+    )
+    value: float = Field(
+        description='Expression value in the assay.',
+    )
+
+    class Config:
+        extra = Extra.forbid
+        anystr_strip_whitespace = True
+
+class ExpressionUnaggregatedSchema(BaseModel):
+    """Schema for unaggregated expression data."""
+    geneProductId: str = Field(
+        description="Identifier of measured gene product, protein or transcript.",
+        examples=['ENSG00000157764', 'Q9HC10'],
+    )
+    unit: str = Field(
+        description='Unit of the expression value.',
+        examples=['tpms'],
+    )
+    expression: List[ExpressionUnAggregated]
+
+    class Config:
+        title = 'OpenTargets-gene-expression-unaggregated'
+        extra = Extra.forbid
+        anystr_strip_whitespace = True
+
+
+class StudyMetadataSchema(BaseModel):
+    """Schema for expression metadata."""
+    experimentId: str = Field(
+        description="Identifier for the experiment.",
+    )
+    experimentType: str = Field(
+        description='Type of the experiment.',
+    )
+    species: str = Field(
+        description='Species name.',
+        examples=['Sorghum bicolor'],
+    )
+    speciesOntURI: str = Field(
+        description='Species ontology URI.',
+        examples=['http://purl.obolibrary.org/obo/NCBITaxon_4558'],
+    )
+    pubmedIds: List[str] = Field(
+        description='List of pubmed identifiers.',
+        examples=['28186631'],
+    )
+    provider: str = Field(
+        description='Provider of the data.',
+    )
+    experimentalDesigns: List[ExperimentalDesign]
+
+def main():
+    with open('expression_aggregated.json', 'wt') as f:
+        f.write(ExpressionAggregatedSchema.schema_json(indent=2))
+
+    with open('expression_unaggregated.json', 'wt') as f:
+        f.write(ExpressionUnaggregatedSchema.schema_json(indent=2))
+
+    with open('expression_study_metadata.json', 'wt') as f:
+        f.write(StudyMetadataSchema.schema_json(indent=2))
+
+    
+
+if __name__ == '__main__':
+    main()
diff --git a/schemas/expression_aggregated.json b/schemas/expression_aggregated.json
new file mode 100644
index 0000000..a42cf3f
--- /dev/null
+++ b/schemas/expression_aggregated.json
@@ -0,0 +1,85 @@
+{
+  "title": "OpenTargets-gene-expression-aggregated",
+  "description": "Schema for aggregated expression data.",
+  "type": "object",
+  "properties": {
+    "geneProductId": {
+      "title": "Geneproductid",
+      "description": "Identifier of measured gene product, protein or transcript.",
+      "examples": [
+        "ENSG00000157764",
+        "Q9HC10"
+      ],
+      "type": "string"
+    },
+    "unit": {
+      "title": "Unit",
+      "description": "Unit of the expression value.",
+      "examples": [
+        "tpms"
+      ],
+      "type": "string"
+    },
+    "expression": {
+      "title": "Expression",
+      "type": "array",
+      "items": {
+        "$ref": "#/definitions/ExpressionAggregated"
+      }
+    }
+  },
+  "required": [
+    "geneProductId",
+    "unit",
+    "expression"
+  ],
+  "additionalProperties": false,
+  "definitions": {
+    "ExpressionAggregated": {
+      "title": "ExpressionAggregated",
+      "description": "Expression object for aggregated data.",
+      "type": "object",
+      "properties": {
+        "assayGroupId": {
+          "title": "Assaygroupid",
+          "description": "Identifier for the assay group.",
+          "type": "string"
+        },
+        "min": {
+          "title": "Min",
+          "description": "Minimum value in the assay group.",
+          "type": "number"
+        },
+        "q1": {
+          "title": "Q1",
+          "description": "First quantile of values in the assay group.",
+          "type": "number"
+        },
+        "q2": {
+          "title": "Q2",
+          "description": "Median of values in the assay group.",
+          "type": "number"
+        },
+        "q3": {
+          "title": "Q3",
+          "description": "third quantile of values in the assay group.",
+          "type": "number"
+        },
+        "max": {
+          "title": "Max",
+          "description": "Maximum expression value in the assay group.",
+          "type": "number"
+        }
+      },
+      "required": [
+        "assayGroupId",
+        "min",
+        "q1",
+        "q2",
+        "q3",
+        "max"
+      ],
+      "additionalProperties": false
+    }
+  }
+}
\ No newline at end of file
diff --git a/schemas/expression_study_metadata.json b/schemas/expression_study_metadata.json
new file mode 100644
index 0000000..99bcd87
--- /dev/null
+++ b/schemas/expression_study_metadata.json
@@ -0,0 +1,119 @@
+{
+  "title": "StudyMetadataSchema",
+  "description": "Schema for expression metadata.",
+  "type": "object",
+  "properties": {
+    "experimentId": {
+      "title": "Experimentid",
+      "description": "Identifier for the experiment.",
+      "type": "string"
+    },
+    "experimentType": {
+      "title": "Experimenttype",
+      "description": "Type of the experiment.",
+      "type": "string"
+    },
+    "species": {
+      "title": "Species",
+      "description": "Species name.",
+      "examples": [
+        "Sorghum bicolor"
+      ],
+      "type": "string"
+    },
+    "speciesOntURI": {
+      "title": "Speciesonturi",
+      "description": "Species ontology URI.",
+      "examples": [
+        "http://purl.obolibrary.org/obo/NCBITaxon_4558"
+      ],
+      "type": "string"
+    },
+    "pubmedIds": {
+      "title": "Pubmedids",
+      "description": "List of pubmed identifiers.",
+      "examples": [
+        "28186631"
+      ],
+      "type": "array",
+      "items": {
+        "type": "string"
+      }
+    },
+    "provider": {
+      "title": "Provider",
+      "description": "Provider of the data.",
+      "type": "string"
+    },
+    "experimentalDesigns": {
+      "title": "Experimentaldesigns",
+      "type": "array",
+      "items": {
+        "$ref": "#/definitions/ExperimentalDesign"
+      }
+    }
+  },
+  "required": [
+    "experimentId",
+    "experimentType",
+    "species",
+    "speciesOntURI",
+    "pubmedIds",
+    "provider",
+    "experimentalDesigns"
+  ],
+  "definitions": {
+    "ExperimentalDesign": {
+      "title": "ExperimentalDesign",
+      "description": "Experimental design object.",
+      "type": "object",
+      "properties": {
+        "assayGroupId": {
+          "title": "Assaygroupid",
+          "description": "Identifier for the assay group.",
+          "type": "string"
+        },
+        "assayId": {
+          "title": "Assayid",
+          "description": "Identifier for the assay.",
+          "type": "string"
+        },
+        "assayGroup": {
+          "title": "Assaygroup",
+          "description": "Group of the assay.",
+          "type": "string"
+        },
+        "age": {
+          "title": "Age",
+          "description": "Age of the organism.",
+          "type": "string"
+        },
+        "cultivar": {
+          "title": "Cultivar",
+          "description": "Cultivar name.",
+          "type": "string"
+        },
+        "genotype": {
+          "title": "Genotype",
+          "description": "Genotype of the organism.",
+          "type": "string"
+        },
+        "organismPart": {
+          "title": "Organismpart",
+          "description": "Part of the organism.",
+          "type": "string"
+        }
+      },
+      "required": [
+        "assayGroupId",
+        "assayId",
+        "assayGroup",
+        "age",
+        "cultivar",
+        "genotype",
+        "organismPart"
+      ],
+      "additionalProperties": false
+    }
+  }
+}
\ No newline at end of file
diff --git a/schemas/expression_unaggregated.json b/schemas/expression_unaggregated.json
new file mode 100644
index 0000000..bc888f8
--- /dev/null
+++ b/schemas/expression_unaggregated.json
@@ -0,0 +1,61 @@
+{
+  "title": "OpenTargets-gene-expression-unaggregated",
+  "description": "Schema for unaggregated expression data.",
+  "type": "object",
+  "properties": {
+    "geneProductId": {
+      "title": "Geneproductid",
+      "description": "Identifier of measured gene product, protein or transcript.",
+      "examples": [
+        "ENSG00000157764",
+        "Q9HC10"
+      ],
+      "type": "string"
+    },
+    "unit": {
+      "title": "Unit",
+      "description": "Unit of the expression value.",
+      "examples": [
+        "tpms"
+      ],
+      "type": "string"
+    },
+    "expression": {
+      "title": "Expression",
+      "type": "array",
+      "items": {
+        "$ref": "#/definitions/ExpressionUnAggregated"
+      }
+    }
+  },
+  "required": [
+    "geneProductId",
+    "unit",
+    "expression"
+  ],
+  "additionalProperties": false,
+  "definitions": {
+    "ExpressionUnAggregated": {
+      "title": "ExpressionUnAggregated",
+      "description": "Expression object for unaggregated data.",
+      "type": "object",
+      "properties": {
+        "assayId": {
+          "title": "Assayid",
+          "description": "Identifier for the assay.",
+          "type": "string"
+        },
+        "value": {
+          "title": "Value",
+          "description": "Expression value in the assay.",
+          "type": "number"
+        }
+      },
+      "required": [
+        "assayId",
+        "value"
+      ],
+      "additionalProperties": false
+    }
+  }
+}
\ No newline at end of file
From 7a9234e09e907bfa0402f651ed571c38c5329604 Mon Sep 17 00:00:00 2001
From: "pre-commit-ci[bot]" <66853113+pre-commit-ci[bot]@users.noreply.github.com>
Date: Wed, 6 Nov 2024 12:16:03 +0000
Subject: [PATCH 2/2] [pre-commit.ci] auto fixes from pre-commit.com hooks

for more information, see https://pre-commit.ci
---
 pydantic_models/expression_input.py    | 2 +-
 schemas/expression_aggregated.json     | 2 +-
 schemas/expression_study_metadata.json | 2 +-
 schemas/expression_unaggregated.json   | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/pydantic_models/expression_input.py b/pydantic_models/expression_input.py
index 45ea205..c6fe0b5 100644
--- a/pydantic_models/expression_input.py
+++ b/pydantic_models/expression_input.py
@@ -145,7 +145,7 @@ def main():
     with open('expression_study_metadata.json', 'wt') as f:
         f.write(StudyMetadataSchema.schema_json(indent=2))
 
-    
+
 
 if __name__ == '__main__':
     main()
diff --git a/schemas/expression_aggregated.json b/schemas/expression_aggregated.json
index a42cf3f..77f0a4e 100644
--- a/schemas/expression_aggregated.json
+++ b/schemas/expression_aggregated.json
@@ -82,4 +82,4 @@
       "additionalProperties": false
     }
   }
-}
\ No newline at end of file
+}
diff --git a/schemas/expression_study_metadata.json b/schemas/expression_study_metadata.json
index 99bcd87..4a8e600 100644
--- a/schemas/expression_study_metadata.json
+++ b/schemas/expression_study_metadata.json
@@ -116,4 +116,4 @@
       "additionalProperties": false
     }
   }
-}
\ No newline at end of file
+}
diff --git a/schemas/expression_unaggregated.json b/schemas/expression_unaggregated.json
index bc888f8..313909e 100644
--- a/schemas/expression_unaggregated.json
+++ b/schemas/expression_unaggregated.json
@@ -58,4 +58,4 @@
       "additionalProperties": false
     }
   }
-}
\ No newline at end of file
+}