From 118e17c3740beed801a8de0e474937b53f0a62eb Mon Sep 17 00:00:00 2001 From: Robert Sidney Cox Date: Tue, 14 May 2024 08:02:24 -0400 Subject: [PATCH] rearrange enumerations --- .../AnVILBioCoreMinimal.schema.json | 163 ------------------ .../linkml/AnVILBioCoreMinimal.linkml.yaml | 49 +++--- .../input/AnVILBioCoreMinimal.schema.json | 16 +- .../python/output/biocore_tdr_schema.json | 2 +- 4 files changed, 34 insertions(+), 196 deletions(-) delete mode 100644 content/json_schema/AnVILBioCoreMinimal.schema.json diff --git a/content/json_schema/AnVILBioCoreMinimal.schema.json b/content/json_schema/AnVILBioCoreMinimal.schema.json deleted file mode 100644 index 0eb402e..0000000 --- a/content/json_schema/AnVILBioCoreMinimal.schema.json +++ /dev/null @@ -1,163 +0,0 @@ -{ - "$defs": { - "AnVILBioSample": { - "additionalProperties": false, - "description": "Contains information about the sample(s) included in the study.", - "properties": { - "anatomical_site": { - "description": "A reference to the site within the organism from which the BioSample was taken.", - "type": "string" - }, - "apriori_cell_type": { - "description": "A priori cell type(s) for the sample, a human assignment of cell type. This should be used when the cell type is known, but not necessarily confirmed by primary experimental data.", - "items": { - "type": "string" - }, - "type": "array" - }, - "biosample_id": { - "type": "string" - }, - "biosample_type": { - "description": "The type of biosample represented by the record. This is a controlled vocabulary term from BioCoreTerms.", - "type": "string" - }, - "disease": { - "description": "A property that identifies a disease or condition has been reported in this entity.", - "type": "string" - }, - "donor_age_at_collection_lower_bound": { - "description": "Lower bound for age of donor at time sample was taken. If any age at collection data is present, must specify a unit as well.", - "type": "number" - }, - "donor_age_at_collection_unit": { - "description": "The units (e.g. years or days) of the Age of the Donor at the point in time that the BioSample was obtained or other representative entity (test, diagnosis, treatment...) was created.", - "type": "string" - }, - "donor_age_at_collection_upper_bound": { - "description": "Upper bound for age of donor at time sample was taken. If any age at collection data is present, must specify a unit as well.", - "type": "number" - }, - "donor_id_fk": { - "$ref": "#/$defs/AnVILDonor", - "description": "This property references the Donor organism from which the BioSample was acquired." - } - }, - "required": [ - "biosample_id" - ], - "title": "AnVILBioSample", - "type": "object" - }, - "AnVILDonor": { - "additionalProperties": false, - "description": "Demographic and phenotypic information about the donor.", - "properties": { - "donor_id": { - "type": "string" - }, - "genetic_ancestry": { - "description": "A property that relects a HumanDonor's reported major contributing ancestral origins based on genetic/genomic data. If you are not sure your values are calculated from the genomic data, use *reported_ethnicity* instead.", - "items": { - "type": "string" - }, - "type": "array" - }, - "human_phenotype": { - "description": "phenotype code from the human phenotype ontology (HPO)", - "type": "string" - }, - "organism_type": { - "description": "A reference to the organism type.", - "type": "string" - }, - "phenotypic_sex": { - "$ref": "#/$defs/PhenotypicSex", - "description": "A reference to the BiologicalSex of the Donor organism. \\\"An organismal quality inhering in a bearer by virtue of the bearer's physical expression of sexual characteristics. [PATO_0001894]\\" - }, - "reported_ethnicity": { - "description": "A property that relects a Human Donor's reported ethnic origins. Note this may contain both Race and Ethnicity information as define by the US Department of Interior (DOI) https://www.doi.gov/pmb/eeo/directives/race-data", - "items": { - "type": "string" - }, - "type": "array" - } - }, - "required": [ - "donor_id" - ], - "title": "AnVILDonor", - "type": "object" - }, - "AnVILFile": { - "additionalProperties": false, - "description": "Information for files associated with the study.", - "properties": { - "biosample_id_fk": { - "$ref": "#/$defs/AnVILBioSample", - "description": "This property references the BioSample from which information in a file was acquired." - }, - "data_modality": { - "description": "Data modality describes the biological nature of the information gathered as the result of an Activity, independent of the technology or methods used to produce the information.", - "items": { - "type": "string" - }, - "type": "array" - }, - "file_format": { - "description": "An indication of the format of an electronic file; include the full file extension including compression extensions.", - "type": "string" - }, - "file_id": { - "type": "string" - }, - "file_md5sum": { - "description": "md5 checksum for the file", - "type": "string" - }, - "file_name": { - "description": "The name of the file.", - "type": "string" - }, - "file_ref": { - "description": "The fully qualified path to the file.", - "type": "string" - }, - "file_size": { - "description": "Property that describes the approximate size of a file in megabytes.", - "type": "integer" - }, - "reference_assembly": { - "description": "A reference to the collection of sequences taken as the standard for a given organism. May be defined by https://www.ncbi.nlm.nih.gov/grc.", - "items": { - "type": "string" - }, - "type": "array" - } - }, - "required": [ - "file_id" - ], - "title": "AnVILFile", - "type": "object" - }, - "PhenotypicSex": { - "description": "", - "enum": [ - "female", - "male", - "intersex", - "prefer not to say" - ], - "title": "PhenotypicSex", - "type": "string" - } - }, - "$id": "https://github.com/DataBiosphere/biocore-data-model/tree/main/content", - "$schema": "https://json-schema.org/draft/2019-09/schema", - "additionalProperties": true, - "metamodel_version": "1.7.0", - "title": "AnVILBioCoreMinimal", - "type": "object", - "version": null -} diff --git a/content/linkml/AnVILBioCoreMinimal.linkml.yaml b/content/linkml/AnVILBioCoreMinimal.linkml.yaml index a8f16b7..1f16f80 100644 --- a/content/linkml/AnVILBioCoreMinimal.linkml.yaml +++ b/content/linkml/AnVILBioCoreMinimal.linkml.yaml @@ -45,8 +45,9 @@ classes: - reference_assembly - biosample_id_fk # this is the foregin key to the biosample table -slots: -# ids are unique identifiers for each entity or explicitly defined foreign key +slots: ## ids are unique identifiers for each entity or explicitly defined foreign key + +# foreign keys donor_id_fk: aliases: hasDonor description: This property references the Donor organism from which the BioSample was acquired. @@ -61,6 +62,8 @@ slots: domain: AnVILFile multivalued: false # assume that a biosample is only from one donor (1-1 relationship) inlined: true + + # primary keys biosample_id: identifier: true # this is the type of primary key range: string @@ -103,12 +106,6 @@ slots: comments: - 'For example: Homo sapiens from NCBITaxon or http://purl.obolibrary.org/obo/NCBITaxon_9606 ' multivalued: false - phenotypic_sex: - aliases: hasPhenotypicSex - description: A reference to the BiologicalSex of the Donor organism. \"An organismal quality inhering in a bearer by virtue of the bearer's physical expression of sexual characteristics. [PATO_0001894]\ - multivalued: false - inlined: true - range: PhenotypicSex reported_ethnicity: aliases: hasReportedEthnicity description: A property that relects a Human Donor's reported ethnic origins. Note this may contain both Race and Ethnicity information as define by the US Department of Interior (DOI) https://www.doi.gov/pmb/eeo/directives/race-data @@ -147,21 +144,29 @@ slots: aliases: usesReferenceAssembly description: A reference to the collection of sequences taken as the standard for a given organism. May be defined by https://www.ncbi.nlm.nih.gov/grc. multivalued: true + + # fields for enumerations human_phenotype: description: phenotype code from the human phenotype ontology (HPO) # range: HumanPhenotypeOntology + phenotypic_sex: + aliases: hasPhenotypicSex + description: A reference to the BiologicalSex of the Donor organism. \"An organismal quality inhering in a bearer by virtue of the bearer's physical expression of sexual characteristics. [PATO_0001894]\ + multivalued: false + inlined: true + # range: PhenotypicSex -enums: - # HumanPhenotypeOntology: - # permissible_values: - # All: - PhenotypicSex: - permissible_values: - female: - meaning: PATO:0000383 - male: - meaning: PATO:0000384 - intersex: - description: between male and female sex - prefer not to say: - description: the individual prefers not to disclose +# enums: +# # HumanPhenotypeOntology: +# # permissible_values: +# # All: +# PhenotypicSex: +# permissible_values: +# female: +# meaning: PATO:0000383 +# male: +# meaning: PATO:0000384 +# intersex: +# description: between male and female sex +# prefer not to say: +# description: the individual prefers not to disclose diff --git a/content/tools/python/input/AnVILBioCoreMinimal.schema.json b/content/tools/python/input/AnVILBioCoreMinimal.schema.json index 1b064a6..efc023d 100644 --- a/content/tools/python/input/AnVILBioCoreMinimal.schema.json +++ b/content/tools/python/input/AnVILBioCoreMinimal.schema.json @@ -64,8 +64,8 @@ "type": "array" }, "human_phenotype": { - "$ref": "#/$defs/HumanPhenotypeOntology", - "description": "phenotype code from the human phenotype ontology (HPO)" + "description": "phenotype code from the human phenotype ontology (HPO)", + "type": "string" }, "organism_type": { "description": "A reference to the organism type.", @@ -93,6 +93,10 @@ "additionalProperties": false, "description": "Information for files associated with the study.", "properties": { + "biosample_id_fk": { + "$ref": "#/$defs/AnVILBioSample", + "description": "This property references the BioSample from which information in a file was acquired." + }, "data_modality": { "description": "Data modality describes the biological nature of the information gathered as the result of an Activity, independent of the technology or methods used to produce the information.", "items": { @@ -136,14 +140,6 @@ ], "title": "AnVILFile", "type": "object" - }, - "HumanPhenotypeOntology": { - "description": "", - "enum": [ - "All" - ], - "title": "HumanPhenotypeOntology", - "type": "string" } }, "$id": "https://github.com/DataBiosphere/biocore-data-model/tree/main/content", diff --git a/content/tools/python/output/biocore_tdr_schema.json b/content/tools/python/output/biocore_tdr_schema.json index c32b309..c5e2212 100644 --- a/content/tools/python/output/biocore_tdr_schema.json +++ b/content/tools/python/output/biocore_tdr_schema.json @@ -1 +1 @@ -{"tables": [{"name": "AnVILBioSample", "columns": [{"name": "anatomical_site", "datatype": "string", "array_of": false, "required": false}, {"name": "apriori_cell_type", "datatype": "string", "array_of": true, "required": false}, {"name": "biosample_id", "datatype": "string", "array_of": false, "required": true}, {"name": "biosample_type", "datatype": "string", "array_of": false, "required": false}, {"name": "disease", "datatype": "string", "array_of": false, "required": false}, {"name": "donor_age_at_collection_lower_bound", "datatype": "numeric", "array_of": false, "required": false}, {"name": "donor_age_at_collection_unit", "datatype": "string", "array_of": false, "required": false}, {"name": "donor_age_at_collection_upper_bound", "datatype": "numeric", "array_of": false, "required": false}, {"name": "donor_id", "datatype": "string", "array_of": false, "required": false}], "primaryKey": ["biosample_id"]}, {"name": "AnVILDonor", "columns": [{"name": "donor_id", "datatype": "string", "array_of": false, "required": true}, {"name": "genetic_ancestry", "datatype": "string", "array_of": true, "required": false}, {"name": "human_phenotype", "datatype": "string", "array_of": false, "required": false}, {"name": "organism_type", "datatype": "string", "array_of": false, "required": false}, {"name": "phenotypic_sex", "datatype": "string", "array_of": false, "required": false}, {"name": "reported_ethnicity", "datatype": "string", "array_of": true, "required": false}], "primaryKey": ["donor_id"]}, {"name": "AnVILFile", "columns": [{"name": "data_modality", "datatype": "string", "array_of": true, "required": false}, {"name": "file_format", "datatype": "string", "array_of": false, "required": false}, {"name": "file_id", "datatype": "string", "array_of": false, "required": true}, {"name": "file_md5sum", "datatype": "string", "array_of": false, "required": false}, {"name": "file_name", "datatype": "string", "array_of": false, "required": false}, {"name": "file_ref", "datatype": "string", "array_of": false, "required": false}, {"name": "file_size", "datatype": "integer", "array_of": false, "required": false}, {"name": "reference_assembly", "datatype": "string", "array_of": true, "required": false}], "primaryKey": ["file_id"]}], "relationships": [{"name": "AnVILBioSample.donor_id_to_AnVILDonor.donor_id", "from": {"table": "AnVILBioSample", "column": "donor_id"}, "to": {"table": "AnVILDonor", "column": "donor_id"}}]} \ No newline at end of file +{"tables": [{"name": "AnVILBioSample", "columns": [{"name": "anatomical_site", "datatype": "string", "array_of": false, "required": false}, {"name": "apriori_cell_type", "datatype": "string", "array_of": true, "required": false}, {"name": "biosample_id", "datatype": "string", "array_of": false, "required": true}, {"name": "biosample_type", "datatype": "string", "array_of": false, "required": false}, {"name": "disease", "datatype": "string", "array_of": false, "required": false}, {"name": "donor_age_at_collection_lower_bound", "datatype": "numeric", "array_of": false, "required": false}, {"name": "donor_age_at_collection_unit", "datatype": "string", "array_of": false, "required": false}, {"name": "donor_age_at_collection_upper_bound", "datatype": "numeric", "array_of": false, "required": false}, {"name": "donor_id", "datatype": "string", "array_of": false, "required": false}], "primaryKey": ["biosample_id"]}, {"name": "AnVILDonor", "columns": [{"name": "donor_id", "datatype": "string", "array_of": false, "required": true}, {"name": "genetic_ancestry", "datatype": "string", "array_of": true, "required": false}, {"name": "human_phenotype", "datatype": "string", "array_of": false, "required": false}, {"name": "organism_type", "datatype": "string", "array_of": false, "required": false}, {"name": "phenotypic_sex", "datatype": "string", "array_of": false, "required": false}, {"name": "reported_ethnicity", "datatype": "string", "array_of": true, "required": false}], "primaryKey": ["donor_id"]}, {"name": "AnVILFile", "columns": [{"name": "biosample_id", "datatype": "string", "array_of": false, "required": false}, {"name": "data_modality", "datatype": "string", "array_of": true, "required": false}, {"name": "file_format", "datatype": "string", "array_of": false, "required": false}, {"name": "file_id", "datatype": "string", "array_of": false, "required": true}, {"name": "file_md5sum", "datatype": "string", "array_of": false, "required": false}, {"name": "file_name", "datatype": "string", "array_of": false, "required": false}, {"name": "file_ref", "datatype": "string", "array_of": false, "required": false}, {"name": "file_size", "datatype": "integer", "array_of": false, "required": false}, {"name": "reference_assembly", "datatype": "string", "array_of": true, "required": false}], "primaryKey": ["file_id"]}], "relationships": [{"name": "AnVILBioSample.donor_id_to_AnVILDonor.donor_id", "from": {"table": "AnVILBioSample", "column": "donor_id"}, "to": {"table": "AnVILDonor", "column": "donor_id"}}, {"name": "AnVILFile.biosample_id_to_AnVILBioSample.biosample_id", "from": {"table": "AnVILFile", "column": "biosample_id"}, "to": {"table": "AnVILBioSample", "column": "biosample_id"}}]} \ No newline at end of file