From beda87bf961f2fdc0c958d9b32a92574a0438c17 Mon Sep 17 00:00:00 2001 From: Sid Cox Date: Tue, 12 Mar 2024 15:40:32 -0400 Subject: [PATCH] update minimal schema without imports --- .../AnVILBioCoreMinimal.schema.json | 297 ++---------------- content/linkml/AnVILBioCoreMinimal.yaml | 157 +++++---- 2 files changed, 105 insertions(+), 349 deletions(-) diff --git a/content/json_schema/AnVILBioCoreMinimal.schema.json b/content/json_schema/AnVILBioCoreMinimal.schema.json index 6448fee..10c7733 100644 --- a/content/json_schema/AnVILBioCoreMinimal.schema.json +++ b/content/json_schema/AnVILBioCoreMinimal.schema.json @@ -1,63 +1,15 @@ { "$defs": { - "Activity": { - "additionalProperties": false, - "description": "", - "title": "Activity", - "type": "object" - }, - "ActivityTypes": { - "description": "", - "enum": [ - "SampleCollectionActivity", - "SampleTreatmentActivity", - "SequenceActivity", - "AlignmentActivity", - "VariantCallingActivity", - "ExpressionActivity", - "AnalysisActivity", - "ImagingActivity", - "IndexingActivity", - "ChecksumActivity", - "Activity" - ], - "title": "ActivityTypes", - "type": "string" - }, - "AlignmentActivity": { - "additionalProperties": false, - "description": "", - "title": "AlignmentActivity", - "type": "object" - }, - "AlignmentFile": { - "additionalProperties": false, - "description": "", - "title": "AlignmentFile", - "type": "object" - }, - "AnalysisActivity": { - "additionalProperties": false, - "description": "", - "title": "AnalysisActivity", - "type": "object" - }, - "Antibody": { - "additionalProperties": false, - "description": "", - "title": "Antibody", - "type": "object" - }, "AnvilBioSample": { "additionalProperties": false, - "description": "", + "description": "Contains information about the sample(s) included in the study.", "properties": { "anatomical_site": { "description": "A reference to the site within the organism from which the BioSample was taken.", "type": "string" }, "apriori_cell_type": { - "description": "A 
priori cell type(s) for the sample, a human assignment of cell type without experimental data.", + "description": "A priori cell type(s) for the sample, a human assignment of cell type.", "items": { "type": "string" }, @@ -67,28 +19,27 @@ "type": "string" }, "biosample_type": { - "items": { - "type": "string" - }, - "type": "array" + "description": "The type of biosample represented by the record.", + "type": "string" }, "disease": { "description": "A property that identifies a disease or condition has been reported in this entity.", - "items": { - "type": "string" - }, - "type": "array" + "type": "string" }, "donor_age_at_collection_lower_bound": { + "description": "Lower bound for age of donor at time sample was taken. If any age at collection data is present, must specify a unit as well.", "type": "number" }, "donor_age_at_collection_unit": { + "description": "The units (e.g. years or days) of the Age of the Donor at the point in time that the BioSample was obtained or other representative entity (test, diagnosis, treatment...) was created.", "type": "string" }, "donor_age_at_collection_upper_bound": { + "description": "Upper bound for age of donor at time sample was taken. If any age at collection data is present, must specify a unit as well.", "type": "number" }, - "donor_id": { + "donor_id_fk": { + "description": "This property references the Donor organism from which the BioSample was acquired.", "type": "string" } }, @@ -100,16 +51,13 @@ }, "AnvilDonor": { "additionalProperties": false, - "description": "", + "description": "Demographic and phenotypic information about the donor.", "properties": { - "diagnosis": { - "description": "A property that identifies a Diagnosis including provenance and the disease or condition diagnosed." 
- }, "donor_id": { "type": "string" }, "genetic_ancestry": { - "description": "A property that relects a HumanDonor's reported major contributing ancestral origins based on genetic/genomic data.", + "description": "A property that reflects a HumanDonor's reported major contributing ancestral origins based on genetic/genomic data.", "items": { "type": "string" }, @@ -120,11 +68,11 @@ "type": "string" }, "phenotypic_sex": { - "description": "A reference to the BiologicalSex of the Donor organism. \\\"An organismal quality inhering in a bearer by virtue of the bearer's physical expression of sexual characteristics. [PATO]\\", + "description": "A reference to the BiologicalSex of the Donor organism. \\\"An organismal quality inhering in a bearer by virtue of the bearer's physical expression of sexual characteristics. [PATO_0001894]\\", "type": "string" }, "reported_ethnicity": { - "description": "A property that relects a HumanDonor's reported ethnic origins.", + "description": "A property that reflects a Human Donor's reported ethnic origins.", "items": { "type": "string" }, @@ -139,7 +87,7 @@ }, "AnvilFile": { "additionalProperties": false, - "description": "", + "description": "Information for files associated with the study.", "properties": { "data_modality": { "description": "Data modality describes the biological nature of the information gathered as the result of an Activity, independent of the technology or methods used to produce the information.", @@ -156,20 +104,27 @@ "type": "string" }, "file_md5sum": { - "items": { - "type": "string" - }, - "type": "array" + "description": "md5 checksum for the file", + "type": "string" }, "file_name": { + "description": "The name of the file.", "type": "string" }, "file_ref": { + "description": "The fully qualified path to the file.", "type": "string" }, "file_size": { "description": "Property that describes the approximate size of a file in megabytes.", "type": "integer" + }, + "reference_assembly": { + "description": "A 
reference to the collection of sequences taken as the standard for a given organism. May be defined by https://www.ncbi.nlm.nih.gov/grc.", + "items": { + "type": "string" + }, + "type": "array" } }, "required": [ @@ -177,207 +132,9 @@ ], "title": "AnvilFile", "type": "object" - }, - "AssayActivity": { - "additionalProperties": false, - "description": "", - "title": "AssayActivity", - "type": "object" - }, - "BioSample": { - "additionalProperties": false, - "description": "", - "title": "BioSample", - "type": "object" - }, - "BioSampleDissociationActivity": { - "additionalProperties": false, - "description": "", - "title": "BioSampleDissociationActivity", - "type": "object" - }, - "DNASequence": { - "additionalProperties": false, - "description": "", - "title": "DNASequence", - "type": "object" - }, - "Diagnosis": { - "additionalProperties": false, - "description": "", - "title": "Diagnosis", - "type": "object" - }, - "DogDonor": { - "additionalProperties": false, - "description": "", - "title": "DogDonor", - "type": "object" - }, - "Donor": { - "additionalProperties": false, - "description": "", - "title": "Donor", - "type": "object" - }, - "DonorTreatmentActivity": { - "additionalProperties": false, - "description": "", - "title": "DonorTreatmentActivity", - "type": "object" - }, - "ExperimentActivity": { - "additionalProperties": false, - "description": "", - "title": "ExperimentActivity", - "type": "object" - }, - "FamilyMember": { - "additionalProperties": false, - "description": "", - "title": "FamilyMember", - "type": "object" - }, - "File": { - "additionalProperties": false, - "description": "", - "title": "File", - "type": "object" - }, - "HistopathologyActivity": { - "additionalProperties": false, - "description": "", - "title": "HistopathologyActivity", - "type": "object" - }, - "HumanDonor": { - "additionalProperties": false, - "description": "", - "title": "HumanDonor", - "type": "object" - }, - "Library": { - "additionalProperties": false, - 
"description": "", - "title": "Library", - "type": "object" - }, - "LibraryPreparationActivity": { - "additionalProperties": false, - "description": "", - "title": "LibraryPreparationActivity", - "type": "object" - }, - "Measurement": { - "additionalProperties": false, - "description": "", - "title": "Measurement", - "type": "object" - }, - "MolecularSample": { - "additionalProperties": false, - "description": "", - "title": "MolecularSample", - "type": "object" - }, - "MouseDonor": { - "additionalProperties": false, - "description": "", - "title": "MouseDonor", - "type": "object" - }, - "NucleusIsolationActivity": { - "additionalProperties": false, - "description": "", - "title": "NucleusIsolationActivity", - "type": "object" - }, - "Project": { - "additionalProperties": false, - "description": "", - "title": "Project", - "type": "object" - }, - "ReferenceAssembly": { - "additionalProperties": false, - "description": "", - "title": "ReferenceAssembly", - "type": "object" - }, - "RelationalTable": { - "additionalProperties": false, - "description": "", - "title": "RelationalTable", - "type": "object" - }, - "Sample": { - "additionalProperties": false, - "description": "", - "title": "Sample", - "type": "object" - }, - "SampleTreatmentActivity": { - "additionalProperties": false, - "description": "", - "title": "SampleTreatmentActivity", - "type": "object" - }, - "SequenceFile": { - "additionalProperties": false, - "description": "", - "title": "SequenceFile", - "type": "object" - }, - "SequenceLocation": { - "additionalProperties": false, - "description": "", - "title": "SequenceLocation", - "type": "object" - }, - "SequencingActivity": { - "additionalProperties": false, - "description": "", - "title": "SequencingActivity", - "type": "object" - }, - "SingleCell": { - "additionalProperties": false, - "description": "", - "title": "SingleCell", - "type": "object" - }, - "SingleCellAssignment": { - "additionalProperties": false, - "description": "", - "title": 
"SingleCellAssignment", - "type": "object" - }, - "SingleCellIsolationActivity": { - "additionalProperties": false, - "description": "", - "title": "SingleCellIsolationActivity", - "type": "object" - }, - "VariantCall": { - "additionalProperties": false, - "description": "", - "title": "VariantCall", - "type": "object" - }, - "VariantCallSetFile": { - "additionalProperties": false, - "description": "", - "title": "VariantCallSetFile", - "type": "object" - }, - "VariantCallingActivity": { - "additionalProperties": false, - "description": "", - "title": "VariantCallingActivity", - "type": "object" } }, - "$id": "https://datamodel.terra.bio/BioCore", + "$id": "https://docs.google.com/spreadsheets/d/1kOWpQV7pIUXcFx5jGgx75qnNI5-g_c2D/edit#gid=1482408180", "$schema": "https://json-schema.org/draft/2019-09/schema", "additionalProperties": true, "metamodel_version": "1.7.0", diff --git a/content/linkml/AnVILBioCoreMinimal.yaml b/content/linkml/AnVILBioCoreMinimal.yaml index 5647006..1a32d18 100644 --- a/content/linkml/AnVILBioCoreMinimal.yaml +++ b/content/linkml/AnVILBioCoreMinimal.yaml @@ -1,19 +1,18 @@ name: AnVILBioCoreMinimal description: AnVIL minimal BioCore schema -id: https://datamodel.terra.bio/BioCore +id: https://docs.google.com/spreadsheets/d/1kOWpQV7pIUXcFx5jGgx75qnNI5-g_c2D/edit#gid=1482408180 prefixes: linkml: https://w3id.org/linkml/ anvil: https://anvilproject.org/ default_prefix: anvil imports: - - linkml:types # this imports the linkml types schema - - BioCoreFields # The BioCore set of standard fields (i.e. columns) - - BioCoreTables # The BioCore set of standard tables (classes) + - linkml:types # this imports the linkml types schema +# - BioCoreFields # The BioCore set of standard fields (i.e. columns) default_range: string # if not specified, the default range is string classes: AnvilBioSample: - is_a: BioSample + description: Contains information about the sample(s) included in the study. 
slots: - biosample_id - anatomical_site @@ -23,16 +22,17 @@ classes: - donor_age_at_collection_unit - donor_age_at_collection_lower_bound - donor_age_at_collection_upper_bound - - donor_id + - donor_id_fk # this is the foreign key to the donor table AnvilDonor: + description: Demographic and phenotypic information about the donor. slots: - donor_id - - diagnosis - organism_type - phenotypic_sex - reported_ethnicity - genetic_ancestry AnvilFile: + description: Information for files associated with the study. slots: - file_id - data_modality @@ -41,99 +41,98 @@ classes: - file_md5sum # say that it is_a: checksum - file_name - file_ref + - reference_assembly slots: - donor: +# ids are unique identifiers for each entity or explicitly defined foreign key + donor_id_fk: aliases: hasDonor description: This property references the Donor organism from which the BioSample was acquired. # inverse: donated slot_uri: https://datamodel.terra.bio/TerraCore#hasDonor range: AnvilDonor domain: AnvilBioSample - # multivalued: true - diagnosis: - aliases: hasDiagnosis - description: A property that identifies a Diagnosis including provenance and the disease or condition diagnosed. - slot_uri: https://datamodel.terra.bio/TerraCore#hasDiagnosis - range: AnvilDiagnosis - domain: AnvilDonor - # multivalued: true + multivalued: false # assume that a biosample is only from one donor (1-1 relationship) id: - identifier: true # this is the primary identifier for the class + identifier: true # this is the type of primary key range: uriorcurie - xref_id: # external identifier for a donor - multivalued: true - recommended: true # example of making a slot recommended, does NOT serialize to JSON schema! - description: - other identifiers for the person, e.g. 
ORCID, etc - activity_id: - is_a: id - used_file_id: - # multivalued: true - range: AnvilFile - generated_file_id: - # multivalued: true - range: AnvilFile - used_biosample_id: - # multivalued: true - range: AnvilBioSample - source_datarepo_row_ids: - multivalued: true - alignmentactivity_id: + biosample_id: is_a: id - antibody_id: + donor_id: is_a: id - assayactivity_id: + diagnosis_id: is_a: id - biosample_id: + file_id: is_a: id + + anatomical_site: + aliases: hasAnatomicalSite + description: A reference to the site within the organism from which the BioSample was taken. + multivalued: false + apriori_cell_type: + aliases: hasAprioriCellType + description: A priori cell type(s) for the sample, a human assignment of cell type. + multivalued: true + biosample_type: + description: The type of biosample represented by the record. + multivalued: false + disease: + aliases: hasDisease + description: A property that identifies a disease or condition has been reported in this entity. + multivalued: false donor_age_at_collection_unit: + description: The units (e.g. years or days) of the Age of the Donor at the point in time that the BioSample was obtained or other representative entity (test, diagnosis, treatment...) was created. donor_age_at_collection_lower_bound: + description: Lower bound for age of donor at time sample was taken. If any age at collection data is present, must specify a unit as well. range: float donor_age_at_collection_upper_bound: + description: Upper bound for age of donor at time sample was taken. If any age at collection data is present, must specify a unit as well. range: float - donor_id: - is_a: id - part_of_dataset_id: - multivalued: true - dataset_id: - is_a: id - consent_group: + organism_type: + aliases: hasOrganismType + description: A reference to the organism type. 
+ comments: + - 'For example: Homo sapiens from NCBITaxon or http://purl.obolibrary.org/obo/NCBITaxon_9606 ' + multivalued: false + phenotypic_sex: + aliases: hasPhenotypicSex + description: A reference to the BiologicalSex of the Donor organism. \"An organismal quality inhering in a bearer by virtue of the bearer's physical expression of sexual characteristics. [PATO_0001894]\ + multivalued: false + reported_ethnicity: + aliases: hasReportedEthnicity + description: A property that reflects a Human Donor's reported ethnic origins. + comments: + - Recommend using HANCESTRO ancestry categories. http://purl.obolibrary.org/obo/HANCESTRO_0004. multivalued: true - data_use_permission: + genetic_ancestry: + aliases: hasGeneticAncestry + description: A property that reflects a HumanDonor's reported major contributing ancestral origins based on genetic/genomic data. + comments: + - Recommend using HANCESTRO ancestry categories. http://purl.obolibrary.org/obo/HANCESTRO_0004 multivalued: true - owner: + data_modality: + aliases: hasDataModality + description: Data modality describes the biological nature of the information gathered as the result of an Activity, independent of the technology or methods used to produce the information. multivalued: true - principal_investigator: - multivalued: true - registered_identifier: - multivalued: true - title: - diagnosis_id: - is_a: id - diagnosis_age_unit: - diagnosis_age_lower_bound: - range: float - diagnosis_age_upper_bound: - range: float - onset_age_unit: - onset_age_lower_bound: - range: float - onset_age_upper_bound: - range: float - file_id: - is_a: id - file_md5sum: - is_a: checksum file_name: + description: The name of the file. file_ref: - is_supplementary: - range: boolean - project_id: - is_a: id - generated_dataset_id: - multivalued: true - sequencingactivity_id: - is_a: id - variantcallingactivity_id: - is_a: id \ No newline at end of file + description: The fully qualified path to the file. 
+ file_format: + aliases: hasFileFormat + description: An indication of the format of an electronic file; include the full file extension including compression extensions. + comments: + - The definition of this field follows the convention used by the Human Cell Atlas. + multivalued: false + file_size: + aliases: hasFileSize + description: Property that describes the approximate size of a file in megabytes. + multivalued: false + range: integer + file_md5sum: + description: md5 checksum for the file + multivalued: false + reference_assembly: + aliases: usesReferenceAssembly + description: A reference to the collection of sequences taken as the standard for a given organism. May be defined by https://www.ncbi.nlm.nih.gov/grc. + multivalued: true \ No newline at end of file