From 347f2a166ec376270356781b9194c2d90e0b1dcd Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Tue, 13 Aug 2024 15:46:49 +0100 Subject: [PATCH 01/23] schema updates hdr v3 and gwdm v2 --- docs/GWDM/2.0.form.json | 228 ++++++++++- docs/GWDM/2.0.md | 185 ++++++++- docs/GWDM/2.0.structure.json | 246 +++++++++++- docs/HDRUK/3.0.0.form.json | 258 +++++++++++-- docs/HDRUK/3.0.0.md | 207 +++++++++- docs/HDRUK/3.0.0.structure.json | 262 ++++++++++++- hdr_schemata/definitions/HDRUK/Age.py | 33 ++ hdr_schemata/definitions/HDRUK/Assay.py | 21 + hdr_schemata/definitions/HDRUK/DatasetType.py | 4 +- hdr_schemata/definitions/HDRUK/Disease.py | 13 + hdr_schemata/definitions/HDRUK/Ethnicity.py | 26 ++ .../HDRUK/GenderAssignedAtBirth.py | 10 + hdr_schemata/definitions/HDRUK/Platform.py | 12 + hdr_schemata/definitions/HDRUK/Purpose.py | 2 +- .../HDRUK/StatisticalPopulationConstrained.py | 4 +- hdr_schemata/definitions/HDRUK/__init__.py | 6 + hdr_schemata/models/GWDM/2.0/schema.json | 331 +++++++++++++++- hdr_schemata/models/GWDM/v2_0/Coverage.py | 2 +- .../models/GWDM/v2_0/DemographicFrequency.py | 30 ++ hdr_schemata/models/GWDM/v2_0/Observations.py | 2 +- hdr_schemata/models/GWDM/v2_0/Omics.py | 27 ++ hdr_schemata/models/GWDM/v2_0/__init__.py | 14 + .../models/GWDM/v2_0/annotations/config.yaml | 28 +- hdr_schemata/models/HDRUK/3.0.0/schema.json | 363 +++++++++++++++++- hdr_schemata/models/HDRUK/v3_0_0/Access.py | 2 +- .../HDRUK/v3_0_0/DemographicFrequency.py | 30 ++ .../models/HDRUK/v3_0_0/Observations.py | 4 +- hdr_schemata/models/HDRUK/v3_0_0/Omics.py | 27 ++ .../models/HDRUK/v3_0_0/Organisation.py | 2 +- hdr_schemata/models/HDRUK/v3_0_0/Origin.py | 4 +- hdr_schemata/models/HDRUK/v3_0_0/Summary.py | 4 +- hdr_schemata/models/HDRUK/v3_0_0/__init__.py | 14 + .../HDRUK/v3_0_0/annotations/config.yaml | 34 +- 33 files changed, 2329 insertions(+), 106 deletions(-) create mode 100644 hdr_schemata/definitions/HDRUK/Age.py create mode 100644 hdr_schemata/definitions/HDRUK/Assay.py 
create mode 100644 hdr_schemata/definitions/HDRUK/Disease.py create mode 100644 hdr_schemata/definitions/HDRUK/Ethnicity.py create mode 100644 hdr_schemata/definitions/HDRUK/GenderAssignedAtBirth.py create mode 100644 hdr_schemata/definitions/HDRUK/Platform.py create mode 100644 hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py create mode 100644 hdr_schemata/models/GWDM/v2_0/Omics.py create mode 100644 hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py create mode 100644 hdr_schemata/models/HDRUK/v3_0_0/Omics.py diff --git a/docs/GWDM/2.0.form.json b/docs/GWDM/2.0.form.json index 66bab33..1c79457 100644 --- a/docs/GWDM/2.0.form.json +++ b/docs/GWDM/2.0.form.json @@ -371,7 +371,7 @@ null ] }, - "location": "coverage.followup" + "location": "coverage.followUp" }, { "required": false, @@ -1064,7 +1064,7 @@ "required": true, "title": "Statistical Population", "description": "Please select one of the following statistical populations for you observation", - "guidance": "- **Persons**: Unique persons recorded in the dataset\\n- **Events**: Unique events such as procedures and prescriptions within the dataset\\n-**Findings**: Unique findings included in the dataset such as diagnoses'\\n-**Number of scans per modality**: Unique scans for a specified imaging method modality (e.g. 
12 x-rays)", + "guidance": "", "examples": [ "PERSONS" ], @@ -1073,13 +1073,13 @@ "types": { "type": "string", "options": [ - "Person", - "Event", + "Persons", + "Events", "Findings", "Number of scans per modality" ] }, - "location": "observations.observedNode" + "location": "observations.measuredNode" }, { "required": true, @@ -1422,6 +1422,224 @@ "type": "string" }, "location": "tissuesSampleCollection.tissueSampleMetadata.useRestrictions" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "0-6 days", + "7-27 days", + "1-11 months", + "1-4 years", + "5-9 years", + "10-14 years", + "15-19 years", + "20-14 years", + "25-29 years", + "30-34 years", + "35-39 years", + "40-44 years", + "45-49 years", + "50-54 years", + "55-59 years", + "60-64 years", + "65-69 years", + "70-74 years", + "75-79 years", + "80-84 years", + "85-89 years", + "90-94 years", + "65-99 years", + "100+ years" + ] + }, + "location": "demographicFrequency.age.bin" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "demographicFrequency.age.count" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "White - British", + "White - Irish", + "White - Any other White background", + "Mixed - White and Black Caribbean", + "Mixed - White and Black African", + "Mixed - White and Asian", + "Mixed - Any other mixed background", + "Asian or Asian British - Indian", + "Asian or Asian British - Pakistani", + "Asian or Asian British - Bangladeshi", + "Asian or Asian British - Any other Asian background", + "Black or Black British - Caribbean", + "Black or Black British - African", + "Black 
or Black British - Any other Black background", + "Other Ethnic Groups - Chinese", + "Other Ethnic Groups - Any other ethnic group", + "Not stated", + "Not known" + ] + }, + "location": "demographicFrequency.ethnicity.bin" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "demographicFrequency.ethnicity.count" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "male", + "female" + ] + }, + "location": "demographicFrequency.genderAssignedAtBirth.bin" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "demographicFrequency.genderAssignedAtBirth.count" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "str", + "location": "demographicFrequency.disease.diseaseCode" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "ICD10", + "SNOMED CT", + "MeSH" + ] + }, + "location": "demographicFrequency.disease.diseaseCodeVocabulary" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "demographicFrequency.disease.count" + }, + { + "required": true, + "title": "Omics assay", + "description": "The specific 'omics assay that generated the dataset.", + "guidance": "The specific 'omics assay that generated the dataset. 
If the assay used to generate your dataset is not listed, please contact the gateway team by submitting an enquiry.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "NMR spectroscopy", + "mass-spectrometry", + "whole genome sequencing", + "exome sequencing", + "genotyping by array", + "transcriptome profiling by high-throughput sequencing", + "transcriptome profiling by array", + "amplicon sequencing", + "methylation binding domain sequencing", + "methylation profiling by high-throughput sequencing", + "genomic variant calling", + "chromatin accessibility profiling by high-throughput sequencing", + "histone modification profiling by high-throughput sequencing", + "chromatin immunoprecipitation sequencing", + "whole genome shotgun sequencing", + "whole transcriptome sequencing", + "targeted mutation analysis" + ] + }, + "location": "omics.assay" + }, + { + "required": true, + "title": "Omics Platform", + "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "guidance": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Other", + "NMR Nightingale", + "Metabolon", + "Biocrates", + "Illumina", + "Oxford Nanopore", + "454", + "Hi-C", + "HiFi" + ] + }, + "location": "omics.platform" } ] } \ No newline at end of file diff --git a/docs/GWDM/2.0.md b/docs/GWDM/2.0.md index 5973821..d4d665c 100644 --- a/docs/GWDM/2.0.md +++ b/docs/GWDM/2.0.md @@ -330,7 +330,7 @@ Please indicate if the dataset is representative of the patient pathway and any -### followup +### followUp If known, what is the typical time span that a patient appears in the dataset (follow up period) @@ -937,13 +937,13 @@ Multiple observations about the dataset may be provided and users are expected t -### observedNode +### measuredNode Please select one of the following statistical populations for you observation -| title | guidance | is_list | required | type | -|:-----------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------------------------------------------| -| Statistical Population | - **Persons**: Unique persons recorded in the dataset
- **Events**: Unique events such as procedures and prescriptions within the dataset
-**Findings**: Unique findings included in the dataset such as diagnoses'
-**Number of scans per modality**: Unique scans for a specified imaging method modality (e.g. 12 x-rays) | False | True | ["StatisticalPopulationConstrainedV2['Person','Event','Findings','Number of scans per modality']"] | +| title | guidance | is_list | required | type | +|:-----------------------|:-----------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------| +| Statistical Population | | False | True | ["StatisticalPopulationConstrainedV2['Persons','Events','Findings','Number of scans per modality']"] | Examples: @@ -1377,3 +1377,178 @@ Restrictions on the use of the tissue sample + +## demographicFrequency + +An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes. + + + + + + +### age + +Array of age bins and their corresponding counts. + + + + + + +#### bin + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| | | False | True | ["AgeBin['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']"] | + + + + +#### count + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------| +| | | False | True | ['int'] | + + + + +### ethnicity + +Array of ethnicity bins and their corresponding counts. 
+ + + + + + +#### bin + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| | | False | True | ["EthnicityBin['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']"] | + + + + +#### count + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------| +| | | False | True | ['int'] | + + + + +### genderAssignedAtBirth + +Gender assigned at birth, male or female, and their corresponding counts. 
+ + + + + + +#### bin + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:-------------------------------| +| | | False | True | ["GenderBin['male','female']"] | + + + + +#### count + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------| +| | | False | True | ['int'] | + + + + +### disease + +Array of diseases and their corresponding counts. + + + + + + +#### diseaseCode + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:---------------| +| | | False | True | ['str', 'int'] | + + + + +#### diseaseCodeVocabulary + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:------------------------------------------------------| +| | | False | True | ["DiseaseCodeVocabulary['ICD10','SNOMED CT','MeSH']"] | + + + + +#### count + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------| +| | | False | True | ['int'] | + + + + +## omics + +Omics + + + + + + +### assay + +The specific 'omics assay that generated the dataset. 
+ +| title | guidance | is_list | required | type | +|:------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Omics assay | The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry. | False | True | ["Assay['NMR spectroscopy','mass-spectrometry','whole genome sequencing','exome sequencing','genotyping by array','transcriptome profiling by high-throughput sequencing','transcriptome profiling by array','amplicon sequencing','methylation binding domain sequencing','methylation profiling by high-throughput sequencing','genomic variant calling','chromatin accessibility profiling by high-throughput sequencing','histone modification profiling by high-throughput sequencing','chromatin immunoprecipitation sequencing','whole genome shotgun sequencing','whole transcriptome sequencing','targeted mutation analysis']", 'null'] | + + + + +### platform + +The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf. + +| title | guidance | is_list | required | type | +|:---------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------------| +| Omics Platform | The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf. 
| False | True | ["Platform['Other','NMR Nightingale','Metabolon','Biocrates','Illumina','Oxford Nanopore','454','Hi-C','HiFi']", 'null'] | + + + diff --git a/docs/GWDM/2.0.structure.json b/docs/GWDM/2.0.structure.json index 61f3cb8..e558dd8 100644 --- a/docs/GWDM/2.0.structure.json +++ b/docs/GWDM/2.0.structure.json @@ -437,7 +437,7 @@ "subItems": [] }, { - "name": "followup", + "name": "followUp", "required": false, "title": "Followup", "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", @@ -1277,16 +1277,16 @@ "is_optional": true, "subItems": [ { - "name": "observedNode", + "name": "measuredNode", "required": true, "title": "Statistical Population", "description": "Please select one of the following statistical populations for you observation", - "guidance": "Persons: Unique persons recorded in the dataset- Events: Unique events such as procedures and prescriptions within the dataset-Findings: Unique findings included in the dataset such as diagnoses'-Number of scans per modality: Unique scans for a specified imaging method modality (e.g. 
12 x-rays)", + "guidance": "", "examples": [ "PERSONS" ], "type": [ - "StatisticalPopulationConstrainedV2['Person','Event','Findings','Number of scans per modality']" + "StatisticalPopulationConstrainedV2['Persons','Events','Findings','Number of scans per modality']" ], "is_list": false, "is_optional": false @@ -1875,5 +1875,243 @@ ] } ] + }, + { + "name": "demographicFrequency", + "required": false, + "title": "Demographic frequency", + "description": "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes.", + "guidance": "", + "examples": null, + "type": [ + "DemographicFrequency", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "age", + "required": true, + "title": "Age", + "description": "Array of age bins and their corresponding counts.", + "guidance": "", + "examples": null, + "type": [ + "Age" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "bin", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "AgeBin['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "count", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + } + ] + }, + { + "name": "ethnicity", + "required": true, + "title": "Ethnicity", + "description": "Array of ethnicity bins and their corresponding counts.", + "guidance": "", + "examples": null, + "type": [ + "Ethnicity" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": 
"bin", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "EthnicityBin['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "count", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + } + ] + }, + { + "name": "genderAssignedAtBirth", + "required": true, + "title": "Gender assigned at birth", + "description": "Gender assigned at birth, male or female, and their corresponding counts.", + "guidance": "", + "examples": null, + "type": [ + "GenderAssignedAtBirth" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "bin", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "GenderBin['male','female']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "count", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + } + ] + }, + { + "name": "disease", + "required": true, + "title": "Disease", + "description": "Array of diseases and their corresponding counts.", + "guidance": "", + "examples": null, + "type": [ + "Disease" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + 
{ + "name": "diseaseCode", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "str", + "int" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "diseaseCodeVocabulary", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "DiseaseCodeVocabulary['ICD10','SNOMED CT','MeSH']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "count", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + } + ] + } + ] + }, + { + "name": "omics", + "required": false, + "title": "Omics", + "description": "Omics", + "guidance": "", + "examples": null, + "type": [ + "Omics", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "assay", + "required": true, + "title": "Omics assay", + "description": "The specific 'omics assay that generated the dataset.", + "guidance": "The specific 'omics assay that generated the dataset. 
If the assay used to generate your dataset is not listed, please contact the gateway team by submitting an enquiry.", + "examples": null, + "type": [ + "Assay['NMR spectroscopy','mass-spectrometry','whole genome sequencing','exome sequencing','genotyping by array','transcriptome profiling by high-throughput sequencing','transcriptome profiling by array','amplicon sequencing','methylation binding domain sequencing','methylation profiling by high-throughput sequencing','genomic variant calling','chromatin accessibility profiling by high-throughput sequencing','histone modification profiling by high-throughput sequencing','chromatin immunoprecipitation sequencing','whole genome shotgun sequencing','whole transcriptome sequencing','targeted mutation analysis']", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "platform", + "required": true, + "title": "Omics Platform", + "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "guidance": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "examples": null, + "type": [ + "Platform['Other','NMR Nightingale','Metabolon','Biocrates','Illumina','Oxford Nanopore','454','Hi-C','HiFi']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] } ] \ No newline at end of file diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 0da9cea..3c2f695 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -139,13 +139,8 @@ "examples": null, "is_list": false, "is_optional": true, - "types": { - "title": "Url", - "format": "uri", - "minLength": 1, - "type": "string" - }, - "location": "summary.dataProvider.identifier" + "types": "int", + "location": "summary.dataCustodian.identifier" }, { "required": true, @@ -161,7 +156,7 @@ "title": "OneHundredFiftyCharacters", "type": "string" }, - "location": "summary.dataProvider.name" + "location": "summary.dataCustodian.name" }, { "required": false, @@ -177,7 +172,7 @@ "minLength": 1, "type": "string" }, - "location": "summary.dataProvider.logo" + "location": "summary.dataCustodian.logo" }, { "required": false, @@ -193,7 +188,7 @@ "minLength": 2, "type": "string" }, - "location": "summary.dataProvider.description" + "location": "summary.dataCustodian.description" }, { "required": true, @@ -208,7 +203,7 @@ "format": "email", "type": "string" }, - "location": "summary.dataProvider.contactPoint" + "location": "summary.dataCustodian.contactPoint" }, { "required": false, @@ -227,7 +222,7 @@ "NCS" ] }, - "location": "summary.dataProvider.memberOf" + "location": "summary.dataCustodian.memberOf" }, { "required": true, @@ -483,7 +478,7 @@ null ] }, - "location": "coverage.followup" + "location": "coverage.followUp" }, { "required": false, @@ -538,7 +533,7 @@ "Audit", "Administrative", "Financial", - 
"Statuatory", + "Statutory", "Other", null ] @@ -551,7 +546,7 @@ "description": "The topic areas to which the dataset content relates.", "guidance": "Types include those listed below. Datasets can have more than one type associated.\\n- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.\\n- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.\\n- **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.\\n- **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.\\n- **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.\\n- **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics.\\n- **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.\\n- **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.\\n- **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.\\n- **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.\\n- **Information and communication**: Includes any data related to the study or application of information and communication.\\n- **Politics**: Includes any data related to political views, activities, voting, etc.", 
"examples": null, - "is_list": false, + "is_list": true, "is_optional": false, "types": { "type": "string", @@ -578,7 +573,7 @@ "description": "The sub-type of the dataset content. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType ('proteomics', 'transcriptomics', 'epigenomics', 'metabolomics', 'metagenomics', 'genomics', 'lipidomics') is selected", "guidance": "Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.\\n- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.\\n- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.\\n- *Measurements/Tests**- Includes any data related to laboratory or other diagnostics.\\n- *Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.\\n- *Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.\\n- *Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. 
Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.\\n- *Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.\\n- *Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.\\n- *Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.\\n- *Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.\\n- *Information and communication**: Includes any data related to the study or application of information and communication.\\n- *Politics**: Includes any data related to political views, activities, voting, etc.", "examples": null, - "is_list": false, + "is_list": true, "is_optional": true, "types": { "type": "string", @@ -623,6 +618,7 @@ "Multiomics", "Metagenomics", "Genomics", + "Lipidomics", "Education", "Crime and Justice", "Ethnicity", @@ -1000,11 +996,11 @@ "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", "guidance": "A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/", "examples": null, - "is_list": false, + "is_list": true, "is_optional": true, "types": { - "title": "CommaSeparatedValues", - "pattern": "([^,]+)", + "pattern": "^[A-Z]{2}(-[A-Z]{2,3})?$", + "title": "Isocountrycode", "type": "string" }, "location": "accessibility.access.jurisdiction" @@ -1624,13 +1620,13 @@ "types": { "type": "string", "options": [ - "Person", - "Event", + "Persons", + "Events", "Findings", "Number of scans per modality" ] }, - "location": "observations.observedNode" + "location": "observations.measuredNode" }, { "required": true, @@ -1682,6 +1678,224 @@ "title": "MeasuredProperty" }, "location": "observations.measuredProperty" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "0-6 days", + "7-27 days", + "1-11 months", + "1-4 years", + "5-9 years", + "10-14 years", + "15-19 years", + "20-14 years", + "25-29 years", + "30-34 years", + "35-39 years", + "40-44 years", + "45-49 years", + "50-54 years", + "55-59 years", + "60-64 years", + "65-69 years", + "70-74 years", + "75-79 years", + "80-84 years", + "85-89 years", + "90-94 years", + "65-99 years", + "100+ years" + ] + }, + "location": "demographicFrequency.age.bin" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "demographicFrequency.age.count" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "White - British", + "White - Irish", + "White - Any 
other White background", + "Mixed - White and Black Caribbean", + "Mixed - White and Black African", + "Mixed - White and Asian", + "Mixed - Any other mixed background", + "Asian or Asian British - Indian", + "Asian or Asian British - Pakistani", + "Asian or Asian British - Bangladeshi", + "Asian or Asian British - Any other Asian background", + "Black or Black British - Caribbean", + "Black or Black British - African", + "Black or Black British - Any other Black background", + "Other Ethnic Groups - Chinese", + "Other Ethnic Groups - Any other ethnic group", + "Not stated", + "Not known" + ] + }, + "location": "demographicFrequency.ethnicity.bin" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "demographicFrequency.ethnicity.count" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "male", + "female" + ] + }, + "location": "demographicFrequency.genderAssignedAtBirth.bin" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "demographicFrequency.genderAssignedAtBirth.count" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "str", + "location": "demographicFrequency.disease.diseaseCode" + }, + { + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": { + "type": "string", + "options": [ + "ICD10", + "SNOMED CT", + "MeSH" + ] + }, + "location": "demographicFrequency.disease.diseaseCodeVocabulary" + }, + { + "required": true, + "title": null, + 
"description": null, + "guidance": "", + "examples": null, + "is_list": false, + "is_optional": false, + "types": "int", + "location": "demographicFrequency.disease.count" + }, + { + "required": true, + "title": "Omics assay", + "description": "The specific 'omics assay that generated the dataset.", + "guidance": "The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "NMR spectroscopy", + "mass-spectrometry", + "whole genome sequencing", + "exome sequencing", + "genotyping by array", + "transcriptome profiling by high-throughput sequencing", + "transcriptome profiling by array", + "amplicon sequencing", + "methylation binding domain sequencing", + "methylation profiling by high-throughput sequencing", + "genomic variant calling", + "chromatin accessibility profiling by high-throughput sequencing", + "histone modification profiling by high-throughput sequencing", + "chromatin immunoprecipitation sequencing", + "whole genome shotgun sequencing", + "whole transcriptome sequencing", + "targeted mutation analysis" + ] + }, + "location": "omics.assay" + }, + { + "required": true, + "title": "Omics Platform", + "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "guidance": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "examples": null, + "is_list": false, + "is_optional": true, + "types": { + "type": "string", + "options": [ + "Other", + "NMR Nightingale", + "Metabolon", + "Biocrates", + "Illumina", + "Oxford Nanopore", + "454", + "Hi-C", + "HiFi" + ] + }, + "location": "omics.platform" } ] } \ No newline at end of file diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index 72049c7..b9d819e 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -119,7 +119,7 @@ Examples: * CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web® including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice. -### dataProvider +### dataCustodian This is the organisation responsible for running or supporting the data access request process, as well as enquiries about a dataset. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank. @@ -132,9 +132,9 @@ This is the organisation responsible for running or supporting the data access r Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation. 
-| title | guidance | is_list | required | type | -|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| -| identifier | **Example**: https://ror.org/053fq8t95
If your organisation does not have a ROR identifier please use the “suggest and institute” function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| title | guidance | is_list | required | type | +|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------| +| identifier | **Example**: https://ror.org/053fq8t95
If your organisation does not have a ROR identifier please use the “suggest and institute” function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform | False | False | ['int', 'null'] | @@ -376,7 +376,7 @@ The type of biospecimen saved from a biological entity. -### followup +### followUp If known, what is the typical time span that a patient appears in the dataset (follow up period). In a prospective cohort study, after baseline information is collected, participants are followed “longitudinally” i.e. new information is collected about them for a period of time afterward. This is known as the “follow up period”. What is the typical time span of follow up, e.g. 1 year, 5 years? If there are multiple cohorts in the dataset with varying follow up periods, please provide the longest follow up period. @@ -431,9 +431,9 @@ Coverage by origin (geographical and situations). Please indicate the purpose(s) that the dataset was collected. -| title | guidance | is_list | required | type | -|:------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------| -| Purpose of dataset collection | - **Research cohort**: 
Data collected for a defined group of people.
- **Study**: Data collected for a specific research study.
- **Disease registry**: Data collected as part of a disease registry.
- **Trial**: Data collected for as part of a clinical trial.
- **Care**: Data collected as part of routine clinical care.
- **Audit**: Data collected as part of an audit programme.
- **Administrative**: Data collected for administrative and management information purposes.
- **Financial**: Data collected either for payments or for billing.
- **Statutory**: Data collected in compliance with statutory requirements.
- **Other**: Data collected for other purpose. | True | False | ["PurposeV2['Research cohort','Study','Disease registry','Trial','Care','Audit','Administrative','Financial','Statuatory','Other',null]"] | +| title | guidance | is_list | required | type | +|:------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------| +| Purpose of dataset collection | - **Research cohort**: Data collected for a defined group of people.
- **Study**: Data collected for a specific research study.
- **Disease registry**: Data collected as part of a disease registry.
- **Trial**: Data collected for as part of a clinical trial.
- **Care**: Data collected as part of routine clinical care.
- **Audit**: Data collected as part of an audit programme.
- **Administrative**: Data collected for administrative and management information purposes.
- **Financial**: Data collected either for payments or for billing.
- **Statutory**: Data collected in compliance with statutory requirements.
- **Other**: Data collected for other purpose. | True | False | ["PurposeV2['Research cohort','Study','Disease registry','Trial','Care','Audit','Administrative','Financial','Statutory','Other',null]"] | @@ -444,7 +444,7 @@ The topic areas to which the dataset content relates. | title | guidance | is_list | required | type | |:-------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Dataset type | Types include those listed below. Datasets can have more than one type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.
- **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.
- **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics.
- **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.
- **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- **Information and communication**: Includes any data related to the study or application of information and communication.
- **Politics**: Includes any data related to political views, activities, voting, etc. | False | True | ["DatasetTypeV2['Health and disease','Treatments/Interventions','Measurements/Tests','Imaging types','Imaging area of the body','Omics','Socioeconomic','Lifestyle','Registry','Environment and energy','Information and communication','Politics']"] | +| Dataset type | Types include those listed below. Datasets can have more than one type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.
- **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm, abdomen or leg imaging.
- **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics.
- **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.
- **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- **Information and communication**: Includes any data related to the study or application of information and communication.
- **Politics**: Includes any data related to political views, activities, voting, etc. | True | True | ["DatasetTypeV2['Health and disease','Treatments/Interventions','Measurements/Tests','Imaging types','Imaging area of the body','Omics','Socioeconomic','Lifestyle','Registry','Environment and energy','Information and communication','Politics']"] | @@ -453,9 +453,9 @@ The topic areas to which the dataset content relates. The sub-type of the dataset content. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType ('proteomics', 'transcriptomics', 'epigenomics', 'metabolomics', 'metagenomics', 'genomics', 'lipidomics') is selected -| title | guidance | is_list | required | type | -|:-----------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Dataset sub-type | Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- *Measurements/Tests**- Includes any data related to laboratory or other diagnostics.
- *Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- *Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.
- *Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.
- *Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- *Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- *Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.
- *Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- *Information and communication**: Includes any data related to the study or application of information and communication.
- *Politics**: Includes any data related to political views, activities, voting, etc. | False | True | ["DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Multiomics','Metagenomics','Genomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']", 'null'] | +| title | guidance | is_list | required | type | 
+|:-----------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----
------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset sub-type | Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.
- **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm, abdomen or leg imaging.
- **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more than one omics datasetSubType is selected.
- **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.
- **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- **Information and communication**: Includes any data related to the study or application of information and communication.
- *Politics**: Includes any data related to political views, activities, voting, etc. | True | True | ["DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Multiomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']"] | @@ -1023,13 +1023,13 @@ This section provides an overview of observations of your dataset linked to spec -### observedNode +### measuredNode Please select one of the following broad notes for your measured observation. Indicating whether the measured property is a recording of unique persons, events, findings or scans per modality. -| title | guidance | is_list | required | type | -|:-----------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------------------------------------------| -| Dataset volume measure | - **Persons**: Unique persons recorded in the dataset
- **Events**: Unique events such as procedures and prescriptions within the dataset
-**Findings**: Unique findings included in the dataset such as diagnoses'
-**Number of scans per modality**: Unique scans for a specified imaging method modality (e.g. 12 x-rays) | False | True | ["StatisticalPopulationConstrainedV2['Person','Event','Findings','Number of scans per modality']"] | +| title | guidance | is_list | required | type | +|:-----------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------| +| Dataset volume measure | - **Persons**: Unique persons recorded in the dataset
- **Events**: Unique events such as procedures and prescriptions within the dataset
- **Findings**: Unique findings included in the dataset such as diagnoses
-**Number of scans per modality**: Unique scans for a specified imaging method modality (e.g. 12 x-rays) | False | True | ["StatisticalPopulationConstrainedV2['Persons','Events','Findings','Number of scans per modality']"] | Examples: @@ -1225,3 +1225,178 @@ Links to locations of information and or raw downloads of synthetic data associa + +## demographicFrequency + +An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes. + + + + + + +### age + +Array of age bins and their corresponding counts. + + + + + + +#### bin + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| | | False | True | ["AgeBin['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']"] | + + + + +#### count + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------| +| | | False | True | ['int'] | + + + + +### ethnicity + +Array of ethnicity bins and their corresponding counts. 
+ + + + + + +#### bin + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| | | False | True | ["EthnicityBin['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']"] | + + + + +#### count + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------| +| | | False | True | ['int'] | + + + + +### genderAssignedAtBirth + +Gender assigned at birth, male or female, and their corresponding counts. 
+ + + + + + +#### bin + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:-------------------------------| +| | | False | True | ["GenderBin['male','female']"] | + + + + +#### count + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------| +| | | False | True | ['int'] | + + + + +### disease + +Array of diseases and their corresponding counts. + + + + + + +#### diseaseCode + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:---------------| +| | | False | True | ['str', 'int'] | + + + + +#### diseaseCodeVocabulary + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:------------------------------------------------------| +| | | False | True | ["DiseaseCodeVocabulary['ICD10','SNOMED CT','MeSH']"] | + + + + +#### count + +None + +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:--------| +| | | False | True | ['int'] | + + + + +## omics + +Omics + + + + + + +### assay + +The specific 'omics assay that generated the dataset. 
+ +| title | guidance | is_list | required | type | +|:------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Omics assay | The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry. | False | True | ["Assay['NMR spectroscopy','mass-spectrometry','whole genome sequencing','exome sequencing','genotyping by array','transcriptome profiling by high-throughput sequencing','transcriptome profiling by array','amplicon sequencing','methylation binding domain sequencing','methylation profiling by high-throughput sequencing','genomic variant calling','chromatin accessibility profiling by high-throughput sequencing','histone modification profiling by high-throughput sequencing','chromatin immunoprecipitation sequencing','whole genome shotgun sequencing','whole transcriptome sequencing','targeted mutation analysis']", 'null'] | + + + + +### platform + +The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf. + +| title | guidance | is_list | required | type | +|:---------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------------| +| Omics Platform | The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf. | False | True | ["Platform['Other','NMR Nightingale','Metabolon','Biocrates','Illumina','Oxford Nanopore','454','Hi-C','HiFi']", 'null'] | + + + diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index c0b4df8..c33186a 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -155,7 +155,7 @@ "subItems": [] }, { - "name": "dataProvider", + "name": "dataCustodian", "required": true, "title": "Dataset Custodian", "description": "This is the organisation responsible for running or supporting the data access request process, as well as enquiries about a dataset. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. 
Cambridge Blood and Stem Cell Biobank.", @@ -175,12 +175,11 @@ "guidance": "Example: https://ror.org/053fq8t95If your organisation does not have a ROR identifier please use the \u201csuggest and institute\u201d function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform", "examples": null, "type": [ - "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "int", "null" ], "is_list": false, - "is_optional": true, - "subItems": [] + "is_optional": true }, { "name": "name", @@ -498,7 +497,7 @@ "is_optional": true }, { - "name": "followup", + "name": "followUp", "required": false, "title": "Follow-up", "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period). In a prospective cohort study, after baseline information is collected, participants are followed \u201clongitudinally\u201d i.e. new information is collected about them for a period of time afterward. This is known as the \u201cfollow up period\u201d. What is the typical time span of follow up, e.g. 1 year, 5 years? 
If there are multiple cohorts in the dataset with varying follow up periods, please provide the longest follow up period.", @@ -577,7 +576,7 @@ "guidance": "Research cohort: Data collected for a defined group of people.- Study: Data collected for a specific research study.- Disease registry: Data collected as part of a disease registry.- Trial: Data collected for as part of a clinical trial.- Care: Data collected as part of routine clinical care.- Audit: Data collected as part of an audit programme.- Administrative: Data collected for administrative and management information purposes.- Financial: Data collected either for payments or for billing.- Statutory: Data collected in compliance with statutory requirements.- Other: Data collected for other purpose.", "examples": null, "type": [ - "PurposeV2['Research cohort','Study','Disease registry','Trial','Care','Audit','Administrative','Financial','Statuatory','Other',null]" + "PurposeV2['Research cohort','Study','Disease registry','Trial','Care','Audit','Administrative','Financial','Statutory','Other',null]" ], "is_list": true, "is_optional": true @@ -592,7 +591,7 @@ "type": [ "DatasetTypeV2['Health and disease','Treatments/Interventions','Measurements/Tests','Imaging types','Imaging area of the body','Omics','Socioeconomic','Lifestyle','Registry','Environment and energy','Information and communication','Politics']" ], - "is_list": false, + "is_list": true, "is_optional": false }, { @@ -603,10 +602,9 @@ "guidance": "Sub-types include those listed below under each data type. 
Datasets can have more than one sub-type associated.- Health and disease: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.- Treatments/Interventions: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.- Measurements/Tests- Includes any data related to laboratory or other diagnostics.- Imaging types: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.- *Imaging area of the body: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.- Omics: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.- Socioeconomic: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.- *Lifestyle: Includes any data related to smoking, physical activity, dietary habits or alcohol.- Registry: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.- Environment and energy: Includes any data related to the monitoring or study of environmental or energy factors or events.- *Information and communication: Includes any data related to the study or application of information and communication.- Politics*: Includes any data related to political views, activities, voting, etc.", "examples": null, "type": [ - "DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and 
Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Multiomics','Metagenomics','Genomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']", - "null" + "DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Multiomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']" ], - "is_list": false, + "is_list": true, "is_optional": true }, { @@ -927,7 +925,7 @@ "List", "null" ], - "is_list": false, + "is_list": true, "is_optional": true, "subItems": [] }, @@ -1378,7 +1376,7 @@ "is_optional": false, 
"subItems": [ { - "name": "observedNode", + "name": "measuredNode", "required": true, "title": "Dataset volume measure", "description": "Please select one of the following broad notes for your measured observation. Indicating whether the measured property is a recording of unique persons, events, findings or scans per modality.", @@ -1387,7 +1385,7 @@ "Persons" ], "type": [ - "StatisticalPopulationConstrainedV2['Person','Event','Findings','Number of scans per modality']" + "StatisticalPopulationConstrainedV2['Persons','Events','Findings','Number of scans per modality']" ], "is_list": false, "is_optional": false @@ -1648,5 +1646,243 @@ "subItems": [] } ] + }, + { + "name": "demographicFrequency", + "required": false, + "title": "Demographic frequency", + "description": "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes.", + "guidance": "", + "examples": null, + "type": [ + "DemographicFrequency", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "age", + "required": true, + "title": "Age", + "description": "Array of age bins and their corresponding counts.", + "guidance": "", + "examples": null, + "type": [ + "Age" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "bin", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "AgeBin['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "count", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + 
"is_optional": false + } + ] + }, + { + "name": "ethnicity", + "required": true, + "title": "Ethnicity", + "description": "Array of ethnicity bins and their corresponding counts.", + "guidance": "", + "examples": null, + "type": [ + "Ethnicity" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "bin", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "EthnicityBin['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "count", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + } + ] + }, + { + "name": "genderAssignedAtBirth", + "required": true, + "title": "Gender assigned at birth", + "description": "Gender assigned at birth, male or female, and their corresponding counts.", + "guidance": "", + "examples": null, + "type": [ + "GenderAssignedAtBirth" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "bin", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "GenderBin['male','female']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "count", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "int" + 
], + "is_list": false, + "is_optional": false + } + ] + }, + { + "name": "disease", + "required": true, + "title": "Disease", + "description": "Array of diseases and their corresponding counts.", + "guidance": "", + "examples": null, + "type": [ + "Disease" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "diseaseCode", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "str", + "int" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "diseaseCodeVocabulary", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "DiseaseCodeVocabulary['ICD10','SNOMED CT','MeSH']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "count", + "required": true, + "title": null, + "description": null, + "guidance": "", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + } + ] + } + ] + }, + { + "name": "omics", + "required": false, + "title": "Omics", + "description": "Omics", + "guidance": "", + "examples": null, + "type": [ + "Omics", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "assay", + "required": true, + "title": "Omics assay", + "description": "The specific 'omics assay that generated the dataset.", + "guidance": "The specific 'omics assay that generated the dataset. 
If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry.", + "examples": null, + "type": [ + "Assay['NMR spectroscopy','mass-spectrometry','whole genome sequencing','exome sequencing','genotyping by array','transcriptome profiling by high-throughput sequencing','transcriptome profiling by array','amplicon sequencing','methylation binding domain sequencing','methylation profiling by high-throughput sequencing','genomic variant calling','chromatin accessibility profiling by high-throughput sequencing','histone modification profiling by high-throughput sequencing','chromatin immunoprecipitation sequencing','whole genome shotgun sequencing','whole transcriptome sequencing','targeted mutation analysis']", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "platform", + "required": true, + "title": "Omics Platform", + "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "guidance": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "examples": null, + "type": [ + "Platform['Other','NMR Nightingale','Metabolon','Biocrates','Illumina','Oxford Nanopore','454','Hi-C','HiFi']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] } ] \ No newline at end of file diff --git a/hdr_schemata/definitions/HDRUK/Age.py b/hdr_schemata/definitions/HDRUK/Age.py new file mode 100644 index 0000000..8c47b91 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/Age.py @@ -0,0 +1,33 @@ +from pydantic import BaseModel +from enum import Enum + +class AgeBin(Enum): + DAYS_0_6 = '0-6 days' + DAYS_7_27 = '7-27 days' + MONTHS_1_11 = '1-11 months' + YEARS_1_4 = '1-4 years' + YEARS_5_9 = '5-9 years' + YEARS_10_14 = '10-14 years' + YEARS_15_19 = '15-19 years' + YEARS_20_14 = '20-14 years' + YEARS_25_29 = '25-29 years' + YEARS_30_34 = '30-34 years' + YEARS_35_39 = '35-39 years' + YEARS_40_44 = '40-44 years' + YEARS_45_49 = '45-49 years' + YEARS_50_54 = '50-54 years' + YEARS_55_59 = '55-59 years' + YEARS_60_64 = '60-64 years' + YEARS_65_69 = '65-69 years' + YEARS_70_74 = '70-74 years' + YEARS_75_79 = '75-79 years' + YEARS_80_84 = '80-84 years' + YEARS_85_89 = '85-89 years' + YEARS_90_94 = '90-94 years' + YEARS_65_99 = '65-99 years' + YEARS_100_PLUS = '100+ years' + +class Age(BaseModel): + bin: AgeBin + count: int + diff --git a/hdr_schemata/definitions/HDRUK/Assay.py b/hdr_schemata/definitions/HDRUK/Assay.py new file mode 100644 index 0000000..d6bf24b --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/Assay.py @@ -0,0 +1,21 @@ +from enum import Enum + +class Assay(Enum): + NMR_SPECTROSCOPY = 'NMR spectroscopy' + MASS_SPECTROMETRY = 'mass-spectrometry' + WHOLE_GENOME_SEQUENCING = 'whole genome sequencing' + EXOME_SEQUENCING = 'exome sequencing' + GENOTYPING_BY_ARRAY = 'genotyping by array' + 
TRANSCRIPTOME_PROFILING_BY_HIGH_THROUGHPUT_SEQUENCING = 'transcriptome profiling by high-throughput sequencing' + TRANSCRIPTOME_PROFILING_BY_ARRAY = 'transcriptome profiling by array' + AMPLICON_SEQUENCING = 'amplicon sequencing' + METHYLATION_BINDING_DOMAIN_SEQUENCING = 'methylation binding domain sequencing' + METHYLATION_PROFILING_BY_HIGH_THROUGHPUT_SEQUENCING = 'methylation profiling by high-throughput sequencing' + GENOMIC_VARIANT_CALLING = 'genomic variant calling' + CHROMATIN_ACCESSIBILITY_PROFILING_BY_HIGH_THROUGHPUT_SEQUENCING = 'chromatin accessibility profiling by high-throughput sequencing' + HISTONE_MODIFICATION_PROFILING_BY_HIGH_THROUGHPUT_SEQUENCING = 'histone modification profiling by high-throughput sequencing' + CHROMATIN_IMMUNOPRECIPITATION_SEQUENCING = 'chromatin immunoprecipitation sequencing' + WHOLE_GENOME_SHOTGUN_SEQUENCING = 'whole genome shotgun sequencing' + WHOLE_TRANSCRIPTOME_SEQUENCING = 'whole transcriptome sequencing' + TARGETED_MUTATION_ANALYSIS = 'targeted mutation analysis' + diff --git a/hdr_schemata/definitions/HDRUK/DatasetType.py b/hdr_schemata/definitions/HDRUK/DatasetType.py index eefb7ad..2438873 100644 --- a/hdr_schemata/definitions/HDRUK/DatasetType.py +++ b/hdr_schemata/definitions/HDRUK/DatasetType.py @@ -65,6 +65,7 @@ class DatasetSubType(Enum): MULTIOMICS = 'Multiomics' METAGENOMICS = 'Metagenomics' GENOMICS = 'Genomics' + LIPIDOMICS = 'Lipidomics' # OTHERS = 'Others' EDUCATION = 'Education' CRIME_AND_JUSTICE = 'Crime and Justice' @@ -90,4 +91,5 @@ class DatasetSubType(Enum): NATIONAL_DISEASE_REGISTRIES_AND_AUDITS = 'National Disease Registries and Audits' BIRTHS_AND_DEATHS = 'Births and Deaths' # OTHERS = 'Others' - NOT_APPLICABLE = 'Not applicable' \ No newline at end of file + NOT_APPLICABLE = 'Not applicable' + \ No newline at end of file diff --git a/hdr_schemata/definitions/HDRUK/Disease.py b/hdr_schemata/definitions/HDRUK/Disease.py new file mode 100644 index 0000000..2374c04 --- /dev/null +++ 
b/hdr_schemata/definitions/HDRUK/Disease.py @@ -0,0 +1,13 @@ +from pydantic import BaseModel +from enum import Enum +from typing import Union + +class DiseaseCodeVocabulary(Enum): + ICD10 = 'ICD10' + SNOMED_CT = 'SNOMED CT' + MESH = 'MeSH' + +class Disease(BaseModel): + diseaseCode: Union[str, int] + diseaseCodeVocabulary: DiseaseCodeVocabulary + count: int diff --git a/hdr_schemata/definitions/HDRUK/Ethnicity.py b/hdr_schemata/definitions/HDRUK/Ethnicity.py new file mode 100644 index 0000000..b02ee64 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/Ethnicity.py @@ -0,0 +1,26 @@ +from pydantic import BaseModel +from enum import Enum + +class EthnicityBin(Enum): + WHITE___BRITISH = 'White - British' + WHITE___IRISH = 'White - Irish' + WHITE___ANY_OTHER_WHITE_BACKGROUND = 'White - Any other White background' + MIXED___WHITE_AND_BLACK_CARIBBEAN = 'Mixed - White and Black Caribbean' + MIXED___WHITE_AND_BLACK_AFRICAN = 'Mixed - White and Black African' + MIXED___WHITE_AND_ASIAN = 'Mixed - White and Asian' + MIXED___ANY_OTHER_MIXED_BACKGROUND = 'Mixed - Any other mixed background' + ASIAN_OR_ASIAN_BRITISH___INDIAN = 'Asian or Asian British - Indian' + ASIAN_OR_ASIAN_BRITISH___PAKISTANI = 'Asian or Asian British - Pakistani' + ASIAN_OR_ASIAN_BRITISH___BANGLADESHI = 'Asian or Asian British - Bangladeshi' + ASIAN_OR_ASIAN_BRITISH___ANY_OTHER_ASIAN_BACKGROUND = 'Asian or Asian British - Any other Asian background' + BLACK_OR_BLACK_BRITISH___CARIBBEAN = 'Black or Black British - Caribbean' + BLACK_OR_BLACK_BRITISH___AFRICAN = 'Black or Black British - African' + BLACK_OR_BLACK_BRITISH___ANY_OTHER_BLACK_BACKGROUND = 'Black or Black British - Any other Black background' + OTHER_ETHNIC_GROUPS___CHINESE = 'Other Ethnic Groups - Chinese' + OTHER_ETHNIC_GROUPS___ANY_OTHER_ETHNIC_GROUP = 'Other Ethnic Groups - Any other ethnic group' + NOT_STATED = 'Not stated' + NOT_KNOWN = 'Not known' + +class Ethnicity(BaseModel): + bin: EthnicityBin + count: int diff --git 
a/hdr_schemata/definitions/HDRUK/GenderAssignedAtBirth.py b/hdr_schemata/definitions/HDRUK/GenderAssignedAtBirth.py new file mode 100644 index 0000000..ab6bbb7 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/GenderAssignedAtBirth.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel +from enum import Enum + +class GenderBin(Enum): + MALE = 'male' + FEMALE = 'female' + +class GenderAssignedAtBirth(BaseModel): + bin: GenderBin + count: int diff --git a/hdr_schemata/definitions/HDRUK/Platform.py b/hdr_schemata/definitions/HDRUK/Platform.py new file mode 100644 index 0000000..5b6b279 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/Platform.py @@ -0,0 +1,12 @@ +from enum import Enum + +class Platform(Enum): + OTHER = 'Other' + NMR_NIGHTINGALE = 'NMR Nightingale' + METABOLON = 'Metabolon' + BIOCRATES = 'Biocrates' + ILLUMINA = 'Illumina' + OXFORD_NANOPORE = 'Oxford Nanopore' + FOUR_FIVE_FOUR = '454' + HI_C = 'Hi-C' + HIFI = 'HiFi' \ No newline at end of file diff --git a/hdr_schemata/definitions/HDRUK/Purpose.py b/hdr_schemata/definitions/HDRUK/Purpose.py index 86a5c35..e432d6a 100644 --- a/hdr_schemata/definitions/HDRUK/Purpose.py +++ b/hdr_schemata/definitions/HDRUK/Purpose.py @@ -24,6 +24,6 @@ class PurposeV2(Enum): AUDIT = 'Audit' ADMINISTRATIVE = 'Administrative' FINANCIAL = 'Financial' - STATUATORY = 'Statuatory' + STATUTORY = 'Statutory' OTHER = 'Other' NoneType_None = None diff --git a/hdr_schemata/definitions/HDRUK/StatisticalPopulationConstrained.py b/hdr_schemata/definitions/HDRUK/StatisticalPopulationConstrained.py index a5ed7f9..dea2392 100644 --- a/hdr_schemata/definitions/HDRUK/StatisticalPopulationConstrained.py +++ b/hdr_schemata/definitions/HDRUK/StatisticalPopulationConstrained.py @@ -6,7 +6,7 @@ class StatisticalPopulationConstrained(Enum): FINDINGS = 'FINDINGS' class StatisticalPopulationConstrainedV2(Enum): - PERSON = 'Person' - EVENT = 'Event' + PERSONS = 'Persons' + EVENTS = 'Events' FINDINGS = 'Findings' NUMBER_OF_SCANS_PER_MODALITY = 'Number 
of scans per modality' diff --git a/hdr_schemata/definitions/HDRUK/__init__.py b/hdr_schemata/definitions/HDRUK/__init__.py index 03ce7c9..6d3299b 100644 --- a/hdr_schemata/definitions/HDRUK/__init__.py +++ b/hdr_schemata/definitions/HDRUK/__init__.py @@ -1,7 +1,9 @@ from .AbstractText import AbstractText from .AccessMode import AccessMode from .AccessService import AccessService +from .Age import Age from .AgeRange import AgeRange +from .Assay import Assay from .CommaSeparatedIntegers import CommaSeparatedIntegers from .CommaSeparatedValues import CommaSeparatedValues from .ControlledVocabulary import ControlledVocabulary @@ -11,11 +13,14 @@ from .DataUseRequirements import DataUseRequirements, DataUseRequirementsV2 from .DeliveryLeadTime import DeliveryLeadTime, DeliveryLeadTimeV2 from .Description import Description +from .Disease import Disease from .Doi import Doi +from .Ethnicity import Ethnicity from .EmailAddress import EmailAddress from .EndDateEnum import EndDateEnum from .Followup import Followup from .Format import Format +from .GenderAssignedAtBirth import GenderAssignedAtBirth from .ICD_0_3 import ICD_0_3 from .IsPartOfEnum import IsPartOfEnum from .Isocountrycode import Isocountrycode @@ -31,6 +36,7 @@ from .Periodicity import Periodicity, PeriodicityV2 from .PhysicalSampleAvailability import PhysicalSampleAvailability from .Pipeline import Pipeline +from .Platform import Platform from .Purpose import Purpose, PurposeV2 from .Semver import Semver from .Setting import Setting, SettingV2 diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index a1f62fb..69a380e 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -197,6 +197,53 @@ "title": "Accessibility", "type": "object" }, + "Age": { + "properties": { + "bin": { + "$ref": "#/$defs/AgeBin" + }, + "count": { + "title": "Count", + "type": "integer" + } + }, + "required": [ + "bin", + "count" + ], + "title": 
"Age", + "type": "object" + }, + "AgeBin": { + "enum": [ + "0-6 days", + "7-27 days", + "1-11 months", + "1-4 years", + "5-9 years", + "10-14 years", + "15-19 years", + "20-14 years", + "25-29 years", + "30-34 years", + "35-39 years", + "40-44 years", + "45-49 years", + "50-54 years", + "55-59 years", + "60-64 years", + "65-69 years", + "70-74 years", + "75-79 years", + "80-84 years", + "85-89 years", + "90-94 years", + "65-99 years", + "100+ years" + ], + "title": "AgeBin", + "type": "string" + }, "AgeRange": { "anyOf": [ { @@ -209,6 +256,29 @@ ], "title": "AgeRange" }, + "Assay": { + "enum": [ + "NMR spectroscopy", + "mass-spectrometry", + "whole genome sequencing", + "exome sequencing", + "genotyping by array", + "transcriptome profiling by high-throughput sequencing", + "transcriptome profiling by array", + "amplicon sequencing", + "methylation binding domain sequencing", + "methylation profiling by high-throughput sequencing", + "genomic variant calling", + "chromatin accessibility profiling by high-throughput sequencing", + "histone modification profiling by high-throughput sequencing", + "chromatin immunoprecipitation sequencing", + "whole genome shotgun sequencing", + "whole transcriptome sequencing", + "targeted mutation analysis" + ], + "title": "Assay", + "type": "string" + }, "CommaSeparatedValues": { "anyOf": [ { @@ -546,6 +616,117 @@ ], "title": "DeliveryLeadTime" }, + "DemographicFrequency": { + "additionalProperties": false, + "properties": { + "age": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Age" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Array of age bins and their corresponding counts.", + "title": "Age" + }, + "ethnicity": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Ethnicity" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Array of ethnicity bins and their corresponding counts.", + "title": "Ethnicity" + }, + "genderAssignedAtBirth": { + "anyOf": [ + { + "items": { + 
"$ref": "#/$defs/GenderAssignedAtBirth" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Gender assigned at birth, male or female, and their corresponding counts.", + "title": "Gender assigned at birth" + }, + "disease": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Disease" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Array of diseases and their corresponding counts.", + "title": "Disease" + } + }, + "required": [ + "age", + "ethnicity", + "genderAssignedAtBirth", + "disease" + ], + "title": "DemographicFrequency", + "type": "object" + }, + "Disease": { + "properties": { + "diseaseCode": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ], + "title": "Diseasecode" + }, + "diseaseCodeVocabulary": { + "$ref": "#/$defs/DiseaseCodeVocabulary" + }, + "count": { + "title": "Count", + "type": "integer" + } + }, + "required": [ + "diseaseCode", + "diseaseCodeVocabulary", + "count" + ], + "title": "Disease", + "type": "object" + }, + "DiseaseCodeVocabulary": { + "enum": [ + "ICD10", + "SNOMED CT", + "MeSH" + ], + "title": "DiseaseCodeVocabulary", + "type": "string" + }, "Doi": { "anyOf": [ { @@ -558,6 +739,47 @@ ], "title": "Doi" }, + "Ethnicity": { + "properties": { + "bin": { + "$ref": "#/$defs/EthnicityBin" + }, + "count": { + "title": "Count", + "type": "integer" + } + }, + "required": [ + "bin", + "count" + ], + "title": "Ethnicity", + "type": "object" + }, + "EthnicityBin": { + "enum": [ + "White - British", + "White - Irish", + "White - Any other White background", + "Mixed - White and Black Caribbean", + "Mixed - White and Black African", + "Mixed - White and Asian", + "Mixed - Any other mixed background", + "Asian or Asian British - Indian", + "Asian or Asian British - Pakistani", + "Asian or Asian British - Bangladeshi", + "Asian or Asian British - Any other Asian background", + "Black or Black British - Caribbean", + "Black or Black British - African", + "Black or Black British 
- Any other Black background", + "Other Ethnic Groups - Chinese", + "Other Ethnic Groups - Any other ethnic group", + "Not stated", + "Not known" + ], + "title": "EthnicityBin", + "type": "string" + }, "Followup": { "enum": [ "0 - 6 MONTHS", @@ -644,6 +866,31 @@ "title": "FormatAndStandards", "type": "object" }, + "GenderAssignedAtBirth": { + "properties": { + "bin": { + "$ref": "#/$defs/GenderBin" + }, + "count": { + "title": "Count", + "type": "integer" + } + }, + "required": [ + "bin", + "count" + ], + "title": "GenderAssignedAtBirth", + "type": "object" + }, + "GenderBin": { + "enum": [ + "male", + "female" + ], + "title": "GenderBin", + "type": "string" + }, "Linkage": { "additionalProperties": false, "properties": { @@ -894,6 +1141,43 @@ "title": "Observation", "type": "object" }, + "Omics": { + "additionalProperties": false, + "properties": { + "assay": { + "anyOf": [ + { + "$ref": "#/$defs/Assay" + }, + { + "type": "null" + } + ], + "description": "The specific 'omics assay that generated the dataset.", + "guidance": "The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contact the gateway team by submitting an enquiry.", + "title": "Omics assay" + }, + "platform": { + "anyOf": [ + { + "$ref": "#/$defs/Platform" + }, + { + "type": "null" + } + ], + "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "guidance": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "title": "Omics Platform" + } + }, + "required": [ + "assay", + "platform" + ], + "title": "Omics", + "type": "object" + }, "Organisation": { "properties": { "name": { @@ -1032,6 +1316,21 @@ "title": "Pipeline", "type": "string" }, + "Platform": { + "enum": [ + "Other", + "NMR Nightingale", + "Metabolon", + "Biocrates", + "Illumina", + "Oxford Nanopore", + "454", + "Hi-C", + "HiFi" + ], + "title": "Platform", + "type": "string" + }, "Provenance": { "additionalProperties": false, "properties": { @@ -1075,7 +1374,7 @@ "issued": { "description": "Datetime stamp of when this metadata version was initially issued", "format": "date-time", - "title": "Metadata Issued Datetime',", + "title": "Metadata Issued Datetime", "type": "string" }, "modified": { @@ -1225,8 +1524,8 @@ }, "StatisticalPopulationConstrainedV2": { "enum": [ - "Person", - "Event", + "Persons", + "Events", "Findings", "Number of scans per modality" ], @@ -1981,6 +2280,32 @@ "default": null, "description": "metedata for tissue samples", "title": "Tissue Sample Collection" + }, + "demographicFrequency": { + "anyOf": [ + { + "$ref": "#/$defs/DemographicFrequency" + }, + { + "type": "null" + } + ], + "default": null, + "description": "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes.", + "title": "Demographic frequency" + }, + "omics": { + "anyOf": [ + { + "$ref": "#/$defs/Omics" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Omics", + "title": "Omics" } }, "required": [ diff --git a/hdr_schemata/models/GWDM/v2_0/Coverage.py b/hdr_schemata/models/GWDM/v2_0/Coverage.py index a060dfc..edddbe8 100644 --- a/hdr_schemata/models/GWDM/v2_0/Coverage.py +++ 
b/hdr_schemata/models/GWDM/v2_0/Coverage.py @@ -17,7 +17,7 @@ class Coverage(BaseModel): pathway: Optional[LongDescription] = Field(None, **an.pathway.__dict__) - followup: Optional[Followup] = Field(None, **an.followup.__dict__) + followUp: Optional[Followup] = Field(None, **an.followUp.__dict__) typicalAgeRange: Optional[AgeRange] = Field(None, **an.typicalAgeRange.__dict__) diff --git a/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py b/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py new file mode 100644 index 0000000..73f6ed6 --- /dev/null +++ b/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py @@ -0,0 +1,30 @@ +from typing import Optional, List, Union +from pydantic import BaseModel, Field +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.demographicFrequency + + +class DemographicFrequency(BaseModel): + class Config: + extra = "forbid" + + age: Optional[List[Age]] = Field( + ..., + **an.age.__dict__, + # json_schema_extra={"guidance": an.age.guidance} + ) + + ethnicity: Optional[List[Ethnicity]] = Field( + ..., **an.ethnicity.__dict__, # json_schema_extra={"guidance": an.ethnicity.guidance} + ) + + genderAssignedAtBirth: Optional[List[GenderAssignedAtBirth]] = Field( + ..., **an.genderAssignedAtBirth.__dict__, # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} + ) + + disease: Optional[List[Disease]] = Field( + ..., **an.disease.__dict__, # json_schema_extra={"guidance": an.disease.guidance} + ) diff --git a/hdr_schemata/models/GWDM/v2_0/Observations.py b/hdr_schemata/models/GWDM/v2_0/Observations.py index f09579a..c3caed3 100644 --- a/hdr_schemata/models/GWDM/v2_0/Observations.py +++ b/hdr_schemata/models/GWDM/v2_0/Observations.py @@ -11,7 +11,7 @@ class Observation(BaseModel): class Config: extra = "forbid" - observedNode: StatisticalPopulationConstrainedV2 = Field( + measuredNode: StatisticalPopulationConstrainedV2 = Field( ..., **an.observedNode.__dict__ ) 
measuredValue: int = Field(..., **an.measuredValue.__dict__) diff --git a/hdr_schemata/models/GWDM/v2_0/Omics.py b/hdr_schemata/models/GWDM/v2_0/Omics.py new file mode 100644 index 0000000..7068319 --- /dev/null +++ b/hdr_schemata/models/GWDM/v2_0/Omics.py @@ -0,0 +1,27 @@ +from typing import Optional, List, Union +from pydantic import BaseModel, Field +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.omics + + +class Omics(BaseModel): + class Config: + extra = "forbid" + + assay: Optional[Assay] = Field( + ..., + **an.assay.__dict__, + json_schema_extra={"guidance": an.assay.guidance} + ) + + platform: Optional[Platform] = Field( + ..., + **an.platform.__dict__, + json_schema_extra={"guidance": an.platform.guidance} + ) + + + diff --git a/hdr_schemata/models/GWDM/v2_0/__init__.py b/hdr_schemata/models/GWDM/v2_0/__init__.py index c8f0827..157ede4 100644 --- a/hdr_schemata/models/GWDM/v2_0/__init__.py +++ b/hdr_schemata/models/GWDM/v2_0/__init__.py @@ -10,10 +10,12 @@ from .Required import Required from .Summary import Summary from .Coverage import Coverage +from .DemographicFrequency import DemographicFrequency from .Provenance import Provenance from .Accessibility import Accessibility from .Linkage import Linkage from .Observations import Observation +from .Omics import Omics from .DataTable import DataTable from .TissuesSampleCollection import TissuesSampleCollection @@ -69,6 +71,18 @@ class Config: title=an.tissuesSampleCollection.title, ) + demographicFrequency: Optional[DemographicFrequency] = Field( + None, + description=an.demographicFrequency.description, + title=an.demographicFrequency.title + ) + + omics: Optional[Omics] = Field( + None, + description=an.omics.description, + title=an.omics.title + ) + @classmethod def save_schema(cls, location="./2.0/schema.json"): with open(location, "w") as f: diff --git a/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml 
b/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml index c18ce1b..41b211f 100644 --- a/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml +++ b/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml @@ -170,7 +170,7 @@ coverage: - "BONE MARROW" title: Physical Sample Availability - followup: + followUp: description: If known, what is the typical time span that a patient appears in the dataset (follow up period) title: Followup @@ -559,3 +559,29 @@ tissuesSampleCollection: useRestrictions: title: "Use Restrictions" description: "Restrictions on the use of the tissue sample" +demographicFrequency: + title: "Demographic frequency" + description: "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes." + age: + title: "Age" + description: "Array of age bins and their corresponding counts." + ethnicity: + title: "Ethnicity" + description: "Array of ethnicity bins and their corresponding counts." + genderAssignedAtBirth: + title: "Gender assigned at birth" + description: "Gender assigned at birth, male or female, and their corresponding counts." + disease: + title: "Disease" + description: "Array of diseases and their corresponding counts." +omics: + title: "Omics" + description: "Omics" + assay: + title: "Omics assay" + description: "The specific 'omics assay that generated the dataset." + guidance: "The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contact the gateway team by submitting an enquiry." + platform: + title: "Omics Platform" + description: "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf." 
+ guidance: "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf." diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 5720f8e..8ecd4b0 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -104,9 +104,6 @@ }, "jurisdiction": { "anyOf": [ - { - "$ref": "#/$defs/CommaSeparatedValues" - }, { "items": { "$ref": "#/$defs/Isocountrycode" @@ -227,6 +224,76 @@ "title": "Accessibility", "type": "object" }, + "Age": { + "properties": { + "bin": { + "$ref": "#/$defs/AgeBin" + }, + "count": { + "title": "Count", + "type": "integer" + } + }, + "required": [ + "bin", + "count" + ], + "title": "Age", + "type": "object" + }, + "AgeBin": { + "enum": [ + "0-6 days", + "7-27 days", + "1-11 months", + "1-4 years", + "5-9 years", + "10-14 years", + "15-19 years", + "20-14 years", + "25-29 years", + "30-34 years", + "35-39 years", + "40-44 years", + "45-49 years", + "50-54 years", + "55-59 years", + "60-64 years", + "65-69 years", + "70-74 years", + "75-79 years", + "80-84 years", + "85-89 years", + "90-94 years", + "65-99 years", + "100+ years" + ], + "title": "AgeBin", + "type": "string" + }, + "Assay": { + "enum": [ + "NMR spectroscopy", + "mass-spectrometry", + "whole genome sequencing", + "exome sequencing", + "genotyping by array", + "transcriptome profiling by high-throughput sequencing", + "transcriptome profiling by array", + "amplicon sequencing", + "methylation binding domain sequencing", + "methylation profiling by high-throughput sequencing", + "genomic variant calling", + "chromatin accessibility profiling by high-throughput sequencing", + "histone modification profiling by high-throughput sequencing", + "chromatin 
immunoprecipitation sequencing", + "whole genome shotgun sequencing", + "whole transcriptome sequencing", + "targeted mutation analysis" + ], + "title": "Assay", + "type": "string" + }, "CommaSeparatedValues": { "anyOf": [ { @@ -374,7 +441,7 @@ "guidance": "Indicate the specimen type, can be several values from the list below:- Availability of physical samples associated with the dataset.- If samples are available, please indicate the types of samples that are available.- More than one type may be provided.- If samples are not yet available, please provide \u201cAvailability to be confirmed\u201d.- If samples are not available, then please provide \u201cNot available\u201d.- Not available: Samples associated with the dataset are not available.- Bone marrow: Bone marrow samples associated with the data are available.- Cancer cell lines: Cancer cell line samples associated with the data are available.- CDNA/MRNA: CDNA/MRNA samples associated with the data are available.- Core biopsy: Core biopsy samples associated with the data are available.- DNA: DNA samples associated with the data are available.- Entire body organ: Entire body organ associated with the data are available.- Faeces: Faeces samples associated with the data are available.- Immortalized cell lines: Immortalized cell line samples associated with the data are available.- Isolated pathogen: Isolated pathogen associated with the data are available.- MicroRNA: MicroRNA samples associated with the data are available.- Peripheral blood cells: Peripheral blood cell samples associated with the data are available.- Plasma: Plasma samples associated with the data are available.- PM Tissue: PM Tissue samples associated with the data are available.- Primary cells: Primary cell samples associated with the data are available.- RNA: RNA samples associated with the data are available.- Saliva: Saliva samples associated with the data are available.- Serum: Serum samples associated with the data are available.- Swabs: 
Swab samples associated with the data are available.- Tissue: Tissue samples associated with the data are available.- Urine: Urine samples associated with the data are available.- Whole blood: Whole blood samples associated with the data are available.- Availability to be confirmed: Availability of samples is currently being confirmed.- Other: Other types of sample available.", "title": "Biological sample availability" }, - "followup": { + "followUp": { "anyOf": [ { "$ref": "#/$defs/Followup" @@ -694,6 +761,7 @@ "Multiomics", "Metagenomics", "Genomics", + "Lipidomics", "Education", "Crime and Justice", "Ethnicity", @@ -753,6 +821,79 @@ "title": "DeliveryLeadTimeV2", "type": "string" }, + "DemographicFrequency": { + "additionalProperties": false, + "properties": { + "age": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Age" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Array of age bins and their corresponding counts.", + "title": "Age" + }, + "ethnicity": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Ethnicity" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Array of ethnicity bins and their corresponding counts.", + "title": "Ethnicity" + }, + "genderAssignedAtBirth": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/GenderAssignedAtBirth" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Gender assigned at birth, male or female, and their corresponding counts.", + "title": "Gender assigned at birth" + }, + "disease": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Disease" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Array of diseases and their corresponding counts.", + "title": "Disease" + } + }, + "required": [ + "age", + "ethnicity", + "genderAssignedAtBirth", + "disease" + ], + "title": "DemographicFrequency", + "type": "object" + }, "Description": { "anyOf": [ { @@ -766,6 +907,44 @@ ], "title": "Description" }, + "Disease": { + 
"properties": { + "diseaseCode": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "integer" + } + ], + "title": "Diseasecode" + }, + "diseaseCodeVocabulary": { + "$ref": "#/$defs/DiseaseCodeVocabulary" + }, + "count": { + "title": "Count", + "type": "integer" + } + }, + "required": [ + "diseaseCode", + "diseaseCodeVocabulary", + "count" + ], + "title": "Disease", + "type": "object" + }, + "DiseaseCodeVocabulary": { + "enum": [ + "ICD10", + "SNOMED CT", + "MeSH" + ], + "title": "DiseaseCodeVocabulary", + "type": "string" + }, "Documentation": { "additionalProperties": false, "properties": { @@ -1007,6 +1186,47 @@ "title": "EnrichmentAndLinkage", "type": "object" }, + "Ethnicity": { + "properties": { + "bin": { + "$ref": "#/$defs/EthnicityBin" + }, + "count": { + "title": "Count", + "type": "integer" + } + }, + "required": [ + "bin", + "count" + ], + "title": "Ethnicity", + "type": "object" + }, + "EthnicityBin": { + "enum": [ + "White - British", + "White - Irish", + "White - Any other White background", + "Mixed - White and Black Caribbean", + "Mixed - White and Black African", + "Mixed - White and Asian", + "Mixed - Any other mixed background", + "Asian or Asian British - Indian", + "Asian or Asian British - Pakistani", + "Asian or Asian British - Bangladeshi", + "Asian or Asian British - Any other Asian background", + "Black or Black British - Caribbean", + "Black or Black British - African", + "Black or Black British - Any other Black background", + "Other Ethnic Groups - Chinese", + "Other Ethnic Groups - Any other ethnic group", + "Not stated", + "Not known" + ], + "title": "EthnicityBin", + "type": "string" + }, "Followup": { "enum": [ "0 - 6 MONTHS", @@ -1081,6 +1301,31 @@ "title": "FormatAndStandards", "type": "object" }, + "GenderAssignedAtBirth": { + "properties": { + "bin": { + "$ref": "#/$defs/GenderBin" + }, + "count": { + "title": "Count", + "type": "integer" + } + }, + "required": [ + "bin", + "count" + ], + "title": "GenderAssignedAtBirth", 
+ "type": "object" + }, + "GenderBin": { + "enum": [ + "male", + "female" + ], + "title": "GenderBin", + "type": "string" + }, "GenderType": { "enum": [ "Male", @@ -1347,7 +1592,7 @@ "Observation": { "additionalProperties": false, "properties": { - "observedNode": { + "measuredNode": { "allOf": [ { "$ref": "#/$defs/StatisticalPopulationConstrainedV2" @@ -1407,7 +1652,7 @@ } }, "required": [ - "observedNode", + "measuredNode", "measuredValue", "observationDate", "measuredProperty" @@ -1415,6 +1660,43 @@ "title": "Observation", "type": "object" }, + "Omics": { + "additionalProperties": false, + "properties": { + "assay": { + "anyOf": [ + { + "$ref": "#/$defs/Assay" + }, + { + "type": "null" + } + ], + "description": "The specific 'omics assay that generated the dataset.", + "guidance": "The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contact the gateway team by submitting an enquiry.", + "title": "Omics assay" + }, + "platform": { + "anyOf": [ + { + "$ref": "#/$defs/Platform" + }, + { + "type": "null" + } + ], + "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "guidance": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", + "title": "Omics Platform" + } + }, + "required": [ + "assay", + "platform" + ], + "title": "Omics", + "type": "object" + }, "OneHundredFiftyCharacters": { "maxLength": 150, "minLength": 2, @@ -1427,7 +1709,7 @@ "identifier": { "anyOf": [ { - "$ref": "#/$defs/Url" + "type": "integer" }, { "type": "null" @@ -1531,19 +1813,21 @@ "title": "Purpose of dataset collection" }, "datasetType": { - "allOf": [ - { - "$ref": "#/$defs/DatasetTypeV2" - } - ], "description": "The topic areas to which the dataset content relates.", "guidance": "Types include those listed below. Datasets can have more than one type associated.- Health and disease: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.- Treatments/Interventions: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.- Measurements/Tests: Includes any data related to laboratory or other diagnostics.- Imaging types: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.- Imaging area of the body: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.- Omics: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics.- Socioeconomic: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.- Lifestyle: Includes any data related to 
smoking, physical activity, dietary habits or alcohol.- Registry: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.- Environment and energy: Includes any data related to the monitoring or study of environmental or energy factors or events.- Information and communication: Includes any data related to the study or application of information and communication.- Politics: Includes any data related to political views, activities, voting, etc.", - "title": "Dataset type" + "items": { + "$ref": "#/$defs/DatasetTypeV2" + }, + "title": "Dataset type", + "type": "array" }, "datasetSubType": { "anyOf": [ { - "$ref": "#/$defs/DatasetSubType" + "items": { + "$ref": "#/$defs/DatasetSubType" + }, + "type": "array" }, { "type": "null" @@ -1637,6 +1921,21 @@ "title": "Pipeline", "type": "string" }, + "Platform": { + "enum": [ + "Other", + "NMR Nightingale", + "Metabolon", + "Biocrates", + "Illumina", + "Oxford Nanopore", + "454", + "Hi-C", + "HiFi" + ], + "title": "Platform", + "type": "string" + }, "Provenance": { "additionalProperties": false, "properties": { @@ -1677,7 +1976,7 @@ "Audit", "Administrative", "Financial", - "Statuatory", + "Statutory", "Other", null ], @@ -1808,8 +2107,8 @@ }, "StatisticalPopulationConstrainedV2": { "enum": [ - "Person", - "Event", + "Persons", + "Events", "Findings", "Number of scans per modality" ], @@ -1885,7 +2184,7 @@ "guidance": "The abstract should provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research.- The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content.- Effective abstracts should avoid long sentences and abbreviations where possible.- Note: Researchers will view Titles and the first line of Abstracts (list view) when searching for datasets and choosing whether to explore their content further.- Abstracts should be 
different from the full description for a dataset.- Example: CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice.", "title": "Dataset abstract" }, - "dataProvider": { + "dataCustodian": { "allOf": [ { "$ref": "#/$defs/Organisation" @@ -1984,7 +2283,7 @@ "required": [ "title", "abstract", - "dataProvider", + "dataCustodian", "populationSize", "keywords", "contactPoint" @@ -2337,6 +2636,32 @@ "default": null, "description": "Descriptions of all tables and data elements that can be included in the dataset.", "title": "Structural metadata" + }, + "demographicFrequency": { + "anyOf": [ + { + "$ref": "#/$defs/DemographicFrequency" + }, + { + "type": "null" + } + ], + "default": null, + "description": "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes.", + "title": "Demographic frequency" + }, + "omics": { + "anyOf": [ + { + "$ref": "#/$defs/Omics" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Omics", + "title": "Omics" } }, "required": [ diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Access.py b/hdr_schemata/models/HDRUK/v3_0_0/Access.py index 3dd22d6..a77a4d3 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Access.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Access.py @@ -37,7 +37,7 @@ class Config: None, **an.deliveryLeadTime.__dict__, json_schema_extra={"guidance": an.deliveryLeadTime.guidance} ) - jurisdiction: Union[Optional[CommaSeparatedValues], List[Isocountrycode]] = Field( + jurisdiction: Optional[List[Isocountrycode]] = Field( ..., **an.jurisdiction.__dict__, json_schema_extra={"guidance": an.jurisdiction.guidance} ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py b/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py new file mode 100644 index 
0000000..73f6ed6 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py @@ -0,0 +1,30 @@ +from typing import Optional, List, Union +from pydantic import BaseModel, Field +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.demographicFrequency + + +class DemographicFrequency(BaseModel): + class Config: + extra = "forbid" + + age: Optional[List[Age]] = Field( + ..., + **an.age.__dict__, + # json_schema_extra={"guidance": an.age.guidance} + ) + + ethnicity: Optional[List[Ethnicity]] = Field( + ..., **an.ethnicity.__dict__, # json_schema_extra={"guidance": an.ethnicity.guidance} + ) + + genderAssignedAtBirth: Optional[List[GenderAssignedAtBirth]] = Field( + ..., **an.genderAssignedAtBirth.__dict__, # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} + ) + + disease: Optional[List[Disease]] = Field( + ..., **an.disease.__dict__, # json_schema_extra={"guidance": an.disease.guidance} + ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Observations.py b/hdr_schemata/models/HDRUK/v3_0_0/Observations.py index 928bbe7..56a24dc 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Observations.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Observations.py @@ -11,8 +11,8 @@ class Observation(BaseModel): class Config: extra = "forbid" - observedNode: StatisticalPopulationConstrainedV2 = Field( - ..., **an.observedNode.__dict__, json_schema_extra={"guidance": an.observedNode.guidance} + measuredNode: StatisticalPopulationConstrainedV2 = Field( + ..., **an.measuredNode.__dict__, json_schema_extra={"guidance": an.measuredNode.guidance} ) measuredValue: int = Field(..., **an.measuredValue.__dict__, json_schema_extra={"guidance": an.measuredValue.guidance}) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Omics.py b/hdr_schemata/models/HDRUK/v3_0_0/Omics.py new file mode 100644 index 0000000..7068319 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v3_0_0/Omics.py @@ -0,0 +1,27 @@ +from typing import Optional, 
List, Union +from pydantic import BaseModel, Field +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.omics + + +class Omics(BaseModel): + class Config: + extra = "forbid" + + assay: Optional[Assay] = Field( + ..., + **an.assay.__dict__, + json_schema_extra={"guidance": an.assay.guidance} + ) + + platform: Optional[Platform] = Field( + ..., + **an.platform.__dict__, + json_schema_extra={"guidance": an.platform.guidance} + ) + + + diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py index c3f0f67..51b75cf 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py @@ -11,7 +11,7 @@ class Organisation(BaseOrganisation): - identifier: Optional[Url] = Field( + identifier: Optional[int] = Field( None, **an.identifier.__dict__, json_schema_extra={"guidance": an.identifier.guidance} ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Origin.py b/hdr_schemata/models/HDRUK/v3_0_0/Origin.py index 83af08c..7e1b448 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Origin.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Origin.py @@ -15,11 +15,11 @@ class Config: None, **an.purpose.__dict__, json_schema_extra={"guidance": an.purpose.guidance} ) - datasetType: DatasetTypeV2 = Field( + datasetType: List[DatasetTypeV2] = Field( ..., **an.datasetType.__dict__, json_schema_extra={"guidance": an.datasetType.guidance} ) - datasetSubType: Optional[DatasetSubType] = Field( + datasetSubType: Optional[List[DatasetSubType]] = Field( ..., **an.datasetSubType.__dict__, json_schema_extra={"guidance": an.datasetSubType.guidance} ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Summary.py b/hdr_schemata/models/HDRUK/v3_0_0/Summary.py index 9c7b143..bc731f9 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Summary.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Summary.py @@ -21,8 +21,8 @@ class Config: ..., **an.abstract.__dict__, 
json_schema_extra={"guidance": an.abstract.guidance} ) - dataProvider: Organisation = Field( - ..., title=an.dataProvider.title, description=an.dataProvider.description + dataCustodian: Organisation = Field( + ..., title=an.dataCustodian.title, description=an.dataCustodian.description ) populationSize: int = Field( diff --git a/hdr_schemata/models/HDRUK/v3_0_0/__init__.py b/hdr_schemata/models/HDRUK/v3_0_0/__init__.py index 561d474..857f83b 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/__init__.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/__init__.py @@ -6,9 +6,11 @@ from .Accessibility import Accessibility from .Coverage import Coverage +from .DemographicFrequency import DemographicFrequency from .Documentation import Documentation from .EnrichmentAndLinkage import EnrichmentAndLinkage from .Observations import Observation +from .Omics import Omics from .Provenance import Provenance from .Revision import Revision from .StructuralMetadata import StructuralMetadata @@ -70,6 +72,18 @@ class Config: title=an.structuralMetadata.title, ) + demographicFrequency: Optional[DemographicFrequency] = Field( + None, + description=an.demographicFrequency.description, + title=an.demographicFrequency.title + ) + + omics: Optional[Omics] = Field( + None, + description=an.omics.description, + title=an.omics.title + ) + @classmethod def save_schema(cls, location="./3.0.0/schema.json"): with open(location, "w") as f: diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index 8135966..3f04eee 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -1,5 +1,5 @@ summary: - dataProvider: + dataCustodian: title: "Dataset Custodian" description: "This is the organisation responsible for running or supporting the data access request process, as well as enquiries about a dataset. 
In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank." contactPoint: @@ -123,7 +123,7 @@ observations: title: "Observations" description: "This section provides an overview of observations of your dataset linked to specific points in time. Multiple observations about the dataset are encouraged to be provided, including multiple observations of the same property at different timepoints. At least one observation is required." - observedNode: + measuredNode: guidance: "- **Persons**: Unique persons recorded in the dataset\\n- **Events**: Unique events such as procedures and prescriptions within the dataset\\n-**Findings**: Unique findings included in the dataset such as diagnoses'\\n-**Number of scans per modality**: Unique scans for a specified imaging method modality (e.g. 12 x-rays)" examples: - "Persons" @@ -372,7 +372,7 @@ coverage: - 18 title: "Minimum age range" description: "Please indicate the minimum age in years of participants in the dataset as a whole number (integer)." - followup: + followUp: guidance: "If known, please indicate the typical time span that a patient appears in the dataset (follow up period).\\n-**0 - 6 MONTHS**: Data typically available for a patient over a 0-6 month period.\\n-**6 - 12 MONTHS**: Data typically available for a patient over a 6-12 month period.\\n-**1 - 10 YEARS**: Data typically available for a patient over a 1-10 year period.\\n-**> 10 YEARS**: Data typically available for a patient for over a 10 year period.\\n-**CONTINUOUS**: Data for patients is being regularly added to and updated.\\n-**UNKNOWN**: Timespan is Unknown.\\n-**OTHER**: Data available for a patient over another time period." 
title: "Follow-up" description: "If known, what is the typical time span that a patient appears in the dataset (follow up period). In a prospective cohort study, after baseline information is collected, participants are followed “longitudinally” i.e. new information is collected about them for a period of time afterward. This is known as the “follow up period”. What is the typical time span of follow up, e.g. 1 year, 5 years? If there are multiple cohorts in the dataset with varying follow up periods, please provide the longest follow up period." @@ -401,4 +401,30 @@ datasetDescriptor: title: title: "Title of a dataset" url: - title: "Url of a dataset" \ No newline at end of file + title: "Url of a dataset" +demographicFrequency: + title: "Demographic frequency" + description: "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes." + age: + title: "Age" + description: "Array of age bins and their corresponding counts." + ethnicity: + title: "Ethnicity" + description: "Array of ethnicity bins and their corresponding counts." + genderAssignedAtBirth: + title: "Gender assigned at birth" + description: "Gender assigned at birth, male or female, and their corresponding counts." + disease: + title: "Disease" + description: "Array of diseases and their corresponding counts." +omics: + title: "Omics" + description: "Omics" + assay: + title: "Omics assay" + description: "The specific 'omics assay that generated the dataset." + guidance: "The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contact the gateway team by submitting an enquiry." + platform: + title: "Omics Platform" + description: "The specific technology or infrastructure used to perform the assay.
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf." + guidance: "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf." \ No newline at end of file From 5114d045225f637cf9806a16d20372784038e7f6 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Tue, 13 Aug 2024 16:26:54 +0100 Subject: [PATCH 02/23] regen GWDM 2 --- hdr_schemata/models/GWDM/2.0/schema.json | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index 69a380e..2f7859e 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -322,7 +322,7 @@ "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. 
primary and secondary care), or an integrated care record covering the whole patient pathway.", "title": "Pathway" }, - "followup": { + "followUp": { "anyOf": [ { "$ref": "#/$defs/Followup" @@ -1078,7 +1078,7 @@ "Observation": { "additionalProperties": false, "properties": { - "observedNode": { + "measuredNode": { "allOf": [ { "$ref": "#/$defs/StatisticalPopulationConstrainedV2" @@ -1133,7 +1133,7 @@ } }, "required": [ - "observedNode", + "measuredNode", "measuredValue", "observationDate", "measuredProperty" From 894fe10c2dbfabbfc9f4348e33cd65d9504ff5df Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Wed, 14 Aug 2024 15:09:01 +0100 Subject: [PATCH 03/23] extra schema updates --- hdr_schemata/definitions/HDRUK/DatasetType.py | 2 - hdr_schemata/models/GWDM/2.0/schema.json | 4 +- hdr_schemata/models/GWDM/v2_0/Observations.py | 2 +- hdr_schemata/models/HDRUK/3.0.0/schema.json | 38 +++++-------------- hdr_schemata/models/HDRUK/v3_0_0/Access.py | 4 -- .../models/HDRUK/v3_0_0/Observations.py | 4 +- .../models/HDRUK/v3_0_0/Organisation.py | 4 +- .../HDRUK/v3_0_0/annotations/config.yaml | 7 ++-- 8 files changed, 20 insertions(+), 45 deletions(-) diff --git a/hdr_schemata/definitions/HDRUK/DatasetType.py b/hdr_schemata/definitions/HDRUK/DatasetType.py index 2438873..42e6c2b 100644 --- a/hdr_schemata/definitions/HDRUK/DatasetType.py +++ b/hdr_schemata/definitions/HDRUK/DatasetType.py @@ -62,7 +62,6 @@ class DatasetSubType(Enum): TRANSCRIPTOMICS = 'Transcriptomics' EPIGENOMICS = 'Epigenomics' METABOLOMICS = 'Metabolomics' - MULTIOMICS = 'Multiomics' METAGENOMICS = 'Metagenomics' GENOMICS = 'Genomics' LIPIDOMICS = 'Lipidomics' @@ -92,4 +91,3 @@ class DatasetSubType(Enum): BIRTHS_AND_DEATHS = 'Births and Deaths' # OTHERS = 'Others' NOT_APPLICABLE = 'Not applicable' - \ No newline at end of file diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index 2f7859e..ea73687 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ 
b/hdr_schemata/models/GWDM/2.0/schema.json @@ -1078,7 +1078,7 @@ "Observation": { "additionalProperties": false, "properties": { - "measuredNode": { + "observedNode": { "allOf": [ { "$ref": "#/$defs/StatisticalPopulationConstrainedV2" @@ -1133,7 +1133,7 @@ } }, "required": [ - "measuredNode", + "observedNode", "measuredValue", "observationDate", "measuredProperty" diff --git a/hdr_schemata/models/GWDM/v2_0/Observations.py b/hdr_schemata/models/GWDM/v2_0/Observations.py index c3caed3..f09579a 100644 --- a/hdr_schemata/models/GWDM/v2_0/Observations.py +++ b/hdr_schemata/models/GWDM/v2_0/Observations.py @@ -11,7 +11,7 @@ class Observation(BaseModel): class Config: extra = "forbid" - measuredNode: StatisticalPopulationConstrainedV2 = Field( + observedNode: StatisticalPopulationConstrainedV2 = Field( ..., **an.observedNode.__dict__ ) measuredValue: int = Field(..., **an.measuredValue.__dict__) diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 8ecd4b0..5f18146 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -43,20 +43,6 @@ "guidance": "Select the category which best matches how a Researcher will access the dataset, if approved for access. If the access method changes based on the data required for the project (e.g. 
the dataset can be shared via secure email if the extract is fully anonymised, but must be accessed via a TRE/SDE if the extract is only pseudonymised) then select 'varies based on project'.", "title": "Access method category" }, - "accessMode": { - "anyOf": [ - { - "$ref": "#/$defs/AccessMode" - }, - { - "type": "null" - } - ], - "default": "New project", - "description": "Indication of the application type to enable research access.", - "guidance": "Indicate whether a Researcher will need to join a consortium to enable access to the dataset, or if a Researcher will need to submit an application which describes a specific project and required dataset fields required.", - "title": "Access mode" - }, "accessService": { "anyOf": [ { @@ -69,7 +55,8 @@ "default": null, "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers; additional consultancy and services; any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. 
However, there will be the opportunity to overwrite this value for each dataset.", "examples": [ - "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + "https://re-docs.genomicsengland.co.uk/tutorials/", + "https://publichealthscotland.scot/services/data-research-and-innovation-services/electronic-data-research-and-innovation-service-edris/national-safe-haven-nsh/" ], "guidance": "Please provide a brief description of the data access services that are available including:- environment that is currently available to researchers- additional consultancy and services- any indication of costs associated If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers. Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", "title": "Access service description" @@ -161,14 +148,6 @@ "title": "Access", "type": "object" }, - "AccessMode": { - "enum": [ - "Join research consortium", - "New project" - ], - "title": "AccessMode", - "type": "string" - }, "AccessService": { "enum": [ "TRE/SDE", @@ -758,7 +737,6 @@ "Transcriptomics", "Epigenomics", "Metabolomics", - "Multiomics", "Metagenomics", "Genomics", "Lipidomics", @@ -982,7 +960,7 @@ } ], "default": null, - "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "description": "Please provide any media associated with the Gateway Organisation using a valid URL for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", "examples": [ "PDF document that describes study protocol - https://link.to/document.pdf" ], @@ -1592,7 +1570,7 @@ "Observation": { "additionalProperties": false, "properties": { - "measuredNode": { + "observedNode": { "allOf": [ { "$ref": "#/$defs/StatisticalPopulationConstrainedV2" @@ -1652,7 +1630,7 @@ } }, "required": [ - "measuredNode", + "observedNode", "measuredValue", "observationDate", "measuredProperty" @@ -1709,10 +1687,12 @@ "identifier": { "anyOf": [ { - "type": "integer" + "maxLength": 50, + "minLength": 2, + "type": "string" }, { - "type": "null" + "type": "integer" } ], "default": null, diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Access.py b/hdr_schemata/models/HDRUK/v3_0_0/Access.py index a77a4d3..b7511f7 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Access.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Access.py @@ -21,10 +21,6 @@ class Config: json_schema_extra={"guidance": an.accessServiceCategory.guidance} ) - accessMode: Optional[AccessMode] = Field( - "New project", **an.accessMode.__dict__, json_schema_extra={"guidance": an.accessMode.guidance} - ) - accessService: Optional[LongDescription] = Field( None, **an.accessService.__dict__, json_schema_extra={"guidance": an.accessService.guidance} ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Observations.py b/hdr_schemata/models/HDRUK/v3_0_0/Observations.py index 56a24dc..928bbe7 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Observations.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Observations.py @@ -11,8 +11,8 @@ class Observation(BaseModel): class Config: extra = "forbid" - measuredNode: StatisticalPopulationConstrainedV2 = Field( - ..., 
**an.measuredNode.__dict__, json_schema_extra={"guidance": an.measuredNode.guidance} + observedNode: StatisticalPopulationConstrainedV2 = Field( + ..., **an.observedNode.__dict__, json_schema_extra={"guidance": an.observedNode.guidance} ) measuredValue: int = Field(..., **an.measuredValue.__dict__, json_schema_extra={"guidance": an.measuredValue.guidance}) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py index 51b75cf..985826f 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py @@ -1,6 +1,6 @@ from hdr_schemata.models.HDRUK.v2_1_2.Organisation import Organisation as BaseOrganisation from typing import Optional, Union, List -from pydantic import Field +from pydantic import Field, constr from hdr_schemata.definitions.HDRUK import * @@ -11,7 +11,7 @@ class Organisation(BaseOrganisation): - identifier: Optional[int] = Field( + identifier: Union[constr(min_length=2, max_length=50), int] = Field( None, **an.identifier.__dict__, json_schema_extra={"guidance": an.identifier.guidance} ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index 3f04eee..03d4f38 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -123,7 +123,7 @@ observations: title: "Observations" description: "This section provides an overview of observations of your dataset linked to specific points in time. Multiple observations about the dataset are encouraged to be provided, including multiple observations of the same property at different timepoints. At least one observation is required." 
- measuredNode: + observedNode: guidance: "- **Persons**: Unique persons recorded in the dataset\\n- **Events**: Unique events such as procedures and prescriptions within the dataset\\n-**Findings**: Unique findings included in the dataset such as diagnoses'\\n-**Number of scans per modality**: Unique scans for a specified imaging method modality (e.g. 12 x-rays)" examples: - "Persons" @@ -234,7 +234,8 @@ accessibility: accessService: guidance: "Please provide a brief description of the data access services that are available including:\\n- environment that is currently available to researchers\\n- additional consultancy and services\\n- any indication of costs associated\\n\\n If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers.\\n **Note**: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset." examples: - - "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + - "https://re-docs.genomicsengland.co.uk/tutorials/" + - "https://publichealthscotland.scot/services/data-research-and-innovation-services/electronic-data-research-and-innovation-service-edris/national-safe-haven-nsh/" title: "Access service description" description: "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers; additional consultancy and services; any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset." 
accessRights: @@ -336,7 +337,7 @@ documentation: examples: - "PDF document that describes study protocol - https://link.to/document.pdf" title: "Associated media" - description: "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal." + description: "Please provide any media associated with the Gateway Organisation using a valid URL for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal." description: guidance: "- An HTML account of the data that **provides context and scope** of the data, **limited to 3000 characters, and/or a resolvable URL** that describes the dataset.\\n- Additional information can be recorded and included using the Associated media field." 
title: "Description" From 063395670b319a09ad7b936ed7daef9519f2dc73 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Wed, 14 Aug 2024 17:04:24 +0100 Subject: [PATCH 04/23] update gwdm rm accessMode --- hdr_schemata/models/GWDM/2.0/schema.json | 21 --------------------- hdr_schemata/models/GWDM/v2_0/Access.py | 5 +---- 2 files changed, 1 insertion(+), 25 deletions(-) diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index ea73687..cb92044 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -122,19 +122,6 @@ "TRE/SDE" ], "title": "Access/governance requirements" - }, - "accessMode": { - "anyOf": [ - { - "$ref": "#/$defs/AccessMode" - }, - { - "type": "null" - } - ], - "default": "New project", - "description": "Indication of the application type to enable research access.", - "title": "Access mode" } }, "required": [ @@ -145,14 +132,6 @@ "title": "Access", "type": "object" }, - "AccessMode": { - "enum": [ - "Join research consortium", - "New project" - ], - "title": "AccessMode", - "type": "string" - }, "Accessibility": { "additionalProperties": false, "properties": { diff --git a/hdr_schemata/models/GWDM/v2_0/Access.py b/hdr_schemata/models/GWDM/v2_0/Access.py index 0daec1b..c95bc89 100644 --- a/hdr_schemata/models/GWDM/v2_0/Access.py +++ b/hdr_schemata/models/GWDM/v2_0/Access.py @@ -36,7 +36,4 @@ class Config: accessServiceCategory: Optional[CommaSeparatedValues] = Field( None, **an.accessServiceCategory.__dict__ ) - - accessMode: Optional[AccessMode] = Field( - "New project", **an.accessMode.__dict__ - ) \ No newline at end of file + \ No newline at end of file From e367ee21bd2fbf33a409ab1f28c05ca3a905c948 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Thu, 15 Aug 2024 13:57:49 +0100 Subject: [PATCH 05/23] fix form enum lists and new fields --- docs/HDRUK/3.0.0.form.json | 29 ++++++----------------------- docs/HDRUK/3.0.0.md | 30 
++++++++++-------------------- docs/HDRUK/3.0.0.structure.json | 29 ++++++++--------------------- 3 files changed, 24 insertions(+), 64 deletions(-) diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 3c2f695..c9a678d 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -138,8 +138,8 @@ "guidance": "**Example**: https://ror.org/053fq8t95\\nIf your organisation does not have a ROR identifier please use the \u201csuggest and institute\u201d function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform", "examples": null, "is_list": false, - "is_optional": true, - "types": "int", + "is_optional": false, + "types": "str", "location": "summary.dataCustodian.identifier" }, { @@ -326,7 +326,7 @@ { "required": false, "title": "Associated media", - "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "description": "Please provide any media associated with the Gateway Organisation using a valid URL for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. 
Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", "guidance": "- Please provide any media associated with the Gateway Organisation **using a valid URL** for the content.\\n- This is an opportunity to **provide additional context** that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question.\\n- Note: media assets should be hosted by the organisation.\n- **Example**: This could be a **link to a PDF Document** that describes methodology or further detail about the datasets, or a graph or chart that provides further context about the dataset.\\n- If you are providing multiple links for associated media, we recommend that you separate these with a comma.", "examples": [ "PDF document that describes study protocol - https://link.to/document.pdf" @@ -615,7 +615,6 @@ "Transcriptomics", "Epigenomics", "Metabolomics", - "Multiomics", "Metagenomics", "Genomics", "Lipidomics", @@ -915,30 +914,14 @@ }, "location": "accessibility.access.accessServiceCategory" }, - { - "required": false, - "title": "Access mode", - "description": "Indication of the application type to enable research access.", - "guidance": "Indicate whether a Researcher will need to join a consortium to enable access to the dataset, or if a Researcher will need to submit an application which describes a specific project and required dataset fields required.", - "examples": null, - "is_list": false, - "is_optional": true, - "types": { - "type": "string", - "options": [ - "Join research consortium", - "New project" - ] - }, - "location": "accessibility.access.accessMode" - }, { "required": false, "title": "Access service description", "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers; additional consultancy and services; any indication of costs associated. 
If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", "guidance": "Please provide a brief description of the data access services that are available including:\\n- environment that is currently available to researchers\\n- additional consultancy and services\\n- any indication of costs associated\\n\\n If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers.\\n **Note**: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", "examples": [ - "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + "https://re-docs.genomicsengland.co.uk/tutorials/", + "https://publichealthscotland.scot/services/data-research-and-innovation-services/electronic-data-research-and-innovation-service-edris/national-safe-haven-nsh/" ], "is_list": false, "is_optional": true, @@ -1626,7 +1609,7 @@ "Number of scans per modality" ] }, - "location": "observations.measuredNode" + "location": "observations.observedNode" }, { "required": true, diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index b9d819e..9ecf1ce 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -132,9 +132,9 @@ This is the organisation responsible for running or supporting the data access r Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation. 
-| title | guidance | is_list | required | type | -|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------| -| identifier | **Example**: https://ror.org/053fq8t95
If your organisation does not have a ROR identifier please use the “suggest and institute” function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform | False | False | ['int', 'null'] | +| title | guidance | is_list | required | type | +|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------| +| identifier | **Example**: https://ror.org/053fq8t95
If your organisation does not have a ROR identifier please use the “suggest and institute” function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform | False | False | ['str', 'int'] | @@ -282,7 +282,7 @@ A free-text description of the dataset.
A URL can also be provided as the des ### associatedMedia -Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal. +Please provide any media associated with the Gateway Organisation using a valid URL for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal. | title | guidance | is_list | required | type | |:-----------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------| @@ -453,9 +453,9 @@ The topic areas to which the dataset content relates. The sub-type of the dataset content. 
Multiomics is selected on behalf of the submitter if more that one omics datasetSubType ('proteomics', 'transcriptomics', 'epigenomics', 'metabolomics', 'metagenomics', 'genomics', 'lipidomics') is selected -| title | guidance | is_list | required | type | -|:-----------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Dataset sub-type | Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- *Measurements/Tests**- Includes any data related to laboratory or other diagnostics.
- *Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- *Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.
- *Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.
- *Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- *Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- *Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.
- *Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- *Information and communication**: Includes any data related to the study or application of information and communication.
- *Politics**: Includes any data related to political views, activities, voting, etc. | True | True | ["DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Multiomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']"] | +| title | guidance | is_list | required | type | 
+|:-----------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----
------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset sub-type | Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.
- **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm, abdomen or leg imaging.
- **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more than one omics datasetSubType is selected.
- **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or births and deaths records.
- **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- **Information and communication**: Includes any data related to the study or application of information and communication.
- *Politics**: Includes any data related to political views, activities, voting, etc. | True | True | ["DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']"] | @@ -643,17 +643,6 @@ Examples: * TRE/SDE -#### accessMode - -Indication of the application type to enable research access. - -| title | guidance | is_list | required | type | -|:------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------------------------| -| Access mode | Indicate whether a Researcher will need to join a consortium to enable access to the dataset, or if a Researcher will need to submit an application which describes a specific project and required dataset fields required. 
| False | False | ["AccessMode['Join research consortium','New project']", 'null'] | - - - - #### accessService Please provide a brief description of the data access services that are available including: environment that is currently available to researchers; additional consultancy and services; any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset. @@ -664,7 +653,8 @@ Please provide a brief description of the data access services that are availabl Examples: - * https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide + * https://re-docs.genomicsengland.co.uk/tutorials/ + * https://publichealthscotland.scot/services/data-research-and-innovation-services/electronic-data-research-and-innovation-service-edris/national-safe-haven-nsh/ #### accessRequestCost @@ -1023,7 +1013,7 @@ This section provides an overview of observations of your dataset linked to spec -### measuredNode +### observedNode Please select one of the following broad notes for your measured observation. Indicating whether the measured property is a recording of unique persons, events, findings or scans per modality. 
diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index c33186a..1f5e69f 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -175,11 +175,11 @@ "guidance": "Example: https://ror.org/053fq8t95If your organisation does not have a ROR identifier please use the \u201csuggest and institute\u201d function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform", "examples": null, "type": [ - "int", - "null" + "str", + "int" ], "is_list": false, - "is_optional": true + "is_optional": false }, { "name": "name", @@ -373,7 +373,7 @@ "name": "associatedMedia", "required": false, "title": "Associated media", - "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "description": "Please provide any media associated with the Gateway Organisation using a valid URL for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. 
Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", "guidance": "Please provide any media associated with the Gateway Organisation using a valid URL for the content.- This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question.- Note: media assets should be hosted by the organisation.Example: This could be a link to a PDF Document that describes methodology or further detail about the datasets, or a graph or chart that provides further context about the dataset.- If you are providing multiple links for associated media, we recommend that you separate these with a comma.", "examples": [ "PDF document that describes study protocol - https://link.to/document.pdf" @@ -602,7 +602,7 @@ "guidance": "Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.- Health and disease: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.- Treatments/Interventions: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.- Measurements/Tests- Includes any data related to laboratory or other diagnostics.- Imaging types: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.- *Imaging area of the body: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.- Omics: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. 
Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.- Socioeconomic: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.- *Lifestyle: Includes any data related to smoking, physical activity, dietary habits or alcohol.- Registry: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.- Environment and energy: Includes any data related to the monitoring or study of environmental or energy factors or events.- *Information and communication: Includes any data related to the study or application of information and communication.- Politics*: Includes any data related to political views, activities, voting, etc.", "examples": null, "type": [ - "DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Multiomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']" + "DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and 
neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']" ], "is_list": true, "is_optional": true @@ -853,20 +853,6 @@ "is_list": false, "is_optional": true }, - { - "name": "accessMode", - "required": false, - "title": "Access mode", - "description": "Indication of the application type to enable research access.", - "guidance": "Indicate whether a Researcher will need to join a consortium to enable access to the dataset, or if a Researcher will need to submit an application which describes a specific project and required dataset fields required.", - "examples": null, - "type": [ - "AccessMode['Join research consortium','New project']", - "null" - ], - "is_list": false, - "is_optional": true - }, { "name": "accessService", "required": false, @@ -874,7 +860,8 @@ "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers; additional consultancy and services; any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. 
However, there will be the opportunity to overwrite this value for each dataset.", "guidance": "Please provide a brief description of the data access services that are available including:- environment that is currently available to researchers- additional consultancy and services- any indication of costs associated If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers. Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", "examples": [ - "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + "https://re-docs.genomicsengland.co.uk/tutorials/", + "https://publichealthscotland.scot/services/data-research-and-innovation-services/electronic-data-research-and-innovation-service-edris/national-safe-haven-nsh/" ], "type": [ "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", @@ -1376,7 +1363,7 @@ "is_optional": false, "subItems": [ { - "name": "measuredNode", + "name": "observedNode", "required": true, "title": "Dataset volume measure", "description": "Please select one of the following broad notes for your measured observation. 
Indicating whether the measured property is a recording of unique persons, events, findings or scans per modality.", From a0ebd9f141eab5f35ccdcb99f6b457936f98e57a Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Thu, 15 Aug 2024 16:56:10 +0100 Subject: [PATCH 06/23] fix nulls in form hydration --- docs/HDRUK/3.0.0.form.json | 18 +++---- docs/HDRUK/3.0.0.md | 54 +++++++++---------- docs/HDRUK/3.0.0.structure.json | 26 ++++----- .../definitions/HDRUK/{Age.py => AgeEnum.py} | 7 +-- hdr_schemata/definitions/HDRUK/Disease.py | 13 ----- .../definitions/HDRUK/DiseaseCodeEnum.py | 8 +++ .../HDRUK/{Ethnicity.py => EthnicityEnum.py} | 6 +-- .../HDRUK/GenderAssignedAtBirth.py | 10 ---- hdr_schemata/definitions/HDRUK/GenderEnum.py | 6 +++ hdr_schemata/definitions/HDRUK/__init__.py | 8 +-- hdr_schemata/models/GWDM/2.0/schema.json | 54 +++++++++++++------ hdr_schemata/models/GWDM/v2_0/Age.py | 10 ++++ .../models/GWDM/v2_0/DemographicFrequency.py | 24 +++++++-- hdr_schemata/models/GWDM/v2_0/Disease.py | 16 ++++++ hdr_schemata/models/GWDM/v2_0/Ethnicity.py | 11 ++++ .../models/GWDM/v2_0/GenderAssignedAtBirth.py | 11 ++++ .../models/GWDM/v2_0/annotations/config.yaml | 18 +++++++ hdr_schemata/models/HDRUK/3.0.0/schema.json | 54 +++++++++++++------ hdr_schemata/models/HDRUK/v3_0_0/Age.py | 10 ++++ .../HDRUK/v3_0_0/DemographicFrequency.py | 24 +++++++-- hdr_schemata/models/HDRUK/v3_0_0/Disease.py | 16 ++++++ hdr_schemata/models/HDRUK/v3_0_0/Ethnicity.py | 11 ++++ .../HDRUK/v3_0_0/GenderAssignedAtBirth.py | 11 ++++ .../HDRUK/v3_0_0/annotations/config.yaml | 18 +++++++ hdr_schemata/utils/create_markdown.py | 16 +++--- 25 files changed, 321 insertions(+), 139 deletions(-) rename hdr_schemata/definitions/HDRUK/{Age.py => AgeEnum.py} (91%) delete mode 100644 hdr_schemata/definitions/HDRUK/Disease.py create mode 100644 hdr_schemata/definitions/HDRUK/DiseaseCodeEnum.py rename hdr_schemata/definitions/HDRUK/{Ethnicity.py => EthnicityEnum.py} (93%) delete mode 100644 
hdr_schemata/definitions/HDRUK/GenderAssignedAtBirth.py create mode 100644 hdr_schemata/definitions/HDRUK/GenderEnum.py create mode 100644 hdr_schemata/models/GWDM/v2_0/Age.py create mode 100644 hdr_schemata/models/GWDM/v2_0/Disease.py create mode 100644 hdr_schemata/models/GWDM/v2_0/Ethnicity.py create mode 100644 hdr_schemata/models/GWDM/v2_0/GenderAssignedAtBirth.py create mode 100644 hdr_schemata/models/HDRUK/v3_0_0/Age.py create mode 100644 hdr_schemata/models/HDRUK/v3_0_0/Disease.py create mode 100644 hdr_schemata/models/HDRUK/v3_0_0/Ethnicity.py create mode 100644 hdr_schemata/models/HDRUK/v3_0_0/GenderAssignedAtBirth.py diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index c9a678d..bc7f219 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -1664,7 +1664,7 @@ }, { "required": true, - "title": null, + "title": "Age bin", "description": null, "guidance": "", "examples": null, @@ -1703,7 +1703,7 @@ }, { "required": true, - "title": null, + "title": "Age count", "description": null, "guidance": "", "examples": null, @@ -1714,7 +1714,7 @@ }, { "required": true, - "title": null, + "title": "Ethnicity bin", "description": null, "guidance": "", "examples": null, @@ -1747,7 +1747,7 @@ }, { "required": true, - "title": null, + "title": "Ethnicity count", "description": null, "guidance": "", "examples": null, @@ -1758,7 +1758,7 @@ }, { "required": true, - "title": null, + "title": "Gender assigned at birth bin", "description": null, "guidance": "", "examples": null, @@ -1775,7 +1775,7 @@ }, { "required": true, - "title": null, + "title": "Gender assigned at birth count", "description": null, "guidance": "", "examples": null, @@ -1786,7 +1786,7 @@ }, { "required": true, - "title": null, + "title": "Disease code", "description": null, "guidance": "", "examples": null, @@ -1797,7 +1797,7 @@ }, { "required": true, - "title": null, + "title": "Disease code vocabulary", "description": null, "guidance": "", "examples": null, @@ 
-1815,7 +1815,7 @@ }, { "required": true, - "title": null, + "title": "Disease count", "description": null, "guidance": "", "examples": null, diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index 9ecf1ce..88e92ce 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -1238,9 +1238,9 @@ Array of age bins and their corresponding counts. None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| | | False | True | ["AgeBin['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']"] | +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Age bin | | False | True | ["AgeEnum['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']"] | @@ -1249,9 +1249,9 @@ None 
None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------| -| | | False | True | ['int'] | +| title | guidance | is_list | required | type | +|:----------|:-----------|:----------|:-----------|:--------| +| Age count | | False | True | ['int'] | @@ -1269,9 +1269,9 @@ Array of ethnicity bins and their corresponding counts. None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| | | False | True | ["EthnicityBin['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']"] | +| title | guidance | is_list | required | type | 
+|:--------------|:-----------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Ethnicity bin | | False | True | ["EthnicityEnum['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']"] | @@ -1280,9 +1280,9 @@ None None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------| -| | | False | True | ['int'] | +| title | guidance | is_list | required | type | +|:----------------|:-----------|:----------|:-----------|:--------| +| Ethnicity count | | False | True | ['int'] | @@ -1300,9 +1300,9 @@ Gender assigned at birth, male or female, and their corresponding counts. 
None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:-------------------------------| -| | | False | True | ["GenderBin['male','female']"] | +| title | guidance | is_list | required | type | +|:-----------------------------|:-----------|:----------|:-----------|:--------------------------------| +| Gender assigned at birth bin | | False | True | ["GenderEnum['male','female']"] | @@ -1311,9 +1311,9 @@ None None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------| -| | | False | True | ['int'] | +| title | guidance | is_list | required | type | +|:-------------------------------|:-----------|:----------|:-----------|:--------| +| Gender assigned at birth count | | False | True | ['int'] | @@ -1331,9 +1331,9 @@ Array of diseases and their corresponding counts. None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:---------------| -| | | False | True | ['str', 'int'] | +| title | guidance | is_list | required | type | +|:-------------|:-----------|:----------|:-----------|:---------------| +| Disease code | | False | True | ['str', 'int'] | @@ -1342,9 +1342,9 @@ None None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:------------------------------------------------------| -| | | False | True | ["DiseaseCodeVocabulary['ICD10','SNOMED CT','MeSH']"] | +| title | guidance | is_list | required | type | +|:------------------------|:-----------|:----------|:-----------|:------------------------------------------------| +| Disease code vocabulary | | False | True | ["DiseaseCodeEnum['ICD10','SNOMED CT','MeSH']"] | @@ -1353,9 +1353,9 @@ None None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------| -| | | False | True | ['int'] | +| title | guidance | is_list | required | type | 
+|:--------------|:-----------|:----------|:-----------|:--------| +| Disease count | | False | True | ['int'] | diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index 1f5e69f..f745e41 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -1664,12 +1664,12 @@ { "name": "bin", "required": true, - "title": null, + "title": "Age bin", "description": null, "guidance": "", "examples": null, "type": [ - "AgeBin['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']" + "AgeEnum['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']" ], "is_list": false, "is_optional": false @@ -1677,7 +1677,7 @@ { "name": "count", "required": true, - "title": null, + "title": "Age count", "description": null, "guidance": "", "examples": null, @@ -1705,12 +1705,12 @@ { "name": "bin", "required": true, - "title": null, + "title": "Ethnicity bin", "description": null, "guidance": "", "examples": null, "type": [ - "EthnicityBin['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other 
Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']" + "EthnicityEnum['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']" ], "is_list": false, "is_optional": false @@ -1718,7 +1718,7 @@ { "name": "count", "required": true, - "title": null, + "title": "Ethnicity count", "description": null, "guidance": "", "examples": null, @@ -1746,12 +1746,12 @@ { "name": "bin", "required": true, - "title": null, + "title": "Gender assigned at birth bin", "description": null, "guidance": "", "examples": null, "type": [ - "GenderBin['male','female']" + "GenderEnum['male','female']" ], "is_list": false, "is_optional": false @@ -1759,7 +1759,7 @@ { "name": "count", "required": true, - "title": null, + "title": "Gender assigned at birth count", "description": null, "guidance": "", "examples": null, @@ -1787,7 +1787,7 @@ { "name": "diseaseCode", "required": true, - "title": null, + "title": "Disease code", "description": null, "guidance": "", "examples": null, @@ -1801,12 +1801,12 @@ { "name": "diseaseCodeVocabulary", "required": true, - "title": null, + "title": "Disease code vocabulary", "description": null, "guidance": "", "examples": null, "type": [ - "DiseaseCodeVocabulary['ICD10','SNOMED CT','MeSH']" + "DiseaseCodeEnum['ICD10','SNOMED CT','MeSH']" ], "is_list": false, "is_optional": false @@ -1814,7 +1814,7 @@ { "name": "count", "required": true, - "title": null, + 
"title": "Disease count", "description": null, "guidance": "", "examples": null, diff --git a/hdr_schemata/definitions/HDRUK/Age.py b/hdr_schemata/definitions/HDRUK/AgeEnum.py similarity index 91% rename from hdr_schemata/definitions/HDRUK/Age.py rename to hdr_schemata/definitions/HDRUK/AgeEnum.py index 8c47b91..39ab081 100644 --- a/hdr_schemata/definitions/HDRUK/Age.py +++ b/hdr_schemata/definitions/HDRUK/AgeEnum.py @@ -1,7 +1,7 @@ from pydantic import BaseModel from enum import Enum -class AgeBin(Enum): +class AgeEnum(Enum): DAYS_0_6 = '0-6 days' DAYS_7_27 = '7-27 days' MONTHS_1_11 = '1-11 months' @@ -26,8 +26,3 @@ class AgeBin(Enum): YEARS_90_94 = '90-94 years' YEARS_65_99 = '65-99 years' YEARS_100_PLUS = '100+ years' - -class Age(BaseModel): - bin: AgeBin - count: int - diff --git a/hdr_schemata/definitions/HDRUK/Disease.py b/hdr_schemata/definitions/HDRUK/Disease.py deleted file mode 100644 index 2374c04..0000000 --- a/hdr_schemata/definitions/HDRUK/Disease.py +++ /dev/null @@ -1,13 +0,0 @@ -from pydantic import BaseModel -from enum import Enum -from typing import Union - -class DiseaseCodeVocabulary(Enum): - ICD10 = 'ICD10' - SNOMED_CT = 'SNOMED CT' - MESH = 'MeSH' - -class Disease(BaseModel): - diseaseCode: Union[str, int] - diseaseCodeVocabulary: DiseaseCodeVocabulary - count: int diff --git a/hdr_schemata/definitions/HDRUK/DiseaseCodeEnum.py b/hdr_schemata/definitions/HDRUK/DiseaseCodeEnum.py new file mode 100644 index 0000000..54d41e7 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/DiseaseCodeEnum.py @@ -0,0 +1,8 @@ +from pydantic import BaseModel +from enum import Enum +from typing import Union + +class DiseaseCodeEnum(Enum): + ICD10 = 'ICD10' + SNOMED_CT = 'SNOMED CT' + MESH = 'MeSH' diff --git a/hdr_schemata/definitions/HDRUK/Ethnicity.py b/hdr_schemata/definitions/HDRUK/EthnicityEnum.py similarity index 93% rename from hdr_schemata/definitions/HDRUK/Ethnicity.py rename to hdr_schemata/definitions/HDRUK/EthnicityEnum.py index b02ee64..aa39379 100644 
--- a/hdr_schemata/definitions/HDRUK/Ethnicity.py +++ b/hdr_schemata/definitions/HDRUK/EthnicityEnum.py @@ -1,7 +1,7 @@ from pydantic import BaseModel from enum import Enum -class EthnicityBin(Enum): +class EthnicityEnum(Enum): WHITE___BRITISH = 'White - British' WHITE___IRISH = 'White - Irish' WHITE___ANY_OTHER_WHITE_BACKGROUND = 'White - Any other White background' @@ -20,7 +20,3 @@ class EthnicityBin(Enum): OTHER_ETHNIC_GROUPS___ANY_OTHER_ETHNIC_GROUP = 'Other Ethnic Groups - Any other ethnic group' NOT_STATED = 'Not stated' NOT_KNOWN = 'Not known' - -class Ethnicity(BaseModel): - bin: EthnicityBin - count: int diff --git a/hdr_schemata/definitions/HDRUK/GenderAssignedAtBirth.py b/hdr_schemata/definitions/HDRUK/GenderAssignedAtBirth.py deleted file mode 100644 index ab6bbb7..0000000 --- a/hdr_schemata/definitions/HDRUK/GenderAssignedAtBirth.py +++ /dev/null @@ -1,10 +0,0 @@ -from pydantic import BaseModel -from enum import Enum - -class GenderBin(Enum): - MALE = 'male' - FEMALE = 'female' - -class GenderAssignedAtBirth(BaseModel): - bin: GenderBin - count: int diff --git a/hdr_schemata/definitions/HDRUK/GenderEnum.py b/hdr_schemata/definitions/HDRUK/GenderEnum.py new file mode 100644 index 0000000..0a77a76 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/GenderEnum.py @@ -0,0 +1,6 @@ +from pydantic import BaseModel +from enum import Enum + +class GenderEnum(Enum): + MALE = 'male' + FEMALE = 'female' diff --git a/hdr_schemata/definitions/HDRUK/__init__.py b/hdr_schemata/definitions/HDRUK/__init__.py index 6d3299b..5e76eff 100644 --- a/hdr_schemata/definitions/HDRUK/__init__.py +++ b/hdr_schemata/definitions/HDRUK/__init__.py @@ -1,7 +1,7 @@ from .AbstractText import AbstractText from .AccessMode import AccessMode from .AccessService import AccessService -from .Age import Age +from .AgeEnum import AgeEnum from .AgeRange import AgeRange from .Assay import Assay from .CommaSeparatedIntegers import CommaSeparatedIntegers @@ -13,14 +13,14 @@ from 
.DataUseRequirements import DataUseRequirements, DataUseRequirementsV2 from .DeliveryLeadTime import DeliveryLeadTime, DeliveryLeadTimeV2 from .Description import Description -from .Disease import Disease +from .DiseaseCodeEnum import DiseaseCodeEnum from .Doi import Doi -from .Ethnicity import Ethnicity +from .EthnicityEnum import EthnicityEnum from .EmailAddress import EmailAddress from .EndDateEnum import EndDateEnum from .Followup import Followup from .Format import Format -from .GenderAssignedAtBirth import GenderAssignedAtBirth +from .GenderEnum import GenderEnum from .ICD_0_3 import ICD_0_3 from .IsPartOfEnum import IsPartOfEnum from .Isocountrycode import Isocountrycode diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index cb92044..a523138 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -179,10 +179,15 @@ "Age": { "properties": { "bin": { - "$ref": "#/$defs/AgeBin" + "allOf": [ + { + "$ref": "#/$defs/AgeEnum" + } + ], + "title": "Age bin" }, "count": { - "title": "Count", + "title": "Age count", "type": "integer" } }, @@ -193,7 +198,7 @@ "title": "Age", "type": "object" }, - "AgeBin": { + "AgeEnum": { "enum": [ "0-6 days", "7-27 days", @@ -220,7 +225,7 @@ "65-99 years", "100+ years" ], - "title": "AgeBin", + "title": "AgeEnum", "type": "string" }, "AgeRange": { @@ -679,13 +684,18 @@ "type": "integer" } ], - "title": "Diseasecode" + "title": "Disease code" }, "diseaseCodeVocabulary": { - "$ref": "#/$defs/DiseaseCodeVocabulary" + "allOf": [ + { + "$ref": "#/$defs/DiseaseCodeEnum" + } + ], + "title": "Disease code vocabulary" }, "count": { - "title": "Count", + "title": "Disease count", "type": "integer" } }, @@ -697,13 +707,13 @@ "title": "Disease", "type": "object" }, - "DiseaseCodeVocabulary": { + "DiseaseCodeEnum": { "enum": [ "ICD10", "SNOMED CT", "MeSH" ], - "title": "DiseaseCodeVocabulary", + "title": "DiseaseCodeEnum", "type": "string" }, "Doi": { 
@@ -721,10 +731,15 @@ "Ethnicity": { "properties": { "bin": { - "$ref": "#/$defs/EthnicityBin" + "allOf": [ + { + "$ref": "#/$defs/EthnicityEnum" + } + ], + "title": "Ethnicity bin" }, "count": { - "title": "Count", + "title": "Ethnicity count", "type": "integer" } }, @@ -735,7 +750,7 @@ "title": "Ethnicity", "type": "object" }, - "EthnicityBin": { + "EthnicityEnum": { "enum": [ "White - British", "White - Irish", @@ -756,7 +771,7 @@ "Not stated", "Not known" ], - "title": "EthnicityBin", + "title": "EthnicityEnum", "type": "string" }, "Followup": { @@ -848,10 +863,15 @@ "GenderAssignedAtBirth": { "properties": { "bin": { - "$ref": "#/$defs/GenderBin" + "allOf": [ + { + "$ref": "#/$defs/GenderEnum" + } + ], + "title": "Gender assigned at birth bin" }, "count": { - "title": "Count", + "title": "Gender assigned at birth count", "type": "integer" } }, @@ -862,12 +882,12 @@ "title": "GenderAssignedAtBirth", "type": "object" }, - "GenderBin": { + "GenderEnum": { "enum": [ "male", "female" ], - "title": "GenderBin", + "title": "GenderEnum", "type": "string" }, "Linkage": { diff --git a/hdr_schemata/models/GWDM/v2_0/Age.py b/hdr_schemata/models/GWDM/v2_0/Age.py new file mode 100644 index 0000000..5ef5f8f --- /dev/null +++ b/hdr_schemata/models/GWDM/v2_0/Age.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel, Field +from hdr_schemata.definitions.HDRUK.AgeEnum import AgeEnum + +from .annotations import annotations + +an = annotations.demographicFrequency.age + +class Age(BaseModel): + bin: AgeEnum = Field(..., **an.bin.__dict__) + count: int = Field(..., **an.count.__dict__) diff --git a/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py b/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py index 73f6ed6..51a6381 100644 --- a/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py +++ b/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py @@ -1,9 +1,13 @@ from typing import Optional, List, Union from pydantic import BaseModel, Field -from hdr_schemata.definitions.HDRUK 
import * from .annotations import annotations +from .Age import Age +from .Disease import Disease +from .Ethnicity import Ethnicity +from .GenderAssignedAtBirth import GenderAssignedAtBirth + an = annotations.demographicFrequency @@ -13,18 +17,28 @@ class Config: age: Optional[List[Age]] = Field( ..., - **an.age.__dict__, + title=an.age.title, + description=an.age.description, # json_schema_extra={"guidance": an.age.guidance} ) ethnicity: Optional[List[Ethnicity]] = Field( - ..., **an.ethnicity.__dict__, # json_schema_extra={"guidance": an.ethnicity.guidance} + ..., + title=an.ethnicity.title, + description=an.ethnicity.description, + # json_schema_extra={"guidance": an.ethnicity.guidance} ) genderAssignedAtBirth: Optional[List[GenderAssignedAtBirth]] = Field( - ..., **an.genderAssignedAtBirth.__dict__, # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} + ..., + title=an.genderAssignedAtBirth.title, + description=an.genderAssignedAtBirth.description, + # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} ) disease: Optional[List[Disease]] = Field( - ..., **an.disease.__dict__, # json_schema_extra={"guidance": an.disease.guidance} + ..., + title=an.disease.title, + description=an.disease.description, + # json_schema_extra={"guidance": an.disease.guidance} ) diff --git a/hdr_schemata/models/GWDM/v2_0/Disease.py b/hdr_schemata/models/GWDM/v2_0/Disease.py new file mode 100644 index 0000000..3ffe9d4 --- /dev/null +++ b/hdr_schemata/models/GWDM/v2_0/Disease.py @@ -0,0 +1,16 @@ +from pydantic import BaseModel, Field +from enum import Enum +from typing import Union +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.demographicFrequency.disease + + +class Disease(BaseModel): + diseaseCode: Union[str, int] = Field(..., **an.diseaseCode.__dict__) + diseaseCodeVocabulary: DiseaseCodeEnum = Field( + ..., **an.diseaseCodeVocabulary.__dict__ + ) + count: int = Field(..., **an.count.__dict__) 
diff --git a/hdr_schemata/models/GWDM/v2_0/Ethnicity.py b/hdr_schemata/models/GWDM/v2_0/Ethnicity.py new file mode 100644 index 0000000..e2402af --- /dev/null +++ b/hdr_schemata/models/GWDM/v2_0/Ethnicity.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel, Field +from enum import Enum +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.demographicFrequency.ethnicity + +class Ethnicity(BaseModel): + bin: EthnicityEnum = Field(..., **an.bin.__dict__) + count: int = Field(..., **an.count.__dict__) diff --git a/hdr_schemata/models/GWDM/v2_0/GenderAssignedAtBirth.py b/hdr_schemata/models/GWDM/v2_0/GenderAssignedAtBirth.py new file mode 100644 index 0000000..b8c137e --- /dev/null +++ b/hdr_schemata/models/GWDM/v2_0/GenderAssignedAtBirth.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel, Field +from enum import Enum +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.demographicFrequency.genderAssignedAtBirth + +class GenderAssignedAtBirth(BaseModel): + bin: GenderEnum = Field(..., **an.bin.__dict__) + count: int = Field(..., **an.count.__dict__) diff --git a/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml b/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml index 41b211f..76f0ac2 100644 --- a/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml +++ b/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml @@ -565,15 +565,33 @@ demographicFrequency: age: title: "Age" description: "Array of age bins and their corresponding counts." + bin: + title: "Age bin" + count: + title: "Age count" ethnicity: title: "Ethnicity" description: "Array of ethnicity bins and their corresponding counts." + bin: + title: "Ethnicity bin" + count: + title: "Ethnicity count" genderAssignedAtBirth: title: "Gender assigned at birth" description: "Gender assigned at birth, male or female, and their corresponding counts." 
+ bin: + title: "Gender assigned at birth bin" + count: + title: "Gender assigned at birth count" disease: title: "Disease" description: "Array of diseases and their corresponding counts." + diseaseCode: + title: "Disease code" + diseaseCodeVocabulary: + title: "Disease code vocabulary" + count: + title: "Disease count" omics: title: "Omics" description: "Omics" diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 5f18146..56d33eb 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -206,10 +206,15 @@ "Age": { "properties": { "bin": { - "$ref": "#/$defs/AgeBin" + "allOf": [ + { + "$ref": "#/$defs/AgeEnum" + } + ], + "title": "Age bin" }, "count": { - "title": "Count", + "title": "Age count", "type": "integer" } }, @@ -220,7 +225,7 @@ "title": "Age", "type": "object" }, - "AgeBin": { + "AgeEnum": { "enum": [ "0-6 days", "7-27 days", @@ -247,7 +252,7 @@ "65-99 years", "100+ years" ], - "title": "AgeBin", + "title": "AgeEnum", "type": "string" }, "Assay": { @@ -896,13 +901,18 @@ "type": "integer" } ], - "title": "Diseasecode" + "title": "Disease code" }, "diseaseCodeVocabulary": { - "$ref": "#/$defs/DiseaseCodeVocabulary" + "allOf": [ + { + "$ref": "#/$defs/DiseaseCodeEnum" + } + ], + "title": "Disease code vocabulary" }, "count": { - "title": "Count", + "title": "Disease count", "type": "integer" } }, @@ -914,13 +924,13 @@ "title": "Disease", "type": "object" }, - "DiseaseCodeVocabulary": { + "DiseaseCodeEnum": { "enum": [ "ICD10", "SNOMED CT", "MeSH" ], - "title": "DiseaseCodeVocabulary", + "title": "DiseaseCodeEnum", "type": "string" }, "Documentation": { @@ -1167,10 +1177,15 @@ "Ethnicity": { "properties": { "bin": { - "$ref": "#/$defs/EthnicityBin" + "allOf": [ + { + "$ref": "#/$defs/EthnicityEnum" + } + ], + "title": "Ethnicity bin" }, "count": { - "title": "Count", + "title": "Ethnicity count", "type": "integer" } }, @@ -1181,7 +1196,7 @@ "title": 
"Ethnicity", "type": "object" }, - "EthnicityBin": { + "EthnicityEnum": { "enum": [ "White - British", "White - Irish", @@ -1202,7 +1217,7 @@ "Not stated", "Not known" ], - "title": "EthnicityBin", + "title": "EthnicityEnum", "type": "string" }, "Followup": { @@ -1282,10 +1297,15 @@ "GenderAssignedAtBirth": { "properties": { "bin": { - "$ref": "#/$defs/GenderBin" + "allOf": [ + { + "$ref": "#/$defs/GenderEnum" + } + ], + "title": "Gender assigned at birth bin" }, "count": { - "title": "Count", + "title": "Gender assigned at birth count", "type": "integer" } }, @@ -1296,12 +1316,12 @@ "title": "GenderAssignedAtBirth", "type": "object" }, - "GenderBin": { + "GenderEnum": { "enum": [ "male", "female" ], - "title": "GenderBin", + "title": "GenderEnum", "type": "string" }, "GenderType": { diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Age.py b/hdr_schemata/models/HDRUK/v3_0_0/Age.py new file mode 100644 index 0000000..5ef5f8f --- /dev/null +++ b/hdr_schemata/models/HDRUK/v3_0_0/Age.py @@ -0,0 +1,10 @@ +from pydantic import BaseModel, Field +from hdr_schemata.definitions.HDRUK.AgeEnum import AgeEnum + +from .annotations import annotations + +an = annotations.demographicFrequency.age + +class Age(BaseModel): + bin: AgeEnum = Field(..., **an.bin.__dict__) + count: int = Field(..., **an.count.__dict__) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py b/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py index 73f6ed6..51a6381 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py @@ -1,9 +1,13 @@ from typing import Optional, List, Union from pydantic import BaseModel, Field -from hdr_schemata.definitions.HDRUK import * from .annotations import annotations +from .Age import Age +from .Disease import Disease +from .Ethnicity import Ethnicity +from .GenderAssignedAtBirth import GenderAssignedAtBirth + an = annotations.demographicFrequency @@ -13,18 +17,28 @@ class Config: 
age: Optional[List[Age]] = Field( ..., - **an.age.__dict__, + title=an.age.title, + description=an.age.description, # json_schema_extra={"guidance": an.age.guidance} ) ethnicity: Optional[List[Ethnicity]] = Field( - ..., **an.ethnicity.__dict__, # json_schema_extra={"guidance": an.ethnicity.guidance} + ..., + title=an.ethnicity.title, + description=an.ethnicity.description, + # json_schema_extra={"guidance": an.ethnicity.guidance} ) genderAssignedAtBirth: Optional[List[GenderAssignedAtBirth]] = Field( - ..., **an.genderAssignedAtBirth.__dict__, # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} + ..., + title=an.genderAssignedAtBirth.title, + description=an.genderAssignedAtBirth.description, + # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} ) disease: Optional[List[Disease]] = Field( - ..., **an.disease.__dict__, # json_schema_extra={"guidance": an.disease.guidance} + ..., + title=an.disease.title, + description=an.disease.description, + # json_schema_extra={"guidance": an.disease.guidance} ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Disease.py b/hdr_schemata/models/HDRUK/v3_0_0/Disease.py new file mode 100644 index 0000000..3ffe9d4 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v3_0_0/Disease.py @@ -0,0 +1,16 @@ +from pydantic import BaseModel, Field +from enum import Enum +from typing import Union +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.demographicFrequency.disease + + +class Disease(BaseModel): + diseaseCode: Union[str, int] = Field(..., **an.diseaseCode.__dict__) + diseaseCodeVocabulary: DiseaseCodeEnum = Field( + ..., **an.diseaseCodeVocabulary.__dict__ + ) + count: int = Field(..., **an.count.__dict__) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Ethnicity.py b/hdr_schemata/models/HDRUK/v3_0_0/Ethnicity.py new file mode 100644 index 0000000..e2402af --- /dev/null +++ b/hdr_schemata/models/HDRUK/v3_0_0/Ethnicity.py @@ -0,0 +1,11 @@ +from pydantic import 
BaseModel, Field +from enum import Enum +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.demographicFrequency.ethnicity + +class Ethnicity(BaseModel): + bin: EthnicityEnum = Field(..., **an.bin.__dict__) + count: int = Field(..., **an.count.__dict__) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/GenderAssignedAtBirth.py b/hdr_schemata/models/HDRUK/v3_0_0/GenderAssignedAtBirth.py new file mode 100644 index 0000000..b8c137e --- /dev/null +++ b/hdr_schemata/models/HDRUK/v3_0_0/GenderAssignedAtBirth.py @@ -0,0 +1,11 @@ +from pydantic import BaseModel, Field +from enum import Enum +from hdr_schemata.definitions.HDRUK import * + +from .annotations import annotations + +an = annotations.demographicFrequency.genderAssignedAtBirth + +class GenderAssignedAtBirth(BaseModel): + bin: GenderEnum = Field(..., **an.bin.__dict__) + count: int = Field(..., **an.count.__dict__) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index 03d4f38..1242fc8 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -409,15 +409,33 @@ demographicFrequency: age: title: "Age" description: "Array of age bins and their corresponding counts." + bin: + title: "Age bin" + count: + title: "Age count" ethnicity: title: "Ethnicity" description: "Array of ethnicity bins and their corresponding counts." + bin: + title: "Ethnicity bin" + count: + title: "Ethnicity count" genderAssignedAtBirth: title: "Gender assigned at birth" description: "Gender assigned at birth, male or female, and their corresponding counts." + bin: + title: "Gender assigned at birth bin" + count: + title: "Gender assigned at birth count" disease: title: "Disease" description: "Array of diseases and their corresponding counts." 
+ diseaseCode: + title: "Disease code" + diseaseCodeVocabulary: + title: "Disease code vocabulary" + count: + title: "Disease count" omics: title: "Omics" description: "Omics" diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index b060d26..3cd4542 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -244,10 +244,10 @@ def remove_types(data): from hdr_schemata.models.GWDM.v2_0 import Gwdm20 -create_markdown(Hdruk220, dir_path+"/../../docs/HDRUK", "2.2.0") -create_markdown(Hdruk221, dir_path+"/../../docs/HDRUK", "2.2.1") -create_markdown(Hdruk212, dir_path+"/../../docs/HDRUK", "2.1.2") -create_markdown(Hdruk213, dir_path+"/../../docs/HDRUK", "2.1.3") +# create_markdown(Hdruk220, dir_path+"/../../docs/HDRUK", "2.2.0") +# create_markdown(Hdruk221, dir_path+"/../../docs/HDRUK", "2.2.1") +# create_markdown(Hdruk212, dir_path+"/../../docs/HDRUK", "2.1.2") +# create_markdown(Hdruk213, dir_path+"/../../docs/HDRUK", "2.1.3") create_markdown(Hdruk300, dir_path+"/../../docs/HDRUK", "3.0.0") from hdr_schemata.models.GWDM.v1_1 import Gwdm10 @@ -255,7 +255,7 @@ def remove_types(data): from hdr_schemata.models.GWDM.v1_2 import Gwdm12 from hdr_schemata.models.GWDM.v2_0 import Gwdm20 -create_markdown(Gwdm10, dir_path+"/../../docs/GWDM", "1.0") -create_markdown(Gwdm11, dir_path+"/../../docs/GWDM", "1.1") -create_markdown(Gwdm12, dir_path+"/../../docs/GWDM", "1.2") -create_markdown(Gwdm20, dir_path+"/../../docs/GWDM", "2.0") +# create_markdown(Gwdm10, dir_path+"/../../docs/GWDM", "1.0") +# create_markdown(Gwdm11, dir_path+"/../../docs/GWDM", "1.1") +# create_markdown(Gwdm12, dir_path+"/../../docs/GWDM", "1.2") +# create_markdown(Gwdm20, dir_path+"/../../docs/GWDM", "2.0") From 52ff107c8ea6547c98453f54bef12c0b5cb920c7 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Fri, 16 Aug 2024 12:06:43 +0100 Subject: [PATCH 07/23] debugging from onboarding form --- docs/HDRUK/3.0.0.form.json | 12 +++---- 
docs/HDRUK/3.0.0.md | 36 +++++++++---------- docs/HDRUK/3.0.0.structure.json | 12 +++---- hdr_schemata/models/HDRUK/3.0.0/schema.json | 31 +++++++++++++--- .../models/HDRUK/v3_0_0/FormatAndStandards.py | 5 +++ hdr_schemata/models/HDRUK/v3_0_0/Summary.py | 4 +++ .../HDRUK/v3_0_0/annotations/config.yaml | 11 +++--- 7 files changed, 71 insertions(+), 40 deletions(-) diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index bc7f219..5f2ee42 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -296,7 +296,7 @@ "required": false, "title": "Alternate dataset identifiers", "description": "Alternate dataset identifiers or local identifiers.", - "guidance": "", + "guidance": "Alternate dataset identifiers or local identifiers.", "examples": null, "is_list": false, "is_optional": true, @@ -311,7 +311,7 @@ "required": false, "title": "Description", "description": "A free-text description of the dataset.\\nA URL can also be provided as the description of the dataset.\\nGateway Feature: Keywords and text may be extracted out of the description and indexed for search.", - "guidance": "- An HTML account of the data that **provides context and scope** of the data, **limited to 3000 characters, and/or a resolvable URL** that describes the dataset.\\n- Additional information can be recorded and included using the Associated media field.", + "guidance": "- An HTML account of the data that **provides context and scope** of the data, **limited to 10000 characters, and/or a resolvable URL** that describes the dataset.\\n- Additional information can be recorded and included using the Associated media field.", "examples": null, "is_list": false, "is_optional": false, @@ -992,7 +992,7 @@ "required": true, "title": "Data Controller", "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data 
or are to be processed.", - "guidance": "- Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.\\n- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.\\n- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.\\n- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.\\n- Example: NHS England'", + "guidance": "- Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.\\n- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.\\n- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.\\n- In some cases, there may be multiple Data Controllers i.e. GP data. 
If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.\\n- Example: NHS England", "examples": [ "NHS England" ], @@ -1664,7 +1664,7 @@ }, { "required": true, - "title": "Age bin", + "title": "Age grouping", "description": null, "guidance": "", "examples": null, @@ -1714,7 +1714,7 @@ }, { "required": true, - "title": "Ethnicity bin", + "title": "Ethnicity grouping", "description": null, "guidance": "", "examples": null, @@ -1758,7 +1758,7 @@ }, { "required": true, - "title": "Gender assigned at birth bin", + "title": "Gender assigned at birth", "description": null, "guidance": "", "examples": null, diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index 88e92ce..4212032 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -253,9 +253,9 @@ Examples: Alternate dataset identifiers or local identifiers. -| title | guidance | is_list | required | type | -|:------------------------------|:-----------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------| -| Alternate dataset identifiers | | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | +| title | guidance | is_list | required | type | +|:------------------------------|:----------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------| +| Alternate dataset identifiers | Alternate dataset identifiers or local identifiers. | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | @@ -273,9 +273,9 @@ Documentation can include a rich text description of the dataset or links to med A free-text description of the dataset.
A URL can also be provided as the description of the dataset.
Gateway Feature: Keywords and text may be extracted out of the description and indexed for search. -| title | guidance | is_list | required | type | -|:------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------| -| Description | - An HTML account of the data that **provides context and scope** of the data, **limited to 3000 characters, and/or a resolvable URL** that describes the dataset.
- Additional information can be recorded and included using the Associated media field. | False | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]"] | +| title | guidance | is_list | required | type | +|:------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------| +| Description | - An HTML account of the data that **provides context and scope** of the data, **limited to 10000 characters, and/or a resolvable URL** that describes the dataset.
- Additional information can be recorded and included using the Associated media field. | False | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]"] | @@ -694,9 +694,9 @@ Please use country code from ISO 3166-1 country codes and the associated ISO 316 Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed. -| title | guidance | is_list | required | type | -|:----------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------| -| Data Controller | - Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.
- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.
- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.
- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.
- Example: NHS England' | False | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| title | guidance | is_list | required | type | +|:----------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | - Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.
- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.
- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.
- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.
- Example: NHS England | False | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -1238,9 +1238,9 @@ Array of age bins and their corresponding counts. None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Age bin | | False | True | ["AgeEnum['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']"] | +| title | guidance | is_list | required | type | +|:-------------|:-----------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Age grouping | | False | True | ["AgeEnum['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']"] | @@ -1269,9 +1269,9 @@ Array of ethnicity bins and their corresponding counts. 
None -| title | guidance | is_list | required | type | -|:--------------|:-----------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Ethnicity bin | | False | True | ["EthnicityEnum['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']"] | +| title | guidance | is_list | required | type | 
+|:-------------------|:-----------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Ethnicity grouping | | False | True | ["EthnicityEnum['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']"] | @@ -1300,9 +1300,9 @@ Gender assigned at birth, male or female, and their corresponding counts. 
None -| title | guidance | is_list | required | type | -|:-----------------------------|:-----------|:----------|:-----------|:--------------------------------| -| Gender assigned at birth bin | | False | True | ["GenderEnum['male','female']"] | +| title | guidance | is_list | required | type | +|:-------------------------|:-----------|:----------|:-----------|:--------------------------------| +| Gender assigned at birth | | False | True | ["GenderEnum['male','female']"] | diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index f745e41..b8adf49 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -328,7 +328,7 @@ "required": false, "title": "Alternate dataset identifiers", "description": "Alternate dataset identifiers or local identifiers.", - "guidance": "", + "guidance": "Alternate dataset identifiers or local identifiers.", "examples": null, "type": [ "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", @@ -360,7 +360,7 @@ "required": false, "title": "Description", "description": "A free-text description of the dataset.A URL can also be provided as the description of the dataset.Gateway Feature: Keywords and text may be extracted out of the description and indexed for search.", - "guidance": "An HTML account of the data that provides context and scope of the data, limited to 3000 characters, and/or a resolvable URL that describes the dataset.- Additional information can be recorded and included using the Associated media field.", + "guidance": "An HTML account of the data that provides context and scope of the data, limited to 10000 characters, and/or a resolvable URL that describes the dataset.- Additional information can be recorded and included using the Associated media field.", "examples": null, "type": [ "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]" @@ -921,7 +921,7 @@ "required": true, "title": 
"Data Controller", "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", - "guidance": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.- Example: NHS England'", + "guidance": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.- In some cases, there may be multiple Data Controllers i.e. GP data. 
If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.- Example: NHS England", "examples": [ "NHS England" ], @@ -1664,7 +1664,7 @@ { "name": "bin", "required": true, - "title": "Age bin", + "title": "Age grouping", "description": null, "guidance": "", "examples": null, @@ -1705,7 +1705,7 @@ { "name": "bin", "required": true, - "title": "Ethnicity bin", + "title": "Ethnicity grouping", "description": null, "guidance": "", "examples": null, @@ -1746,7 +1746,7 @@ { "name": "bin", "required": true, - "title": "Gender assigned at birth bin", + "title": "Gender assigned at birth", "description": null, "guidance": "", "examples": null, diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 56d33eb..eac6685 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -118,7 +118,7 @@ "examples": [ "NHS England" ], - "guidance": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.- In some cases, there may be multiple Data Controllers i.e. GP data. 
If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.- Example: NHS England'", + "guidance": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.- Example: NHS England", "title": "Data Controller" }, "dataProcessor": { @@ -211,7 +211,7 @@ "$ref": "#/$defs/AgeEnum" } ], - "title": "Age bin" + "title": "Age grouping" }, "count": { "title": "Age count", @@ -944,7 +944,7 @@ ], "default": null, "description": "A free-text description of the dataset.A URL can also be provided as the description of the dataset.Gateway Feature: Keywords and text may be extracted out of the description and indexed for search.", - "guidance": "An HTML account of the data that provides context and scope of the data, limited to 3000 characters, and/or a resolvable URL that describes the dataset.- Additional information can be recorded and included using the Associated media field.", + "guidance": "An HTML account of the data that provides context and scope of the data, limited to 10000 characters, and/or a resolvable URL that describes the dataset.- Additional information can be recorded and included using the Associated media 
field.", "title": "Description" }, "associatedMedia": { @@ -1182,7 +1182,7 @@ "$ref": "#/$defs/EthnicityEnum" } ], - "title": "Ethnicity bin" + "title": "Ethnicity grouping" }, "count": { "title": "Ethnicity count", @@ -1275,7 +1275,24 @@ "type": "array" }, "format": { +<<<<<<< HEAD "default": null, +======= + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Format" + }, + "type": "array" + }, + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], +>>>>>>> 5c24292 (debugging from onboarding form) "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", "examples": [ "text/tab-separated-values", @@ -1302,7 +1319,7 @@ "$ref": "#/$defs/GenderEnum" } ], - "title": "Gender assigned at birth bin" + "title": "Gender assigned at birth" }, "count": { "title": "Gender assigned at birth count", @@ -2207,6 +2224,9 @@ }, "type": "array" }, + { + "$ref": "#/$defs/CommaSeparatedValues" + }, { "type": "null" } @@ -2277,6 +2297,7 @@ ], "default": null, "description": "Alternate dataset identifiers or local identifiers.", + "guidance": "Alternate dataset identifiers or local identifiers.", "title": "Alternate dataset identifiers" } }, diff --git a/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py b/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py index 4ac4cdc..9a6be6e 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py @@ -29,6 +29,11 @@ class Config: json_schema_extra={"guidance": an.language.guidance} ) +<<<<<<< HEAD 
format: List[Format] = Field( None, **an.format.__dict__, json_schema_extra={"guidance": an.format.guidance} +======= + format: Union[List[Format], Optional[CommaSeparatedValues]] = Field( + ..., **an.format.__dict__, json_schema_extra={"guidance": an.format.guidance} +>>>>>>> 5c24292 (debugging from onboarding form) ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Summary.py b/hdr_schemata/models/HDRUK/v3_0_0/Summary.py index bc731f9..a7efd5c 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Summary.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Summary.py @@ -29,7 +29,11 @@ class Config: ..., **an.populationSize.__dict__, json_schema_extra={"guidance": an.populationSize.guidance} ) +<<<<<<< HEAD keywords: Optional[List[OneHundredFiftyCharacters]] = ( +======= + keywords: Optional[Union[List[OneHundredFiftyCharacters], CommaSeparatedValues]] = ( +>>>>>>> 5c24292 (debugging from onboarding form) Field(..., **an.keywords.__dict__, json_schema_extra={"guidance": an.keywords.guidance}) ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index 1242fc8..f9b9f6a 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -66,6 +66,7 @@ summary: alternateIdentifiers: title: "Alternate dataset identifiers" description: "Alternate dataset identifiers or local identifiers." + guidance: "Alternate dataset identifiers or local identifiers." provenance: title: "Provenance" description: "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness." @@ -222,7 +223,7 @@ accessibility: title: "Access request cost" description: "Please provide link(s) to a webpage or description detailing the service or cost model for processing data access requests." 
dataController: - guidance: "- Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.\\n- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.\\n- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.\\n- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.\\n- Example: NHS England'" + guidance: "- Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.\\n- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.\\n- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.\\n- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.\\n- Example: NHS England" title: "Data Controller" examples: - "NHS England" @@ -339,7 +340,7 @@ documentation: title: "Associated media" description: "Please provide any media associated with the Gateway Organisation using a valid URL for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal." description: - guidance: "- An HTML account of the data that **provides context and scope** of the data, **limited to 3000 characters, and/or a resolvable URL** that describes the dataset.\\n- Additional information can be recorded and included using the Associated media field." + guidance: "- An HTML account of the data that **provides context and scope** of the data, **limited to 10000 characters, and/or a resolvable URL** that describes the dataset.\\n- Additional information can be recorded and included using the Associated media field." title: "Description" description: "A free-text description of the dataset.\\nA URL can also be provided as the description of the dataset.\\nGateway Feature: Keywords and text may be extracted out of the description and indexed for search." coverage: @@ -410,21 +411,21 @@ demographicFrequency: title: "Age" description: "Array of age bins and their corresponding counts." bin: - title: "Age bin" + title: "Age grouping" count: title: "Age count" ethnicity: title: "Ethnicity" description: "Array of ethnicity bins and their corresponding counts." bin: - title: "Ethnicity bin" + title: "Ethnicity grouping" count: title: "Ethnicity count" genderAssignedAtBirth: title: "Gender assigned at birth" description: "Gender assigned at birth, male or female, and their corresponding counts." 
bin: - title: "Gender assigned at birth bin" + title: "Gender assigned at birth" count: title: "Gender assigned at birth count" disease: From 86b40993a5a403bbd76ea5ad9c6b74899befd231 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Fri, 16 Aug 2024 16:45:21 +0100 Subject: [PATCH 08/23] align is optional and is required --- docs/GWDM/2.0.form.json | 81 +++++------- docs/GWDM/2.0.md | 121 ++++++++---------- docs/GWDM/2.0.structure.json | 102 +++++++-------- docs/HDRUK/3.0.0.form.json | 44 +++---- docs/HDRUK/3.0.0.md | 44 +++---- docs/HDRUK/3.0.0.structure.json | 56 ++++---- hdr_schemata/models/GWDM/2.0/schema.json | 73 +++++------ hdr_schemata/models/GWDM/v2_0/Access.py | 7 +- hdr_schemata/models/GWDM/v2_0/DataTable.py | 2 +- .../models/GWDM/v2_0/DemographicFrequency.py | 8 +- .../models/GWDM/v2_0/FormatAndStandards.py | 8 +- hdr_schemata/models/GWDM/v2_0/Omics.py | 4 +- hdr_schemata/models/GWDM/v2_0/Organisation.py | 2 +- hdr_schemata/models/GWDM/v2_0/Provenance.py | 2 +- hdr_schemata/models/GWDM/v2_0/Publisher.py | 2 +- hdr_schemata/models/GWDM/v2_0/Summary.py | 16 +-- hdr_schemata/models/GWDM/v2_0/Temporal.py | 2 +- hdr_schemata/models/GWDM/v2_0/Usage.py | 4 +- hdr_schemata/models/GWDM/v2_0/__init__.py | 2 +- hdr_schemata/models/HDRUK/3.0.0/schema.json | 76 +++++------ hdr_schemata/models/HDRUK/v3_0_0/Access.py | 4 +- hdr_schemata/models/HDRUK/v3_0_0/DataTable.py | 2 +- .../models/HDRUK/v3_0_0/DatasetDescriptor.py | 6 +- .../HDRUK/v3_0_0/DemographicFrequency.py | 8 +- .../models/HDRUK/v3_0_0/Documentation.py | 2 +- .../models/HDRUK/v3_0_0/FormatAndStandards.py | 5 - hdr_schemata/models/HDRUK/v3_0_0/Omics.py | 4 +- .../models/HDRUK/v3_0_0/Organisation.py | 2 +- hdr_schemata/models/HDRUK/v3_0_0/Origin.py | 2 +- .../models/HDRUK/v3_0_0/Provenance.py | 2 +- hdr_schemata/models/HDRUK/v3_0_0/Revision.py | 2 +- hdr_schemata/models/HDRUK/v3_0_0/Summary.py | 8 +- hdr_schemata/utils/create_markdown.py | 2 +- 33 files changed, 311 insertions(+), 394 deletions(-) diff 
--git a/docs/GWDM/2.0.form.json b/docs/GWDM/2.0.form.json index 1c79457..4058d53 100644 --- a/docs/GWDM/2.0.form.json +++ b/docs/GWDM/2.0.form.json @@ -107,7 +107,7 @@ "location": "summary.title" }, { - "required": true, + "required": false, "title": "Short Title", "description": "A shorter descriptive title of the dataset", "guidance": "", @@ -125,7 +125,7 @@ "location": "summary.shortTitle" }, { - "required": true, + "required": false, "title": "DOI Name", "description": "DOI associated to this dataset", "guidance": "- Please note: This is **not** the DOI of the publication(s) associated with the dataset.\\n- All HDR UK registered **datasets** should either have a **(DOI)** or be working towards obtaining one.\\n- If a DOI is available, please provide the DOI.\\n- **What happens if I do not have a DOI?**: Contact your academic organisation to find out if there is an existing relationship with a DOI provider. If that is not available, sites such as figshare offer free services to mint a DOI for your dataset. 
Subsequent versions of the Metadata Exchange will provide a DOI minting service.", @@ -160,7 +160,7 @@ "location": "summary.abstract" }, { - "required": true, + "required": false, "title": "Keywords", "description": "Comma separated key words associated to this dataset.", "guidance": "- Please provide **relevant** and **specific keywords** that can **improve the search engine optimization** of your dataset.\\n- Please **enter one keyword at a time** and click **Add New Field** to add further keywords.\\n- Text from the title is automatically included in the search, there is no need to include this in the keywords.\\n- Include words that researcher may include in their searches.", @@ -177,7 +177,7 @@ "location": "summary.keywords" }, { - "required": true, + "required": false, "title": "Controlled Keywords", "description": "Keywords that have been filtered and limited", "guidance": "", @@ -192,7 +192,7 @@ "location": "summary.controlledKeywords" }, { - "required": true, + "required": false, "title": "Contact Point", "description": "email of a person who can be the main contact point of this dataset", "guidance": "Organisations are expected to provide a dedicated email address associated with the data access request process. 
If no contact point is provided in this field, this field will be defaulted to the teams support email provided in the teams setting.\\n**Note:** An employee's email address can only be provided on a temporary basis and if one is provided, **you must obtain explicit consent for this purpose**.", @@ -205,7 +205,7 @@ "location": "summary.contactPoint" }, { - "required": true, + "required": false, "title": "Dataset type", "description": "What type of dataset is this?", "guidance": "", @@ -221,7 +221,7 @@ "location": "summary.datasetType" }, { - "required": true, + "required": false, "title": "Description", "description": "Longer description of the dataset in detail", "guidance": "", @@ -239,7 +239,7 @@ "location": "summary.description" }, { - "required": true, + "required": false, "title": "Organisation Name", "description": "Name of the organisation", "guidance": "", @@ -472,7 +472,7 @@ "location": "provenance.origin.imageContrast" }, { - "required": true, + "required": false, "title": "Start Date", "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", "guidance": "- The start of the time period that the dataset provides coverage for.\\n- If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", @@ -561,7 +561,7 @@ "location": "provenance.temporal.distributionReleaseDate" }, { - "required": true, + "required": false, "title": "Data Use Limitation", "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. 
NOTE: we have extended the DUO to include a value for NO LINKAGE", "guidance": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used.\\n- **General research use**: This data use limitation indicates that use is allowed for general research use for any research purpose.\\n- **Genetic studies only**: This data use limitation indicates that use is limited to genetic studies only (i.e., no phenotype-only research).\\n- **No general methods research**: This data use limitation indicates that use includes methods development research(e.g., development of software or algorithms) only within the bounds of other use limitations.\\n- **No restriction**: This data use limitation indicates there is no restriction on use.\\n- **Research-specific restrictions**: This data use limitation indicates that use is limited to studies of a certain research type.\\n- **Research use only**: This data use limitation indicates that use is limited to research purposes (e.g., does not include its use in clinical care).\\n- **No linkage**: This data use limitation indicates there is a restriction on linking to any other datasets", @@ -576,7 +576,7 @@ "location": "accessibility.usage.dataUseLimitation" }, { - "required": true, + "required": false, "title": "Data Use Requirements", "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information.", "guidance": "- Please indicate if there are any additional conditions set for use if any, multiple requirements may be provided.\\n- Please ensure that these restrictions are documented in access rights information.\\n- **Collaboration required**: This requirement indicates that the requestor must either agree to join a research consortium or collaborate with the primary study investigator(s).\\n- **Ethics approval required**: This requirement indicates that the requestor must provide documentation of local institutional review board (IRB)/ ethics review board (ERB) approval.\\n- **Geographical restrictions**: This requirement indicates that use is limited to within a specific geographic region.\\n- **Institution-specific restrictions**: This requirement indicates that use is limited to use within an approved institution.\\n- **Not for profit use**: This requirement indicates that use of the data is limited to not-for-profit organizations and not-for-profit use, non-commercial use.\\n- **Project-specific restrictions**: This requirement indicates that use is limited to use within an approved project.\\n- **Publication moratorium**: This requirement indicates that requestor agrees not to publish results of studies until a specific date.\\n- **Publication required**: This requirement indicates that requestor agrees to make results of studies using the data available to the larger scientific community.\\n- **Return to database or resource**: This requirement indicates that the requestor must return derived/enriched data to the database/resource.\\n- **Time limit on use**: This requirement indicates that use is approved for a specific number of months.\\n- **User-specific restriction**: This requirement indicates that use is limited to use by approved users.", @@ -591,7 +591,7 @@ "location": "accessibility.usage.dataUseRequirement" }, { - "required": true, + "required": false, "title": 
"Organisation Name", "description": "Name of the organisation", "guidance": "", @@ -626,7 +626,7 @@ "location": "accessibility.usage.resourceCreator.rorId" }, { - "required": true, + "required": false, "title": "Access Rights", "description": "Please provide details for the data access rights", "guidance": "- The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both separated by a comma.\\n- If such a resource or the underlying process doesn\u2019t exist, please provide \u201cIn Progress\u201d, until both the process and the documentation are ready.", @@ -700,7 +700,7 @@ "location": "accessibility.access.deliveryLeadTime" }, { - "required": true, + "required": false, "title": "Jurisdiction", "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored.", "guidance": "A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/", @@ -715,10 +715,10 @@ "location": "accessibility.access.jurisdiction" }, { - "required": true, + "required": false, "title": "Data Controller", "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", - "guidance": "- Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.\\n- Notes: For most organisations this will be the same as the Data Custodian of the dataset. 
If this is not the case, please indicate that there is a different controller.\\n- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.\\n- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.\\n- Example: NHS England'", + "guidance": "- Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.\\n- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.\\n- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.\\n- In some cases, there may be multiple Data Controllers i.e. GP data. 
If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.\\n- Example: NHS England", "examples": [ "NHS England" ], @@ -770,23 +770,6 @@ }, { "required": false, - "title": "Access mode", - "description": "Indication of the application type to enable research access.", - "guidance": "Indicate whether a Researcher will need to join a consortium to enable access to the dataset, or if a Researcher will need to submit an application which describes a specific project and required dataset fields required.", - "examples": null, - "is_list": false, - "is_optional": true, - "types": { - "type": "string", - "options": [ - "Join research consortium", - "New project" - ] - }, - "location": "accessibility.access.accessMode" - }, - { - "required": true, "title": "Controlled Vocabulary", "description": "Code value of the ontology vocabulary encoding", "guidance": "", @@ -803,7 +786,7 @@ "location": "accessibility.formatAndStandards.vocabularyEncodingSchemes" }, { - "required": true, + "required": false, "title": "Conforms To", "description": "What the vocabulary conforms to.", "guidance": "- List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.\\n- If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.\\n- **HL7 FHIR**: .\\n- **HL7 V2**: .\\n- **HL7 CDA**: .\\n- **HL7 CCOW**: .\\n- **DICOM**: .\\n- **I2B2**: .\\n- **IHE**: .\\n- **OMOP**: .\\n- **openEHR**: .\\n- **Sentinel**: .\\n- **PCORnet**: .\\n- **CDISC**: .\\n- **Local**: In-house developed data model.\\n- **Other**: Other standardised data model.\\n- **NHS Data Dictionary**: .\\n- **NHS Scotland Data Dictionary**: .\\n- **NHS Wales Data Dictionary**: .", @@ -820,7 +803,7 @@ "location": "accessibility.formatAndStandards.conformsTo" }, { - "required": true, + "required": false, "title": "Language Code(s)", "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", "guidance": "", @@ -837,7 +820,7 @@ "location": "accessibility.formatAndStandards.languages" }, { - "required": true, + "required": false, "title": "Dataset Format", "description": "Format(s) the dataset can be made available in", "guidance": "", @@ -1064,7 +1047,7 @@ "required": true, "title": "Statistical Population", "description": "Please select one of the following statistical populations for you observation", - "guidance": "", + "guidance": "- **Persons**: Unique persons recorded in the dataset\\n- **Events**: Unique events such as procedures and prescriptions within the dataset\\n-**Findings**: Unique findings included in the dataset such as diagnoses'\\n-**Number of scans per modality**: Unique scans for a specified imaging method modality (e.g. 
12 x-rays)", "examples": [ "PERSONS" ], @@ -1079,7 +1062,7 @@ "Number of scans per modality" ] }, - "location": "observations.measuredNode" + "location": "observations.observedNode" }, { "required": true, @@ -1425,7 +1408,7 @@ }, { "required": true, - "title": null, + "title": "Age bin", "description": null, "guidance": "", "examples": null, @@ -1464,7 +1447,7 @@ }, { "required": true, - "title": null, + "title": "Age count", "description": null, "guidance": "", "examples": null, @@ -1475,7 +1458,7 @@ }, { "required": true, - "title": null, + "title": "Ethnicity bin", "description": null, "guidance": "", "examples": null, @@ -1508,7 +1491,7 @@ }, { "required": true, - "title": null, + "title": "Ethnicity count", "description": null, "guidance": "", "examples": null, @@ -1519,7 +1502,7 @@ }, { "required": true, - "title": null, + "title": "Gender assigned at birth bin", "description": null, "guidance": "", "examples": null, @@ -1536,7 +1519,7 @@ }, { "required": true, - "title": null, + "title": "Gender assigned at birth count", "description": null, "guidance": "", "examples": null, @@ -1547,7 +1530,7 @@ }, { "required": true, - "title": null, + "title": "Disease code", "description": null, "guidance": "", "examples": null, @@ -1558,7 +1541,7 @@ }, { "required": true, - "title": null, + "title": "Disease code vocabulary", "description": null, "guidance": "", "examples": null, @@ -1576,7 +1559,7 @@ }, { "required": true, - "title": null, + "title": "Disease count", "description": null, "guidance": "", "examples": null, @@ -1586,7 +1569,7 @@ "location": "demographicFrequency.disease.count" }, { - "required": true, + "required": false, "title": "Omics assay", "description": "The specific 'omics assay that generated the dataset.", "guidance": "The specific 'omics assay that generated the dataset. 
If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry.", @@ -1618,7 +1601,7 @@ "location": "omics.assay" }, { - "required": true, + "required": false, "title": "Omics Platform", "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", "guidance": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", diff --git a/docs/GWDM/2.0.md b/docs/GWDM/2.0.md index d4d665c..1f5b2b1 100644 --- a/docs/GWDM/2.0.md +++ b/docs/GWDM/2.0.md @@ -128,7 +128,7 @@ A shorter descriptive title of the dataset | title | guidance | is_list | required | type | |:------------|:-----------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------| -| Short Title | | False | True | ["ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Short Title | | False | False | ["ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -141,7 +141,7 @@ DOI associated to this dataset | title | guidance | is_list | required | type | 
|:---------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------| -| DOI Name | - Please note: This is **not** the DOI of the publication(s) associated with the dataset.
- All HDR UK registered **datasets** should either have a **(DOI)** or be working towards obtaining one.
- If a DOI is available, please provide the DOI.
- **What happens if I do not have a DOI?**: Contact your academic organisation to find out if there is an existing relationship with a DOI provider. If that is not available, sites such as figshare offer free services to mint a DOI for your dataset. Subsequent versions of the Metadata Exchange will provide a DOI minting service. | False | True | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| DOI Name | - Please note: This is **not** the DOI of the publication(s) associated with the dataset.
- All HDR UK registered **datasets** should either have a **(DOI)** or be working towards obtaining one.
- If a DOI is available, please provide the DOI.
- **What happens if I do not have a DOI?**: Contact your academic organisation to find out if there is an existing relationship with a DOI provider. If that is not available, sites such as figshare offer free services to mint a DOI for your dataset. Subsequent versions of the Metadata Exchange will provide a DOI minting service. | False | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -167,7 +167,7 @@ Comma separated key words associated to this dataset. | title | guidance | is_list | required | type | |:---------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Keywords | - Please provide **relevant** and **specific keywords** that can **improve the search engine optimization** of your dataset.
- Please **enter one keyword at a time** and click **Add New Field** to add further keywords.
- Text from the title is automatically included in the search, there is no need to include this in the keywords.
- Include words that researcher may include in their searches. | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Keywords | - Please provide **relevant** and **specific keywords** that can **improve the search engine optimization** of your dataset.
- Please **enter one keyword at a time** and click **Add New Field** to add further keywords.
- Text from the title is automatically included in the search, there is no need to include this in the keywords.
- Include words that researcher may include in their searches. | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -180,7 +180,7 @@ Keywords that have been filtered and limited | title | guidance | is_list | required | type | |:--------------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Controlled Keywords | | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Controlled Keywords | | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -191,7 +191,7 @@ email of a person who can be the main contact point of this dataset | title | guidance | is_list | required | type | |:--------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------| -| Contact Point | Organisations are expected to provide a dedicated email address associated with the data access request process. If no contact point is provided in this field, this field will be defaulted to the teams support email provided in the teams setting.
**Note:** An employee's email address can only be provided on a temporary basis and if one is provided, **you must obtain explicit consent for this purpose**. | False | True | ['EmailStr', 'null'] | +| Contact Point | Organisations are expected to provide a dedicated email address associated with the data access request process. If no contact point is provided in this field, this field will be defaulted to the teams support email provided in the teams setting.
**Note:** An employee's email address can only be provided on a temporary basis and if one is provided, **you must obtain explicit consent for this purpose**. | False | False | ['EmailStr', 'null'] | Examples: @@ -204,7 +204,7 @@ What type of dataset is this? | title | guidance | is_list | required | type | |:-------------|:-----------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------| -| Dataset type | | False | True | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Dataset type | | False | False | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -215,7 +215,7 @@ Longer description of the dataset in detail | title | guidance | is_list | required | type | |:------------|:-----------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------| -| Description | | False | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Description | | False | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -237,7 +237,7 @@ Name of the organisation | title | guidance | is_list | required | type | |:------------------|:-----------|:----------|:-----------|:---------------------| -| Organisation Name | | False | True | ['Name[{}]', 'null'] | +| Organisation Name | | False | False | ['Name[{}]', 'null'] | @@ -444,7 +444,7 @@ The start of the time period that the dataset provides coverage for. 
If there ar | title | guidance | is_list | required | type | |:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------| -| Start Date | - The start of the time period that the dataset provides coverage for.
- If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. | False | True | ['date', 'datetime', 'null'] | +| Start Date | - The start of the time period that the dataset provides coverage for.
- If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. | False | False | ['date', 'datetime', 'null'] | @@ -517,7 +517,7 @@ Please provide an indication of consent permissions for datasets and/or material | title | guidance | is_list | required | type | |:--------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Data Use Limitation | Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used.
- **General research use**: This data use limitation indicates that use is allowed for general research use for any research purpose.
- **Genetic studies only**: This data use limitation indicates that use is limited to genetic studies only (i.e., no phenotype-only research).
- **No general methods research**: This data use limitation indicates that use includes methods development research(e.g., development of software or algorithms) only within the bounds of other use limitations.
- **No restriction**: This data use limitation indicates there is no restriction on use.
- **Research-specific restrictions**: This data use limitation indicates that use is limited to studies of a certain research type.
- **Research use only**: This data use limitation indicates that use is limited to research purposes (e.g., does not include its use in clinical care).
- **No linkage**: This data use limitation indicates there is a restriction on linking to any other datasets | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Data Use Limitation | Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used.
- **General research use**: This data use limitation indicates that use is allowed for general research use for any research purpose.
- **Genetic studies only**: This data use limitation indicates that use is limited to genetic studies only (i.e., no phenotype-only research).
- **No general methods research**: This data use limitation indicates that use includes methods development research(e.g., development of software or algorithms) only within the bounds of other use limitations.
- **No restriction**: This data use limitation indicates there is no restriction on use.
- **Research-specific restrictions**: This data use limitation indicates that use is limited to studies of a certain research type.
- **Research use only**: This data use limitation indicates that use is limited to research purposes (e.g., does not include its use in clinical care).
- **No linkage**: This data use limitation indicates there is a restriction on linking to any other datasets | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -528,7 +528,7 @@ Please indicate fit here are any additional conditions set for use if any, multi | title | guidance | is_list | required | type | |:----------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Data Use Requirements | - Please indicate if there are any additional conditions set for use if any, multiple requirements may be provided.
- Please ensure that these restrictions are documented in access rights information.
- **Collaboration required**: This requirement indicates that the requestor must either agree to join a research consortium or collaborate with the primary study investigator(s).
- **Ethics approval required**: This requirement indicates that the requestor must provide documentation of local institutional review board (IRB)/ ethics review board (ERB) approval.
- **Geographical restrictions**: This requirement indicates that use is limited to within a specific geographic region.
- **Institution-specific restrictions**: This requirement indicates that use is limited to use within an approved institution.
- **Not for profit use**: This requirement indicates that use of the data is limited to not-for-profit organizations and not-for-profit use, non-commercial use.
- **Project-specific restrictions**: This requirement indicates that use is limited to use within an approved project.
- **Publication moratorium**: This requirement indicates that requestor agrees not to publish results of studies until a specific date.
- **Publication required**: This requirement indicates that requestor agrees to make results of studies using the data available to the larger scientific community.
- **Return to database or resource**: This requirement indicates that the requestor must return derived/enriched data to the database/resource.
- **Time limit on use**: This requirement indicates that use is approved for a specific number of months.
- **User-specific restriction**: This requirement indicates that use is limited to use by approved users. | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Data Use Requirements | - Please indicate if there are any additional conditions set for use if any, multiple requirements may be provided.
- Please ensure that these restrictions are documented in access rights information.
- **Collaboration required**: This requirement indicates that the requestor must either agree to join a research consortium or collaborate with the primary study investigator(s).
- **Ethics approval required**: This requirement indicates that the requestor must provide documentation of local institutional review board (IRB)/ ethics review board (ERB) approval.
- **Geographical restrictions**: This requirement indicates that use is limited to within a specific geographic region.
- **Institution-specific restrictions**: This requirement indicates that use is limited to use within an approved institution.
- **Not for profit use**: This requirement indicates that use of the data is limited to not-for-profit organizations and not-for-profit use, non-commercial use.
- **Project-specific restrictions**: This requirement indicates that use is limited to use within an approved project.
- **Publication moratorium**: This requirement indicates that requestor agrees not to publish results of studies until a specific date.
- **Publication required**: This requirement indicates that requestor agrees to make results of studies using the data available to the larger scientific community.
- **Return to database or resource**: This requirement indicates that the requestor must return derived/enriched data to the database/resource.
- **Time limit on use**: This requirement indicates that use is approved for a specific number of months.
- **User-specific restriction**: This requirement indicates that use is limited to use by approved users. | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -550,7 +550,7 @@ Name of the organisation | title | guidance | is_list | required | type | |:------------------|:-----------|:----------|:-----------|:---------------------| -| Organisation Name | | False | True | ['Name[{}]', 'null'] | +| Organisation Name | | False | False | ['Name[{}]', 'null'] | @@ -592,7 +592,7 @@ Please provide details for the data access rights | title | guidance | is_list | required | type | |:--------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Access Rights | - The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both separated by a comma.
- If such a resource or the underlying process doesn’t exist, please provide “In Progress”, until both the process and the documentation are ready. | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Access Rights | - The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both separated by a comma.
- If such a resource or the underlying process doesn’t exist, please provide “In Progress”, until both the process and the documentation are ready. | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -638,7 +638,7 @@ Please use country code from ISO 3166-1 country codes and the associated ISO 316 | title | guidance | is_list | required | type | |:-------------|:----------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Jurisdiction | A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/ | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Jurisdiction | A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/ | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -647,9 +647,9 @@ Please use country code from ISO 3166-1 country codes and the associated ISO 316 Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed. 
-| title | guidance | is_list | required | type | -|:----------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------| -| Data Controller | - Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.
- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.
- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.
- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.
- Example: NHS England' | False | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| title | guidance | is_list | required | type | +|:----------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | - Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.
- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.
- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.
- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.
- Example: NHS England | False | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -683,17 +683,6 @@ Examples: * TRE/SDE -#### accessMode - -Indication of the application type to enable research access. - -| title | guidance | is_list | required | type | -|:------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------------------------| -| Access mode | Indicate whether a Researcher will need to join a consortium to enable access to the dataset, or if a Researcher will need to submit an application which describes a specific project and required dataset fields required. | False | False | ["AccessMode['Join research consortium','New project']", 'null'] | - - - - ### formatAndStandards Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset. @@ -709,7 +698,7 @@ Code value of the ontology vocabulary encoding | title | guidance | is_list | required | type | |:----------------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Controlled Vocabulary | | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Controlled Vocabulary | | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -722,7 +711,7 @@ What the vocabulary conforms to. 
| title | guidance | is_list | required | type | |:------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Conforms To | - List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.
- If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition.
- **HL7 FHIR**: .
- **HL7 V2**: .
- **HL7 CDA**: .
- **HL7 CCOW**: .
- **DICOM**: .
- **I2B2**: .
- **IHE**: .
- **OMOP**: .
- **openEHR**: .
- **Sentinel**: .
- **PCORnet**: .
- **CDISC**: .
- **Local**: In-house developed data model.
- **Other**: Other standardised data model.
- **NHS Data Dictionary**: .
- **NHS Scotland Data Dictionary**: .
- **NHS Wales Data Dictionary**: . | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Conforms To | - List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.
- If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition.
- **HL7 FHIR**: .
- **HL7 V2**: .
- **HL7 CDA**: .
- **HL7 CCOW**: .
- **DICOM**: .
- **I2B2**: .
- **IHE**: .
- **OMOP**: .
- **openEHR**: .
- **Sentinel**: .
- **PCORnet**: .
- **CDISC**: .
- **Local**: In-house developed data model.
- **Other**: Other standardised data model.
- **NHS Data Dictionary**: .
- **NHS Scotland Data Dictionary**: .
- **NHS Wales Data Dictionary**: . | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -735,7 +724,7 @@ Language code(s) of the language of the dataset metadata and underlying data is | title | guidance | is_list | required | type | |:-----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Language Code(s) | | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Language Code(s) | | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -748,7 +737,7 @@ Format(s) the dataset can be made available in | title | guidance | is_list | required | type | |:---------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Dataset Format | | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Dataset Format | | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -937,13 +926,13 @@ Multiple observations about the dataset may be provided and users are expected t -### measuredNode +### observedNode Please select one of the following statistical populations for you observation -| title | guidance | is_list | required | type | -|:-----------------------|:-----------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------| -| Statistical Population | | False | True | ["StatisticalPopulationConstrainedV2['Persons','Events','Findings','Number of scans per modality']"] | +| title | guidance | is_list | required 
| type | +|:-----------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------| +| Statistical Population | - **Persons**: Unique persons recorded in the dataset
- **Events**: Unique events such as procedures and prescriptions within the dataset
- **Findings**: Unique findings included in the dataset such as diagnoses
-**Number of scans per modality**: Unique scans for a specified imaging method modality (e.g. 12 x-rays) | False | True | ["StatisticalPopulationConstrainedV2['Persons','Events','Findings','Number of scans per modality']"] | Examples: @@ -1009,7 +998,7 @@ The name of a table in a dataset. | title | guidance | is_list | required | type | |:-----------|:-----------|:----------|:-----------|:----------------| -| Table Name | | False | True | ['str', 'null'] | +| Table Name | | False | False | ['str', 'null'] | @@ -1400,9 +1389,9 @@ Array of age bins and their corresponding counts. None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| | | False | True | ["AgeBin['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']"] | +| title | guidance | is_list | required | type | +|:--------|:-----------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Age bin | | False | True | ["AgeEnum['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 
years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']"] | @@ -1411,9 +1400,9 @@ None None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------| -| | | False | True | ['int'] | +| title | guidance | is_list | required | type | +|:----------|:-----------|:----------|:-----------|:--------| +| Age count | | False | True | ['int'] | @@ -1431,9 +1420,9 @@ Array of ethnicity bins and their corresponding counts. None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| | | False | True | ["EthnicityBin['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']"] | +| title | guidance 
| is_list | required | type | +|:--------------|:-----------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Ethnicity bin | | False | True | ["EthnicityEnum['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']"] | @@ -1442,9 +1431,9 @@ None None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------| -| | | False | True | ['int'] | +| title | guidance | is_list | required | type | +|:----------------|:-----------|:----------|:-----------|:--------| +| Ethnicity count | | False | True | ['int'] | @@ -1462,9 +1451,9 @@ Gender assigned at birth, male or female, and their corresponding counts. 
None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:-------------------------------| -| | | False | True | ["GenderBin['male','female']"] | +| title | guidance | is_list | required | type | +|:-----------------------------|:-----------|:----------|:-----------|:--------------------------------| +| Gender assigned at birth bin | | False | True | ["GenderEnum['male','female']"] | @@ -1473,9 +1462,9 @@ None None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------| -| | | False | True | ['int'] | +| title | guidance | is_list | required | type | +|:-------------------------------|:-----------|:----------|:-----------|:--------| +| Gender assigned at birth count | | False | True | ['int'] | @@ -1493,9 +1482,9 @@ Array of diseases and their corresponding counts. None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:---------------| -| | | False | True | ['str', 'int'] | +| title | guidance | is_list | required | type | +|:-------------|:-----------|:----------|:-----------|:---------------| +| Disease code | | False | True | ['str', 'int'] | @@ -1504,9 +1493,9 @@ None None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:------------------------------------------------------| -| | | False | True | ["DiseaseCodeVocabulary['ICD10','SNOMED CT','MeSH']"] | +| title | guidance | is_list | required | type | +|:------------------------|:-----------|:----------|:-----------|:------------------------------------------------| +| Disease code vocabulary | | False | True | ["DiseaseCodeEnum['ICD10','SNOMED CT','MeSH']"] | @@ -1515,9 +1504,9 @@ None None -| title | guidance | is_list | required | type | -|:--------|:-----------|:----------|:-----------|:--------| -| | | False | True | ['int'] | +| title | guidance | is_list | required | type | 
+|:--------------|:-----------|:----------|:-----------|:--------| +| Disease count | | False | True | ['int'] | @@ -1537,7 +1526,7 @@ The specific 'omics assay that generated the dataset. | title | guidance | is_list | required | type | |:------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Omics assay | The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry. | False | True | ["Assay['NMR spectroscopy','mass-spectrometry','whole genome sequencing','exome sequencing','genotyping by array','transcriptome profiling by high-throughput sequencing','transcriptome profiling by array','amplicon sequencing','methylation binding domain sequencing','methylation profiling by high-throughput sequencing','genomic variant calling','chromatin accessibility profiling by high-throughput sequencing','histone modification profiling by high-throughput sequencing','chromatin immunoprecipitation sequencing','whole genome shotgun sequencing','whole transcriptome sequencing','targeted mutation analysis']", 'null'] | +| Omics assay | The specific 'omics assay that generated the dataset. 
If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry. | False | False | ["Assay['NMR spectroscopy','mass-spectrometry','whole genome sequencing','exome sequencing','genotyping by array','transcriptome profiling by high-throughput sequencing','transcriptome profiling by array','amplicon sequencing','methylation binding domain sequencing','methylation profiling by high-throughput sequencing','genomic variant calling','chromatin accessibility profiling by high-throughput sequencing','histone modification profiling by high-throughput sequencing','chromatin immunoprecipitation sequencing','whole genome shotgun sequencing','whole transcriptome sequencing','targeted mutation analysis']", 'null'] | @@ -1548,7 +1537,7 @@ The specific technology or infrastructure used to perform the assay. If the omic | title | guidance | is_list | required | type | |:---------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------------| -| Omics Platform | The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf. | False | True | ["Platform['Other','NMR Nightingale','Metabolon','Biocrates','Illumina','Oxford Nanopore','454','Hi-C','HiFi']", 'null'] | +| Omics Platform | The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf. | False | False | ["Platform['Other','NMR Nightingale','Metabolon','Biocrates','Illumina','Oxford Nanopore','454','Hi-C','HiFi']", 'null'] | diff --git a/docs/GWDM/2.0.structure.json b/docs/GWDM/2.0.structure.json index e558dd8..14c2b76 100644 --- a/docs/GWDM/2.0.structure.json +++ b/docs/GWDM/2.0.structure.json @@ -158,7 +158,7 @@ }, { "name": "shortTitle", - "required": true, + "required": false, "title": "Short Title", "description": "A shorter descriptive title of the dataset", "guidance": "", @@ -175,7 +175,7 @@ }, { "name": "doiName", - "required": true, + "required": false, "title": "DOI Name", "description": "DOI associated to this dataset", "guidance": "Please note: This is not the DOI of the publication(s) associated with the dataset.- All HDR UK registered datasets should either have a (DOI) or be working towards obtaining one.- If a DOI is available, please provide the DOI.- What happens if I do not have a DOI?: Contact your academic organisation to find out if there is an existing relationship with a DOI provider. If that is not available, sites such as figshare offer free services to mint a DOI for your dataset. 
Subsequent versions of the Metadata Exchange will provide a DOI minting service.", @@ -208,7 +208,7 @@ }, { "name": "keywords", - "required": true, + "required": false, "title": "Keywords", "description": "Comma separated key words associated to this dataset.", "guidance": "Please provide relevant and specific keywords that can improve the search engine optimization of your dataset.- Please enter one keyword at a time and click Add New Field to add further keywords.- Text from the title is automatically included in the search, there is no need to include this in the keywords.- Include words that researcher may include in their searches.", @@ -225,7 +225,7 @@ }, { "name": "controlledKeywords", - "required": true, + "required": false, "title": "Controlled Keywords", "description": "Keywords that have been filtered and limited", "guidance": "", @@ -240,7 +240,7 @@ }, { "name": "contactPoint", - "required": true, + "required": false, "title": "Contact Point", "description": "email of a person who can be the main contact point of this dataset", "guidance": "Organisations are expected to provide a dedicated email address associated with the data access request process. 
If no contact point is provided in this field, this field will be defaulted to the teams support email provided in the teams setting.Note: An employee's email address can only be provided on a temporary basis and if one is provided, you must obtain explicit consent for this purpose.", @@ -256,7 +256,7 @@ }, { "name": "datasetType", - "required": true, + "required": false, "title": "Dataset type", "description": "What type of dataset is this?", "guidance": "", @@ -271,7 +271,7 @@ }, { "name": "description", - "required": true, + "required": false, "title": "Description", "description": "Longer description of the dataset in detail", "guidance": "", @@ -288,7 +288,7 @@ }, { "name": "publisher", - "required": true, + "required": false, "title": "Publisher", "description": "Link to details about the publisher of this dataset", "guidance": "", @@ -302,7 +302,7 @@ "subItems": [ { "name": "name", - "required": true, + "required": false, "title": "Organisation Name", "description": "Name of the organisation", "guidance": "", @@ -590,7 +590,7 @@ "subItems": [ { "name": "startDate", - "required": true, + "required": false, "title": "Start Date", "description": "The start of the time period that the dataset provides coverage for. 
If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", "guidance": "The start of the time period that the dataset provides coverage for.- If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", @@ -665,7 +665,7 @@ }, { "name": "accessibility", - "required": false, + "required": true, "title": "Accessibility", "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", "guidance": "", @@ -692,7 +692,7 @@ "subItems": [ { "name": "dataUseLimitation", - "required": true, + "required": false, "title": "Data Use Limitation", "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. 
NOTE: we have extended the DUO to include a value for NO LINKAGE", "guidance": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used.- General research use: This data use limitation indicates that use is allowed for general research use for any research purpose.- Genetic studies only: This data use limitation indicates that use is limited to genetic studies only (i.e., no phenotype-only research).- No general methods research: This data use limitation indicates that use includes methods development research(e.g., development of software or algorithms) only within the bounds of other use limitations.- No restriction: This data use limitation indicates there is no restriction on use.- Research-specific restrictions: This data use limitation indicates that use is limited to studies of a certain research type.- Research use only: This data use limitation indicates that use is limited to research purposes (e.g., does not include its use in clinical care).- No linkage: This data use limitation indicates there is a restriction on linking to any other datasets", @@ -707,7 +707,7 @@ }, { "name": "dataUseRequirement", - "required": true, + "required": false, "title": "Data Use Requirements", "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information.", "guidance": "Please indicate if there are any additional conditions set for use if any, multiple requirements may be provided.- Please ensure that these restrictions are documented in access rights information.- Collaboration required: This requirement indicates that the requestor must either agree to join a research consortium or collaborate with the primary study investigator(s).- Ethics approval required: This requirement indicates that the requestor must provide documentation of local institutional review board (IRB)/ ethics review board (ERB) approval.- Geographical restrictions: This requirement indicates that use is limited to within a specific geographic region.- Institution-specific restrictions: This requirement indicates that use is limited to use within an approved institution.- Not for profit use: This requirement indicates that use of the data is limited to not-for-profit organizations and not-for-profit use, non-commercial use.- Project-specific restrictions: This requirement indicates that use is limited to use within an approved project.- Publication moratorium: This requirement indicates that requestor agrees not to publish results of studies until a specific date.- Publication required: This requirement indicates that requestor agrees to make results of studies using the data available to the larger scientific community.- Return to database or resource: This requirement indicates that the requestor must return derived/enriched data to the database/resource.- Time limit on use: This requirement indicates that use is approved for a specific number of months.- User-specific restriction: This requirement indicates that use is limited to use by approved users.", @@ -738,7 +738,7 @@ "subItems": [ { "name": "name", - "required": true, + "required": false, "title": "Organisation Name", "description": "Name of the organisation", "guidance": "", @@ -798,7 +798,7 @@ 
"subItems": [ { "name": "accessRights", - "required": true, + "required": false, "title": "Access Rights", "description": "Please provide details for the data access rights", "guidance": "The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both separated by a comma.- If such a resource or the underlying process doesn\u2019t exist, please provide \u201cIn Progress\u201d, until both the process and the documentation are ready.", @@ -859,7 +859,7 @@ }, { "name": "jurisdiction", - "required": true, + "required": false, "title": "Jurisdiction", "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored.", "guidance": "A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/", @@ -874,10 +874,10 @@ }, { "name": "dataController", - "required": true, + "required": false, "title": "Data Controller", "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", - "guidance": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.- Notes: For most organisations this will be the same as the Data Custodian of the dataset. 
If this is not the case, please indicate that there is a different controller.- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.- Example: NHS England'", + "guidance": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.- In some cases, there may be multiple Data Controllers i.e. GP data. 
If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.- Example: NHS England", "examples": [ "NHS England" ], @@ -923,20 +923,6 @@ "is_list": false, "is_optional": true, "subItems": [] - }, - { - "name": "accessMode", - "required": false, - "title": "Access mode", - "description": "Indication of the application type to enable research access.", - "guidance": "Indicate whether a Researcher will need to join a consortium to enable access to the dataset, or if a Researcher will need to submit an application which describes a specific project and required dataset fields required.", - "examples": null, - "type": [ - "AccessMode['Join research consortium','New project']", - "null" - ], - "is_list": false, - "is_optional": true } ] }, @@ -956,7 +942,7 @@ "subItems": [ { "name": "vocabularyEncodingSchemes", - "required": true, + "required": false, "title": "Controlled Vocabulary", "description": "Code value of the ontology vocabulary encoding", "guidance": "", @@ -973,7 +959,7 @@ }, { "name": "conformsTo", - "required": true, + "required": false, "title": "Conforms To", "description": "What the vocabulary conforms to.", "guidance": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.- If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.- HL7 FHIR: https://www.hl7.org/fhir/.- HL7 V2: https://www.hl7.org/implement/standards/product_section.cfm?section=13.- HL7 CDA: https://www.hl7.org/implement/standards/product_section.cfm?section=10.- HL7 CCOW: https://www.hl7.org/implement/standards/product_section.cfm?section=16.- DICOM: https://www.dicomstandard.org/.- I2B2: https://www.i2b2.org/.- IHE: https://www.ihe.net/resources/profiles/.- OMOP: https://www.ohdsi.org/data-standardization/the-common-data-model/.- openEHR: https://www.openehr.org/.- Sentinel: https://www.sentinelinitiative.org/sentinel/data/distributed-database-common-data-model.- PCORnet: https://pcornet.org/data-driven-common-model/.- CDISC: https://www.cdisc.org/standards/data-exchange/odm.- Local: In-house developed data model.- Other: Other standardised data model.- NHS Data Dictionary: https://www.datadictionary.nhs.uk/.- NHS Scotland Data Dictionary: https://www.ndc.scot.nhs.uk/Data-Dictionary/.- NHS Wales Data Dictionary: https://www.datadictionary.wales.nhs.uk/.", @@ -990,7 +976,7 @@ }, { "name": "languages", - "required": true, + "required": false, "title": "Language Code(s)", "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", "guidance": "", @@ -1007,7 +993,7 @@ }, { "name": "formats", - "required": true, + "required": false, "title": "Dataset Format", "description": "Format(s) the dataset can be made available in", "guidance": "", @@ -1277,11 +1263,11 @@ "is_optional": true, "subItems": [ { - "name": "measuredNode", + "name": "observedNode", "required": true, "title": "Statistical Population", "description": "Please select one of the following statistical populations for you observation", - "guidance": "", + "guidance": "Persons: Unique persons recorded in the dataset- Events: Unique events such as procedures and 
prescriptions within the dataset-Findings: Unique findings included in the dataset such as diagnoses'-Number of scans per modality: Unique scans for a specified imaging method modality (e.g. 12 x-rays)", "examples": [ "PERSONS" ], @@ -1364,7 +1350,7 @@ "subItems": [ { "name": "name", - "required": true, + "required": false, "title": "Table Name", "description": "The name of a table in a dataset.", "guidance": "", @@ -1892,7 +1878,7 @@ "subItems": [ { "name": "age", - "required": true, + "required": false, "title": "Age", "description": "Array of age bins and their corresponding counts.", "guidance": "", @@ -1906,12 +1892,12 @@ { "name": "bin", "required": true, - "title": null, + "title": "Age bin", "description": null, "guidance": "", "examples": null, "type": [ - "AgeBin['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']" + "AgeEnum['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-14 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','65-99 years','100+ years']" ], "is_list": false, "is_optional": false @@ -1919,7 +1905,7 @@ { "name": "count", "required": true, - "title": null, + "title": "Age count", "description": null, "guidance": "", "examples": null, @@ -1933,7 +1919,7 @@ }, { "name": "ethnicity", - "required": true, + "required": false, "title": "Ethnicity", "description": "Array of ethnicity bins and their corresponding counts.", "guidance": "", @@ -1947,12 +1933,12 @@ { "name": "bin", "required": true, - "title": null, + "title": "Ethnicity bin", "description": null, "guidance": "", 
"examples": null, "type": [ - "EthnicityBin['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']" + "EthnicityEnum['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']" ], "is_list": false, "is_optional": false @@ -1960,7 +1946,7 @@ { "name": "count", "required": true, - "title": null, + "title": "Ethnicity count", "description": null, "guidance": "", "examples": null, @@ -1974,7 +1960,7 @@ }, { "name": "genderAssignedAtBirth", - "required": true, + "required": false, "title": "Gender assigned at birth", "description": "Gender assigned at birth, male or female, and their corresponding counts.", "guidance": "", @@ -1988,12 +1974,12 @@ { "name": "bin", "required": true, - "title": null, + "title": "Gender assigned at birth bin", "description": null, "guidance": "", "examples": null, "type": [ - "GenderBin['male','female']" + "GenderEnum['male','female']" ], "is_list": false, "is_optional": false 
@@ -2001,7 +1987,7 @@ { "name": "count", "required": true, - "title": null, + "title": "Gender assigned at birth count", "description": null, "guidance": "", "examples": null, @@ -2015,7 +2001,7 @@ }, { "name": "disease", - "required": true, + "required": false, "title": "Disease", "description": "Array of diseases and their corresponding counts.", "guidance": "", @@ -2029,7 +2015,7 @@ { "name": "diseaseCode", "required": true, - "title": null, + "title": "Disease code", "description": null, "guidance": "", "examples": null, @@ -2043,12 +2029,12 @@ { "name": "diseaseCodeVocabulary", "required": true, - "title": null, + "title": "Disease code vocabulary", "description": null, "guidance": "", "examples": null, "type": [ - "DiseaseCodeVocabulary['ICD10','SNOMED CT','MeSH']" + "DiseaseCodeEnum['ICD10','SNOMED CT','MeSH']" ], "is_list": false, "is_optional": false @@ -2056,7 +2042,7 @@ { "name": "count", "required": true, - "title": null, + "title": "Disease count", "description": null, "guidance": "", "examples": null, @@ -2086,7 +2072,7 @@ "subItems": [ { "name": "assay", - "required": true, + "required": false, "title": "Omics assay", "description": "The specific 'omics assay that generated the dataset.", "guidance": "The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry.", @@ -2100,7 +2086,7 @@ }, { "name": "platform", - "required": true, + "required": false, "title": "Omics Platform", "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", "guidance": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 5f2ee42..087cac4 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -55,7 +55,7 @@ "location": "revisions.version" }, { - "required": true, + "required": false, "title": "revision url", "description": "Some url with a reference to the record of a previous version of this dataset", "guidance": "", @@ -132,7 +132,7 @@ "location": "summary.abstract" }, { - "required": false, + "required": true, "title": "identifier", "description": "Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation.", "guidance": "**Example**: https://ror.org/053fq8t95\\nIf your organisation does not have a ROR identifier please use the \u201csuggest and institute\u201d function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform", @@ -236,7 +236,7 @@ "location": "summary.populationSize" }, { - "required": true, + "required": false, "title": "Keywords", "description": "Please provide a list of relevant and specific keywords that can improve the search engine optimisation (SEO) of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. 
We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", "guidance": "- Please provide **relevant** and **specific keywords** that can **improve the search engine optimization** of your dataset.\\n- Please **enter one keyword at a time** and click **Add New Field** to add further keywords.\\n- Text from the title is automatically included in the search, there is no need to include this in the keywords.\\n- Include words that researcher may include in their searches.", @@ -308,7 +308,7 @@ "location": "summary.alternateIdentifiers" }, { - "required": false, + "required": true, "title": "Description", "description": "A free-text description of the dataset.\\nA URL can also be provided as the description of the dataset.\\nGateway Feature: Keywords and text may be extracted out of the description and indexed for search.", "guidance": "- An HTML account of the data that **provides context and scope** of the data, **limited to 10000 characters, and/or a resolvable URL** that describes the dataset.\\n- Additional information can be recorded and included using the Associated media field.", @@ -358,7 +358,7 @@ "location": "documentation.inPipeline" }, { - "required": false, + "required": true, "title": "Geographic coverage", "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in one of the recommended standards:\\n- For locations in the UK: [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about)\\n- For locations in other countries: [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes)", "guidance": "- The geographical area covered by the dataset.\\n- Please provide a valid location.\\n- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about).\\n- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes).", @@ -568,7 +568,7 @@ "location": "provenance.origin.datasetType" }, { - "required": true, + "required": false, "title": "Dataset sub-type", "description": "The sub-type of the dataset content. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType ('proteomics', 'transcriptomics', 'epigenomics', 'metabolomics', 'metagenomics', 'genomics', 'lipidomics') is selected", "guidance": "Sub-types include those listed below under each data type. 
Datasets can have more than one sub-type associated.\\n- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.\\n- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.\\n- *Measurements/Tests**- Includes any data related to laboratory or other diagnostics.\\n- *Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.\\n- *Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.\\n- *Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.\\n- *Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.\\n- *Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.\\n- *Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.\\n- *Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.\\n- *Information and communication**: Includes any data related to the study or application of information and communication.\\n- *Politics**: Includes any data related to political views, activities, voting, etc.", @@ -974,7 +974,7 @@ "location": "accessibility.access.deliveryLeadTime" }, { - "required": true, 
+ "required": false, "title": "Jurisdiction", "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored.", "guidance": "A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/", @@ -989,7 +989,7 @@ "location": "accessibility.access.jurisdiction" }, { - "required": true, + "required": false, "title": "Data Controller", "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", "guidance": "- Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.\\n- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.\\n- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.\\n- In some cases, there may be multiple Data Controllers i.e. GP data. 
If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.\\n- Example: NHS England", @@ -1333,7 +1333,7 @@ "location": "accessibility.formatAndStandards.format" }, { - "required": true, + "required": false, "title": "Persistent identifier of a dataset", "description": null, "guidance": "", @@ -1349,7 +1349,7 @@ "location": "enrichmentAndLinkage.derivedFrom.pid" }, { - "required": true, + "required": false, "title": "Title of a dataset", "description": null, "guidance": "", @@ -1365,7 +1365,7 @@ "location": "enrichmentAndLinkage.derivedFrom.title" }, { - "required": true, + "required": false, "title": "Url of a dataset", "description": null, "guidance": "", @@ -1381,7 +1381,7 @@ "location": "enrichmentAndLinkage.derivedFrom.url" }, { - "required": true, + "required": false, "title": "Persistent identifier of a dataset", "description": null, "guidance": "", @@ -1397,7 +1397,7 @@ "location": "enrichmentAndLinkage.isPartOf.pid" }, { - "required": true, + "required": false, "title": "Title of a dataset", "description": null, "guidance": "", @@ -1413,7 +1413,7 @@ "location": "enrichmentAndLinkage.isPartOf.title" }, { - "required": true, + "required": false, "title": "Url of a dataset", "description": null, "guidance": "", @@ -1429,7 +1429,7 @@ "location": "enrichmentAndLinkage.isPartOf.url" }, { - "required": true, + "required": false, "title": "Persistent identifier of a dataset", "description": null, "guidance": "", @@ -1445,7 +1445,7 @@ "location": "enrichmentAndLinkage.linkableDatasets.pid" }, { - "required": true, + "required": false, "title": "Title of a dataset", "description": null, "guidance": "", @@ -1461,7 +1461,7 @@ "location": "enrichmentAndLinkage.linkableDatasets.title" }, { - "required": true, + "required": false, "title": "Url of a dataset", "description": null, "guidance": "", @@ -1477,7 +1477,7 @@ "location": 
"enrichmentAndLinkage.linkableDatasets.url" }, { - "required": true, + "required": false, "title": "Persistent identifier of a dataset", "description": null, "guidance": "", @@ -1493,7 +1493,7 @@ "location": "enrichmentAndLinkage.similarToDatasets.pid" }, { - "required": true, + "required": false, "title": "Title of a dataset", "description": null, "guidance": "", @@ -1509,7 +1509,7 @@ "location": "enrichmentAndLinkage.similarToDatasets.title" }, { - "required": true, + "required": false, "title": "Url of a dataset", "description": null, "guidance": "", @@ -1825,7 +1825,7 @@ "location": "demographicFrequency.disease.count" }, { - "required": true, + "required": false, "title": "Omics assay", "description": "The specific 'omics assay that generated the dataset.", "guidance": "The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry.", @@ -1857,7 +1857,7 @@ "location": "omics.assay" }, { - "required": true, + "required": false, "title": "Omics Platform", "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", "guidance": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index 4212032..19378b8 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -54,7 +54,7 @@ Some url with a reference to the record of a previous version of this dataset | title | guidance | is_list | required | type | |:-------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| -| revision url | | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| revision url | | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -134,7 +134,7 @@ Please provide a Research Organization Registry (ROR) identifier (see https://ro | title | guidance | is_list | required | type | |:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------| -| identifier | **Example**: https://ror.org/053fq8t95
If your organisation does not have a ROR identifier please use the “suggest and institute” function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform | False | False | ['str', 'int'] | +| identifier | **Example**: https://ror.org/053fq8t95
If your organisation does not have a ROR identifier please use the “suggest and institute” function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform | False | True | ['str', 'int'] | @@ -211,7 +211,7 @@ Please provide a list of relevant and specific keywords that can improve the sea | title | guidance | is_list | required | type | |:---------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------| -| Keywords | - Please provide **relevant** and **specific keywords** that can **improve the search engine optimization** of your dataset.
- Please **enter one keyword at a time** and click **Add New Field** to add further keywords.
- Text from the title is automatically included in the search, there is no need to include this in the keywords.
- Include words that researcher may include in their searches. | True | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | +| Keywords | - Please provide **relevant** and **specific keywords** that can **improve the search engine optimization** of your dataset.
- Please **enter one keyword at a time** and click **Add New Field** to add further keywords.
- Text from the title is automatically included in the search, there is no need to include this in the keywords.
- Include words that researcher may include in their searches. | True | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | Examples: @@ -275,7 +275,7 @@ A free-text description of the dataset.
A URL can also be provided as the des | title | guidance | is_list | required | type | |:------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------| -| Description | - An HTML account of the data that **provides context and scope** of the data, **limited to 10000 characters, and/or a resolvable URL** that describes the dataset.
- Additional information can be recorded and included using the Associated media field. | False | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]"] | +| Description | - An HTML account of the data that **provides context and scope** of the data, **limited to 10000 characters, and/or a resolvable URL** that describes the dataset.
- Additional information can be recorded and included using the Associated media field. | False | True | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]"] | @@ -319,7 +319,7 @@ The geographical area covered by the dataset. It is recommended that links are t | title | guidance | is_list | required | type | |:--------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Geographic coverage | - The geographical area covered by the dataset.
- Please provide a valid location.
- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about).
- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes). | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List'] | +| Geographic coverage | - The geographical area covered by the dataset.
- Please provide a valid location.
- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about).
- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes). | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List'] | Examples: @@ -455,7 +455,7 @@ The sub-type of the dataset content. Multiomics is selected on behalf of the sub | title | guidance | is_list | required | type | |:-----------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Dataset sub-type | Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- *Measurements/Tests**- Includes any data related to laboratory or other diagnostics.
- *Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- *Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.
- *Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.
- *Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- *Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- *Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.
- *Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- *Information and communication**: Includes any data related to the study or application of information and communication.
- *Politics**: Includes any data related to political views, activities, voting, etc. | True | True | ["DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']"] | +| Dataset sub-type | Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.
- **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm, abdomen or leg imaging.
- **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more than one omics datasetSubType is selected.
- **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.
- **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- **Information and communication**: Includes any data related to the study or application of information and communication.
- **Politics**: Includes any data related to political views, activities, voting, etc. | True | False | ["DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']"] | @@ -696,7 +696,7 @@ Data Controller means a person/entity who (either alone or jointly or in common | title | guidance | is_list | required | type | 
|:----------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------| -| Data Controller | - Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.
- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.
- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.
- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.
- Example: NHS England | False | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Data Controller | - Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.
- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.
- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.
- In some cases, there may be multiple Data Controllers i.e. GP data. If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.
- Example: NHS England | False | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -801,7 +801,7 @@ None | title | guidance | is_list | required | type | |:-----------------------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| Persistent identifier of a dataset | | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | +| Persistent identifier of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | @@ -812,7 +812,7 @@ None | title | guidance | is_list | required | type | |:-------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| Title of a dataset | | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | +| Title of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | @@ -823,7 +823,7 @@ None | title | guidance | is_list | required | type | |:-----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| -| Url of a dataset | | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Url of a dataset | | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -845,7 +845,7 @@ None | title | guidance | is_list | required | type | |:-----------------------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| Persistent 
identifier of a dataset | | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | +| Persistent identifier of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | @@ -856,7 +856,7 @@ None | title | guidance | is_list | required | type | |:-------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| Title of a dataset | | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | +| Title of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | @@ -867,7 +867,7 @@ None | title | guidance | is_list | required | type | |:-----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| -| Url of a dataset | | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Url of a dataset | | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -887,7 +887,7 @@ None | title | guidance | is_list | required | type | |:-----------------------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| Persistent identifier of a dataset | | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | +| Persistent identifier of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | @@ -898,7 +898,7 @@ None | title | guidance | is_list | required | type | 
|:-------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| Title of a dataset | | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | +| Title of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | @@ -909,7 +909,7 @@ None | title | guidance | is_list | required | type | |:-----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| -| Url of a dataset | | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Url of a dataset | | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -929,7 +929,7 @@ None | title | guidance | is_list | required | type | |:-----------------------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| Persistent identifier of a dataset | | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | +| Persistent identifier of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | @@ -940,7 +940,7 @@ None | title | guidance | is_list | required | type | |:-------------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| Title of a dataset | | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | +| Title of a dataset | | False | False | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", 'null'] | @@ 
-951,7 +951,7 @@ None | title | guidance | is_list | required | type | |:-----------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| -| Url of a dataset | | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | +| Url of a dataset | | False | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -1094,7 +1094,7 @@ The name of a table in a dataset. | title | guidance | is_list | required | type | |:-----------|:-----------|:----------|:-----------|:----------------| -| Table name | | False | True | ['str', 'null'] | +| Table name | | False | False | ['str', 'null'] | @@ -1375,7 +1375,7 @@ The specific 'omics assay that generated the dataset. | title | guidance | is_list | required | type | |:------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Omics assay | The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry. 
| False | True | ["Assay['NMR spectroscopy','mass-spectrometry','whole genome sequencing','exome sequencing','genotyping by array','transcriptome profiling by high-throughput sequencing','transcriptome profiling by array','amplicon sequencing','methylation binding domain sequencing','methylation profiling by high-throughput sequencing','genomic variant calling','chromatin accessibility profiling by high-throughput sequencing','histone modification profiling by high-throughput sequencing','chromatin immunoprecipitation sequencing','whole genome shotgun sequencing','whole transcriptome sequencing','targeted mutation analysis']", 'null'] | +| Omics assay | The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry. | False | False | ["Assay['NMR spectroscopy','mass-spectrometry','whole genome sequencing','exome sequencing','genotyping by array','transcriptome profiling by high-throughput sequencing','transcriptome profiling by array','amplicon sequencing','methylation binding domain sequencing','methylation profiling by high-throughput sequencing','genomic variant calling','chromatin accessibility profiling by high-throughput sequencing','histone modification profiling by high-throughput sequencing','chromatin immunoprecipitation sequencing','whole genome shotgun sequencing','whole transcriptome sequencing','targeted mutation analysis']", 'null'] | @@ -1386,7 +1386,7 @@ The specific technology or infrastructure used to perform the assay. 
If the omic | title | guidance | is_list | required | type | |:---------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------------| -| Omics Platform | The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf. | False | True | ["Platform['Other','NMR Nightingale','Metabolon','Biocrates','Illumina','Oxford Nanopore','454','Hi-C','HiFi']", 'null'] | +| Omics Platform | The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf. 
| False | False | ["Platform['Other','NMR Nightingale','Metabolon','Biocrates','Illumina','Oxford Nanopore','454','Hi-C','HiFi']", 'null'] | diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index b8adf49..4ea1506 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -65,7 +65,7 @@ }, { "name": "url", - "required": true, + "required": false, "title": "revision url", "description": "Some url with a reference to the record of a previous version of this dataset", "guidance": "", @@ -169,7 +169,7 @@ "subItems": [ { "name": "identifier", - "required": false, + "required": true, "title": "identifier", "description": "Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation.", "guidance": "Example: https://ror.org/053fq8t95If your organisation does not have a ROR identifier please use the \u201csuggest and institute\u201d function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform", @@ -271,7 +271,7 @@ }, { "name": "keywords", - "required": true, + "required": false, "title": "Keywords", "description": "Please provide a list of relevant and specific keywords that can improve the search engine optimisation (SEO) of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. 
We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", "guidance": "Please provide relevant and specific keywords that can improve the search engine optimization of your dataset.- Please enter one keyword at a time and click Add New Field to add further keywords.- Text from the title is automatically included in the search, there is no need to include this in the keywords.- Include words that researcher may include in their searches.", @@ -357,7 +357,7 @@ "subItems": [ { "name": "description", - "required": false, + "required": true, "title": "Description", "description": "A free-text description of the dataset.A URL can also be provided as the description of the dataset.Gateway Feature: Keywords and text may be extracted out of the description and indexed for search.", "guidance": "An HTML account of the data that provides context and scope of the data, limited to 10000 characters, and/or a resolvable URL that describes the dataset.- Additional information can be recorded and included using the Associated media field.", @@ -419,7 +419,7 @@ "subItems": [ { "name": "spatial", - "required": false, + "required": true, "title": "Geographic coverage", "description": "The geographical area covered by the dataset. It is recommended that links are to entries in one of the recommended standards:- For locations in the UK: ONS standards- For locations in other countries: ISO 3166-1 & ISO 3166-2", "guidance": "The geographical area covered by the dataset.- Please provide a valid location.- For locations in the UK, this location should conform to ONS standards.- For locations in other countries we use ISO 3166-1 & ISO 3166-2.", @@ -596,7 +596,7 @@ }, { "name": "datasetSubType", - "required": true, + "required": false, "title": "Dataset sub-type", "description": "The sub-type of the dataset content. 
Multiomics is selected on behalf of the submitter if more that one omics datasetSubType ('proteomics', 'transcriptomics', 'epigenomics', 'metabolomics', 'metagenomics', 'genomics', 'lipidomics') is selected", "guidance": "Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.- Health and disease: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.- Treatments/Interventions: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.- Measurements/Tests- Includes any data related to laboratory or other diagnostics.- Imaging types: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.- *Imaging area of the body: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.- Omics: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. 
Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.- Socioeconomic: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.- *Lifestyle: Includes any data related to smoking, physical activity, dietary habits or alcohol.- Registry: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.- Environment and energy: Includes any data related to the monitoring or study of environmental or energy factors or events.- *Information and communication: Includes any data related to the study or application of information and communication.- Politics*: Includes any data related to political views, activities, voting, etc.", @@ -651,7 +651,7 @@ }, { "name": "temporal", - "required": false, + "required": true, "title": "Temporal Coverage", "description": "Dates and other temporal coverage information.", "guidance": "", @@ -902,7 +902,7 @@ }, { "name": "jurisdiction", - "required": true, + "required": false, "title": "Jurisdiction", "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", "guidance": "A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/", @@ -918,7 +918,7 @@ }, { "name": "dataController", - "required": true, + "required": false, "title": "Data Controller", "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", "guidance": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.- Notes: For most organisations this will be the same as the Data Custodian of the dataset. If this is not the case, please indicate that there is a different controller.- If there is a different controller please complete the Data Processor attribute to indicate if the Data Custodian is a Processor rather than the Data Controller.- In some cases, there may be multiple Data Controllers i.e. GP data. 
If this is the case, please indicate the fact in a free-text field and describe the data sharing arrangement or a link to it, so that this can be understood by research users.- Example: NHS England", @@ -1060,7 +1060,7 @@ "subItems": [ { "name": "pid", - "required": true, + "required": false, "title": "Persistent identifier of a dataset", "description": null, "guidance": "", @@ -1075,7 +1075,7 @@ }, { "name": "title", - "required": true, + "required": false, "title": "Title of a dataset", "description": null, "guidance": "", @@ -1090,7 +1090,7 @@ }, { "name": "url", - "required": true, + "required": false, "title": "Url of a dataset", "description": null, "guidance": "", @@ -1122,7 +1122,7 @@ "subItems": [ { "name": "pid", - "required": true, + "required": false, "title": "Persistent identifier of a dataset", "description": null, "guidance": "", @@ -1137,7 +1137,7 @@ }, { "name": "title", - "required": true, + "required": false, "title": "Title of a dataset", "description": null, "guidance": "", @@ -1152,7 +1152,7 @@ }, { "name": "url", - "required": true, + "required": false, "title": "Url of a dataset", "description": null, "guidance": "", @@ -1182,7 +1182,7 @@ "subItems": [ { "name": "pid", - "required": true, + "required": false, "title": "Persistent identifier of a dataset", "description": null, "guidance": "", @@ -1197,7 +1197,7 @@ }, { "name": "title", - "required": true, + "required": false, "title": "Title of a dataset", "description": null, "guidance": "", @@ -1212,7 +1212,7 @@ }, { "name": "url", - "required": true, + "required": false, "title": "Url of a dataset", "description": null, "guidance": "", @@ -1242,7 +1242,7 @@ "subItems": [ { "name": "pid", - "required": true, + "required": false, "title": "Persistent identifier of a dataset", "description": null, "guidance": "", @@ -1257,7 +1257,7 @@ }, { "name": "title", - "required": true, + "required": false, "title": "Title of a dataset", "description": null, "guidance": "", @@ -1272,7 +1272,7 @@ }, { 
"name": "url", - "required": true, + "required": false, "title": "Url of a dataset", "description": null, "guidance": "", @@ -1464,7 +1464,7 @@ "subItems": [ { "name": "name", - "required": true, + "required": false, "title": "Table name", "description": "The name of a table in a dataset.", "guidance": "", @@ -1650,7 +1650,7 @@ "subItems": [ { "name": "age", - "required": true, + "required": false, "title": "Age", "description": "Array of age bins and their corresponding counts.", "guidance": "", @@ -1691,7 +1691,7 @@ }, { "name": "ethnicity", - "required": true, + "required": false, "title": "Ethnicity", "description": "Array of ethnicity bins and their corresponding counts.", "guidance": "", @@ -1732,7 +1732,7 @@ }, { "name": "genderAssignedAtBirth", - "required": true, + "required": false, "title": "Gender assigned at birth", "description": "Gender assigned at birth, male or female, and their corresponding counts.", "guidance": "", @@ -1773,7 +1773,7 @@ }, { "name": "disease", - "required": true, + "required": false, "title": "Disease", "description": "Array of diseases and their corresponding counts.", "guidance": "", @@ -1844,7 +1844,7 @@ "subItems": [ { "name": "assay", - "required": true, + "required": false, "title": "Omics assay", "description": "The specific 'omics assay that generated the dataset.", "guidance": "The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry.", @@ -1858,7 +1858,7 @@ }, { "name": "platform", - "required": true, + "required": false, "title": "Omics Platform", "description": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", "guidance": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index a523138..7221a7a 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -25,6 +25,7 @@ "type": "null" } ], + "default": null, "description": "Please provide details for the data access rights", "title": "Access Rights" }, @@ -79,6 +80,7 @@ "type": "null" } ], + "default": null, "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", "title": "Jurisdiction" }, @@ -91,6 +93,7 @@ "type": "null" } ], + "default": null, "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", "title": "Data Controller" }, @@ -124,11 +127,6 @@ "title": "Access/governance requirements" } }, - "required": [ - "accessRights", - "jurisdiction", - "dataController" - ], "title": "Access", "type": "object" }, @@ -424,6 +422,7 @@ "type": "null" } ], + "default": null, "description": "The name of a table in a dataset.", "title": "Table Name" }, @@ -452,7 +451,6 @@ } }, "required": [ - "name", "columns" ], "title": "DataTable", @@ -615,6 +613,7 @@ "type": "null" } ], + "default": null, "description": "Array of age bins and their corresponding counts.", "title": "Age" }, @@ -630,6 +629,7 @@ "type": "null" } ], + "default": null, "description": "Array of ethnicity bins and their corresponding counts.", "title": "Ethnicity" }, @@ -645,6 +645,7 @@ "type": "null" } ], + "default": null, "description": "Gender assigned at birth, male or female, and their corresponding counts.", "title": "Gender assigned at birth" }, @@ -660,16 +661,11 @@ "type": "null" } ], + "default": null, "description": "Array of diseases and their corresponding counts.", "title": "Disease" } }, - "required": [ - "age", - "ethnicity", - "genderAssignedAtBirth", - "disease" - ], "title": "DemographicFrequency", "type": "object" }, @@ -799,6 +795,7 @@ "type": "null" } ], + "default": null, "description": "Code value of the ontology vocabulary encoding", "examples": [ "OPCS4,NHS NATIONAL CODES,ICD10,OTHER" @@ -814,6 +811,7 @@ "type": "null" } ], + "default": null, "description": "What the vocabulary conforms to.", "examples": [ "LOCAL,NHS DATA DICTIONARY" @@ -829,6 +827,7 @@ 
"type": "null" } ], + "default": null, "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", "examples": [ "en" @@ -844,6 +843,7 @@ "type": "null" } ], + "default": null, "description": "Format(s) the dataset can be made available in", "examples": [ "CSV,JSON,SQL database table" @@ -851,12 +851,6 @@ "title": "Dataset Format" } }, - "required": [ - "vocabularyEncodingSchemes", - "conformsTo", - "languages", - "formats" - ], "title": "FormatAndStandards", "type": "object" }, @@ -1152,6 +1146,7 @@ "type": "null" } ], + "default": null, "description": "The specific 'omics assay that generated the dataset.", "guidance": "The specific 'omics assay that generated the dataset. If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry.", "title": "Omics assay" @@ -1165,15 +1160,12 @@ "type": "null" } ], + "default": null, "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", "guidance": "The specific technology or infrastructure used to perform the assay. 
If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", "title": "Omics Platform" } }, - "required": [ - "assay", - "platform" - ], "title": "Omics", "type": "object" }, @@ -1188,6 +1180,7 @@ "type": "null" } ], + "default": null, "description": "Name of the organisation", "title": "Organisation Name" }, @@ -1222,9 +1215,6 @@ "title": "Research Organization Registry Identifier" } }, - "required": [ - "name" - ], "title": "Organisation", "type": "object" }, @@ -1554,6 +1544,7 @@ "type": "null" } ], + "default": null, "description": "A shorter descriptive title of the dataset", "examples": [ "ONS 2011 Census Wales (CENW)" @@ -1569,6 +1560,7 @@ "type": "null" } ], + "default": null, "description": "DOI associated to this dataset", "examples": [ "10.1093/ije/dyx196" @@ -1596,6 +1588,7 @@ "type": "null" } ], + "default": null, "description": "Comma separated key words associated to this dataset.", "examples": [ "Preprints,Papers,HDR UK" @@ -1611,6 +1604,7 @@ "type": "null" } ], + "default": null, "description": "Keywords that have been filtered and limited", "title": "Controlled Keywords" }, @@ -1624,6 +1618,7 @@ "type": "null" } ], + "default": null, "description": "email of a person who can be the main contact point of this dataset", "example": [ "blah.blah@blah.com" @@ -1639,6 +1634,7 @@ "type": "null" } ], + "default": null, "description": "What type of dataset is this?", "title": "Dataset type" }, @@ -1651,6 +1647,7 @@ "type": "null" } ], + "default": null, "description": "Longer description of the dataset in detail", "examples": [ "Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations\\n\\nThis will include:\\n- Papers\\n- COVID-19 Papers\\n- COVID-19 Preprint" @@ -1666,6 +1663,7 @@ "type": "null" } ], + "default": null, "description": "Link to 
details about the publisher of this dataset", "title": "Publisher" }, @@ -1711,15 +1709,7 @@ }, "required": [ "title", - "shortTitle", - "doiName", - "abstract", - "keywords", - "controlledKeywords", - "contactPoint", - "datasetType", - "description", - "publisher" + "abstract" ], "title": "Summary", "type": "object" @@ -1741,6 +1731,7 @@ "type": "null" } ], + "default": null, "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", "title": "Start Date" }, @@ -1799,7 +1790,6 @@ } }, "required": [ - "startDate", "timeLag", "accrualPeriodicity" ], @@ -2128,6 +2118,7 @@ "type": "null" } ], + "default": null, "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE", "title": "Data Use Limitation" }, @@ -2140,6 +2131,7 @@ "type": "null" } ], + "default": null, "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information.", "title": "Data Use Requirements" }, @@ -2157,10 +2149,6 @@ "title": "Citation Requirements'" } }, - "required": [ - "dataUseLimitation", - "dataUseRequirement" - ], "title": "Usage", "type": "object" } @@ -2215,9 +2203,7 @@ "$ref": "#/$defs/Accessibility" } ], - "default": null, - "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", - "title": "Accessibility" + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets." }, "linkage": { "anyOf": [ @@ -2309,7 +2295,8 @@ }, "required": [ "required", - "summary" + "summary", + "accessibility" ], "title": "Gwdm20", "type": "object" diff --git a/hdr_schemata/models/GWDM/v2_0/Access.py b/hdr_schemata/models/GWDM/v2_0/Access.py index c95bc89..dd6f57b 100644 --- a/hdr_schemata/models/GWDM/v2_0/Access.py +++ b/hdr_schemata/models/GWDM/v2_0/Access.py @@ -12,7 +12,7 @@ class Config: extra = "forbid" accessRights: Optional[CommaSeparatedValues] = Field( - ..., **an.accessRights.__dict__ + None, **an.accessRights.__dict__ ) accessService: Optional[LongDescription] = Field(None, **an.accessService.__dict__) @@ -26,14 +26,13 @@ class Config: ) jurisdiction: Optional[CommaSeparatedValues] = Field( - ..., **an.jurisdiction.__dict__ + None, **an.jurisdiction.__dict__ ) - dataController: Optional[LongDescription] = Field(..., **an.dataController.__dict__) + dataController: Optional[LongDescription] = Field(None, **an.dataController.__dict__) dataProcessor: Optional[LongDescription] = Field(None, **an.dataProcessor.__dict__) accessServiceCategory: Optional[CommaSeparatedValues] = Field( None, **an.accessServiceCategory.__dict__ ) - \ No newline at end of file diff --git a/hdr_schemata/models/GWDM/v2_0/DataTable.py 
b/hdr_schemata/models/GWDM/v2_0/DataTable.py index 2ea7ab1..22b513d 100644 --- a/hdr_schemata/models/GWDM/v2_0/DataTable.py +++ b/hdr_schemata/models/GWDM/v2_0/DataTable.py @@ -14,7 +14,7 @@ class Config: extra = "forbid" name: Optional[constr(min_length=1, max_length=500)] = Field( - ..., **an.name.__dict__ + None, **an.name.__dict__ ) description: Optional[constr(min_length=1, max_length=20000)] = Field( None, **an.description.__dict__ diff --git a/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py b/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py index 51a6381..173c56d 100644 --- a/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py +++ b/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py @@ -16,28 +16,28 @@ class Config: extra = "forbid" age: Optional[List[Age]] = Field( - ..., + None, title=an.age.title, description=an.age.description, # json_schema_extra={"guidance": an.age.guidance} ) ethnicity: Optional[List[Ethnicity]] = Field( - ..., + None, title=an.ethnicity.title, description=an.ethnicity.description, # json_schema_extra={"guidance": an.ethnicity.guidance} ) genderAssignedAtBirth: Optional[List[GenderAssignedAtBirth]] = Field( - ..., + None, title=an.genderAssignedAtBirth.title, description=an.genderAssignedAtBirth.description, # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} ) disease: Optional[List[Disease]] = Field( - ..., + None, title=an.disease.title, description=an.disease.description, # json_schema_extra={"guidance": an.disease.guidance} diff --git a/hdr_schemata/models/GWDM/v2_0/FormatAndStandards.py b/hdr_schemata/models/GWDM/v2_0/FormatAndStandards.py index a54b87a..6c273fb 100644 --- a/hdr_schemata/models/GWDM/v2_0/FormatAndStandards.py +++ b/hdr_schemata/models/GWDM/v2_0/FormatAndStandards.py @@ -12,11 +12,11 @@ class Config: extra = "forbid" vocabularyEncodingSchemes: Optional[CommaSeparatedValues] = Field( - ..., **an.vocabularyEncodingSchemes.__dict__ + None, **an.vocabularyEncodingSchemes.__dict__ ) - 
conformsTo: Optional[CommaSeparatedValues] = Field(..., **an.conformsTo.__dict__) + conformsTo: Optional[CommaSeparatedValues] = Field(None, **an.conformsTo.__dict__) - languages: Optional[CommaSeparatedValues] = Field(..., **an.languages.__dict__) + languages: Optional[CommaSeparatedValues] = Field(None, **an.languages.__dict__) - formats: Optional[CommaSeparatedValues] = Field(..., **an.formats.__dict__) + formats: Optional[CommaSeparatedValues] = Field(None, **an.formats.__dict__) diff --git a/hdr_schemata/models/GWDM/v2_0/Omics.py b/hdr_schemata/models/GWDM/v2_0/Omics.py index 7068319..52200f9 100644 --- a/hdr_schemata/models/GWDM/v2_0/Omics.py +++ b/hdr_schemata/models/GWDM/v2_0/Omics.py @@ -12,13 +12,13 @@ class Config: extra = "forbid" assay: Optional[Assay] = Field( - ..., + None, **an.assay.__dict__, json_schema_extra={"guidance": an.assay.guidance} ) platform: Optional[Platform] = Field( - ..., + None, **an.platform.__dict__, json_schema_extra={"guidance": an.platform.guidance} ) diff --git a/hdr_schemata/models/GWDM/v2_0/Organisation.py b/hdr_schemata/models/GWDM/v2_0/Organisation.py index 6d6bd33..bdfc2e1 100644 --- a/hdr_schemata/models/GWDM/v2_0/Organisation.py +++ b/hdr_schemata/models/GWDM/v2_0/Organisation.py @@ -8,7 +8,7 @@ class Organisation(BaseModel): - name: Optional[Name] = Field(..., **an.name.__dict__) + name: Optional[Name] = Field(None, **an.name.__dict__) gatewayId: Optional[constr(min_length=2, max_length=50)] = Field( None, **an.gatewayId.__dict__ diff --git a/hdr_schemata/models/GWDM/v2_0/Provenance.py b/hdr_schemata/models/GWDM/v2_0/Provenance.py index 1866455..33383b1 100644 --- a/hdr_schemata/models/GWDM/v2_0/Provenance.py +++ b/hdr_schemata/models/GWDM/v2_0/Provenance.py @@ -9,5 +9,5 @@ class Provenance(BaseModel): class Config: extra = 'forbid' - origin: Optional[Origin] = None + origin: Optional[Origin] = Field(None) temporal: Temporal diff --git a/hdr_schemata/models/GWDM/v2_0/Publisher.py 
b/hdr_schemata/models/GWDM/v2_0/Publisher.py index 89bfeb3..f3af2df 100644 --- a/hdr_schemata/models/GWDM/v2_0/Publisher.py +++ b/hdr_schemata/models/GWDM/v2_0/Publisher.py @@ -10,7 +10,7 @@ class Publisher(BaseModel): - publisherName: Optional[Name] = Field(..., **an.publisherName.__dict__) + publisherName: Optional[Name] = Field(None, **an.publisherName.__dict__) publisherGatewayId: Optional[constr(min_length=2, max_length=50)] = Field( None, **an.publisherGatewayId.__dict__ diff --git a/hdr_schemata/models/GWDM/v2_0/Summary.py b/hdr_schemata/models/GWDM/v2_0/Summary.py index adf1340..8d3e880 100644 --- a/hdr_schemata/models/GWDM/v2_0/Summary.py +++ b/hdr_schemata/models/GWDM/v2_0/Summary.py @@ -17,26 +17,26 @@ class Summary(BaseModel): title: TwoHundredFiftyFiveCharacters = Field(..., **an.title.__dict__) - shortTitle: Optional[ShortTitle] = Field(..., **an.shortTitle.__dict__) + shortTitle: Optional[ShortTitle] = Field(None, **an.shortTitle.__dict__) - doiName: Optional[Doi] = Field(..., **an.doiName.__dict__) + doiName: Optional[Doi] = Field(None, **an.doiName.__dict__) abstract: LongAbstractText = Field(..., **an.abstract.__dict__) - keywords: Optional[CommaSeparatedValues] = Field(..., **an.keywords.__dict__) + keywords: Optional[CommaSeparatedValues] = Field(None, **an.keywords.__dict__) controlledKeywords: Optional[CommaSeparatedValues] = Field( - ..., **an.controlledKeywords.__dict__ + None, **an.controlledKeywords.__dict__ ) - contactPoint: Optional[EmailStr] = Field(..., **an.contactPoint.__dict__) + contactPoint: Optional[EmailStr] = Field(None, **an.contactPoint.__dict__) - datasetType: Optional[DatasetType] = Field(..., **an.datasetType.__dict__) + datasetType: Optional[DatasetType] = Field(None, **an.datasetType.__dict__) - description: Optional[LongDescription] = Field(..., **an.description.__dict__) + description: Optional[LongDescription] = Field(None, **an.description.__dict__) publisher: Optional[Organisation] = Field( - ..., + None, 
description=an.publisher.description, title=an.publisher.title, ) diff --git a/hdr_schemata/models/GWDM/v2_0/Temporal.py b/hdr_schemata/models/GWDM/v2_0/Temporal.py index b93351e..773f999 100644 --- a/hdr_schemata/models/GWDM/v2_0/Temporal.py +++ b/hdr_schemata/models/GWDM/v2_0/Temporal.py @@ -12,7 +12,7 @@ class Temporal(BaseModel): class Config: extra = "forbid" - startDate: Optional[Union[date, datetime]] = Field(..., **an.startDate.__dict__) + startDate: Optional[Union[date, datetime]] = Field(None, **an.startDate.__dict__) endDate: Optional[Union[date, datetime]] = Field(None, **an.endDate.__dict__) timeLag: TimeLag = Field(..., **an.timeLag.__dict__) diff --git a/hdr_schemata/models/GWDM/v2_0/Usage.py b/hdr_schemata/models/GWDM/v2_0/Usage.py index 8e34172..b02ebc2 100644 --- a/hdr_schemata/models/GWDM/v2_0/Usage.py +++ b/hdr_schemata/models/GWDM/v2_0/Usage.py @@ -14,11 +14,11 @@ class Config: extra = "forbid" dataUseLimitation: Optional[CommaSeparatedValues] = Field( - ..., **an.dataUseLimitation.__dict__ + None, **an.dataUseLimitation.__dict__ ) dataUseRequirement: Optional[CommaSeparatedValues] = Field( - ..., **an.dataUseRequirements.__dict__ + None, **an.dataUseRequirements.__dict__ ) resourceCreator: Optional[Organisation] = Field(None, **an.resourceCreator.__dict__) diff --git a/hdr_schemata/models/GWDM/v2_0/__init__.py b/hdr_schemata/models/GWDM/v2_0/__init__.py index 157ede4..d166d2b 100644 --- a/hdr_schemata/models/GWDM/v2_0/__init__.py +++ b/hdr_schemata/models/GWDM/v2_0/__init__.py @@ -48,7 +48,7 @@ class Config: ) accessibility: Accessibility = Field( - None, description=an.accessibility.description, title=an.accessibility.title + ..., description=an.accessibility.description, title=an.accessibility.title ) linkage: Optional[Linkage] = Field( diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index eac6685..5ca018f 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ 
b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -101,6 +101,7 @@ "type": "null" } ], + "default": null, "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored.", "guidance": "A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/", "title": "Jurisdiction" @@ -114,6 +115,7 @@ "type": "null" } ], + "default": null, "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", "examples": [ "NHS England" @@ -141,9 +143,7 @@ } }, "required": [ - "accessRights", - "jurisdiction", - "dataController" + "accessRights" ], "title": "Access", "type": "object" @@ -349,7 +349,6 @@ "type": "array" } ], - "default": null, "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in one of the recommended standards:- For locations in the UK: ONS standards- For locations in other countries: ISO 3166-1 & ISO 3166-2", "examples": [ "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" @@ -471,6 +470,9 @@ "title": "Gender" } }, + "required": [ + "spatial" + ], "title": "Coverage", "type": "object" }, @@ -549,6 +551,7 @@ "type": "null" } ], + "default": null, "description": "The name of a table in a dataset.", "title": "Table name" }, @@ -577,7 +580,6 @@ } }, "required": [ - "name", "columns" ], "title": "DataTable", @@ -670,6 +672,7 @@ "type": "null" } ], + "default": null, "title": "Persistent identifier of a dataset" }, "title": { @@ -681,6 +684,7 @@ "type": "null" } ], + "default": null, "title": "Title of a dataset" }, "url": { @@ -692,14 +696,10 @@ "type": "null" } ], + "default": null, "title": "Url of a dataset" } }, - "required": [ - "pid", - "title", - "url" - ], "title": "DatasetDescriptor", "type": "object" }, @@ -819,6 +819,7 @@ "type": "null" } ], + "default": null, "description": "Array of age bins and their corresponding counts.", "title": "Age" }, @@ -834,6 +835,7 @@ "type": "null" } ], + "default": null, "description": "Array of ethnicity bins and their corresponding counts.", "title": "Ethnicity" }, @@ -849,6 +851,7 @@ "type": "null" } ], + "default": null, "description": "Gender assigned at birth, male or female, and their corresponding counts.", "title": "Gender assigned at birth" }, @@ -864,16 +867,11 @@ "type": "null" } ], + "default": null, "description": "Array of diseases and their corresponding counts.", "title": "Disease" } }, - "required": [ - "age", - "ethnicity", - "genderAssignedAtBirth", - "disease" - ], "title": "DemographicFrequency", "type": "object" }, @@ -942,10 +940,8 @@ "$ref": "#/$defs/Description" } ], - "default": null, "description": "A free-text description of the dataset.A URL can also be provided as the description of 
the dataset.Gateway Feature: Keywords and text may be extracted out of the description and indexed for search.", - "guidance": "An HTML account of the data that provides context and scope of the data, limited to 10000 characters, and/or a resolvable URL that describes the dataset.- Additional information can be recorded and included using the Associated media field.", - "title": "Description" + "guidance": "An HTML account of the data that provides context and scope of the data, limited to 10000 characters, and/or a resolvable URL that describes the dataset.- Additional information can be recorded and included using the Associated media field." }, "associatedMedia": { "anyOf": [ @@ -992,6 +988,9 @@ "title": "Dataset pipeline status" } }, + "required": [ + "description" + ], "title": "Documentation", "type": "object" }, @@ -1275,24 +1274,7 @@ "type": "array" }, "format": { -<<<<<<< HEAD "default": null, -======= - "anyOf": [ - { - "items": { - "$ref": "#/$defs/Format" - }, - "type": "array" - }, - { - "$ref": "#/$defs/CommaSeparatedValues" - }, - { - "type": "null" - } - ], ->>>>>>> 5c24292 (debugging from onboarding form) "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", "examples": [ "text/tab-separated-values", @@ -1687,6 +1669,7 @@ "type": "null" } ], + "default": null, "description": "The specific 'omics assay that generated the dataset.", "guidance": "The specific 'omics assay that generated the dataset. 
If the assay used to generate your dataset is not listed, please contract the gateway team by submitting an enquiry.", "title": "Omics assay" @@ -1700,15 +1683,12 @@ "type": "null" } ], + "default": null, "description": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", "guidance": "The specific technology or infrastructure used to perform the assay. If the omics platform used to create your dataset is not listed, please select other, a member of the gateway team will contact you to add an appropriate term(s) both to your record and to the metadata schema on your behalf.", "title": "Omics Platform" } }, - "required": [ - "assay", - "platform" - ], "title": "Omics", "type": "object" }, @@ -1732,7 +1712,6 @@ "type": "integer" } ], - "default": null, "description": "Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation.", "guidance": "Example: https://ror.org/053fq8t95If your organisation does not have a ROR identifier please use the \u201csuggest and institute\u201d function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform", "title": "identifier" @@ -1803,6 +1782,7 @@ } }, "required": [ + "identifier", "name", "contactPoint" ], @@ -1850,6 +1830,7 @@ "type": "null" } ], + "default": null, "description": "The sub-type of the dataset content. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType ('proteomics', 'transcriptomics', 'epigenomics', 'metabolomics', 'metagenomics', 'genomics', 'lipidomics') is selected", "guidance": "Sub-types include those listed below under each data type. 
Datasets can have more than one sub-type associated.- Health and disease: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.- Treatments/Interventions: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.- Measurements/Tests- Includes any data related to laboratory or other diagnostics.- Imaging types: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.- *Imaging area of the body: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.- Omics: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.- Socioeconomic: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.- *Lifestyle: Includes any data related to smoking, physical activity, dietary habits or alcohol.- Registry: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.- Environment and energy: Includes any data related to the monitoring or study of environmental or energy factors or events.- *Information and communication: Includes any data related to the study or application of information and communication.- Politics*: Includes any data related to political views, activities, voting, etc.", "title": "Dataset sub-type" @@ -1904,8 +1885,7 @@ } }, "required": [ - "datasetType", - "datasetSubType" + "datasetType" ], "title": "Origin", "type": "object" @@ 
-1975,11 +1955,13 @@ "$ref": "#/$defs/Temporal" } ], - "default": null, "description": "Dates and other temporal coverage information.", "title": "Temporal Coverage" } }, + "required": [ + "temporal" + ], "title": "Provenance", "type": "object" }, @@ -2023,6 +2005,7 @@ "type": "null" } ], + "default": null, "description": "Some url with a reference to the record of a previous version of this dataset", "examples": [ "https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561" @@ -2031,8 +2014,7 @@ } }, "required": [ - "version", - "url" + "version" ], "title": "Revision", "type": "object" @@ -2231,6 +2213,7 @@ "type": "null" } ], + "default": null, "description": "Please provide a list of relevant and specific keywords that can improve the search engine optimisation (SEO) of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", "examples": [ "Outpatient Care", @@ -2306,7 +2289,6 @@ "abstract", "dataCustodian", "populationSize", - "keywords", "contactPoint" ], "title": "Summary", diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Access.py b/hdr_schemata/models/HDRUK/v3_0_0/Access.py index b7511f7..b72b98a 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Access.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Access.py @@ -34,11 +34,11 @@ class Config: ) jurisdiction: Optional[List[Isocountrycode]] = Field( - ..., **an.jurisdiction.__dict__, json_schema_extra={"guidance": an.jurisdiction.guidance} + None, **an.jurisdiction.__dict__, json_schema_extra={"guidance": an.jurisdiction.guidance} ) dataController: Optional[LongDescription] = Field( - ..., **an.dataController.__dict__, json_schema_extra={"guidance": an.dataController.guidance} + None, **an.dataController.__dict__, json_schema_extra={"guidance": an.dataController.guidance} ) 
dataProcessor: Optional[LongDescription] = Field( diff --git a/hdr_schemata/models/HDRUK/v3_0_0/DataTable.py b/hdr_schemata/models/HDRUK/v3_0_0/DataTable.py index 2ea7ab1..22b513d 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/DataTable.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/DataTable.py @@ -14,7 +14,7 @@ class Config: extra = "forbid" name: Optional[constr(min_length=1, max_length=500)] = Field( - ..., **an.name.__dict__ + None, **an.name.__dict__ ) description: Optional[constr(min_length=1, max_length=20000)] = Field( None, **an.description.__dict__ diff --git a/hdr_schemata/models/HDRUK/v3_0_0/DatasetDescriptor.py b/hdr_schemata/models/HDRUK/v3_0_0/DatasetDescriptor.py index 50195b7..7de1aea 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/DatasetDescriptor.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/DatasetDescriptor.py @@ -7,7 +7,7 @@ an = annotations.datasetDescriptor class DatasetDescriptor(BaseModel): - pid: Optional[OneHundredFiftyCharacters] = Field(..., **an.pid.__dict__) - title: Optional[OneHundredFiftyCharacters] = Field(..., **an.title.__dict__) - url: Optional[Url] = Field(..., **an.url.__dict__) + pid: Optional[OneHundredFiftyCharacters] = Field(None, **an.pid.__dict__) + title: Optional[OneHundredFiftyCharacters] = Field(None, **an.title.__dict__) + url: Optional[Url] = Field(None, **an.url.__dict__) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py b/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py index 51a6381..173c56d 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py @@ -16,28 +16,28 @@ class Config: extra = "forbid" age: Optional[List[Age]] = Field( - ..., + None, title=an.age.title, description=an.age.description, # json_schema_extra={"guidance": an.age.guidance} ) ethnicity: Optional[List[Ethnicity]] = Field( - ..., + None, title=an.ethnicity.title, description=an.ethnicity.description, # json_schema_extra={"guidance": 
an.ethnicity.guidance} ) genderAssignedAtBirth: Optional[List[GenderAssignedAtBirth]] = Field( - ..., + None, title=an.genderAssignedAtBirth.title, description=an.genderAssignedAtBirth.description, # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} ) disease: Optional[List[Disease]] = Field( - ..., + None, title=an.disease.title, description=an.disease.description, # json_schema_extra={"guidance": an.disease.guidance} diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Documentation.py b/hdr_schemata/models/HDRUK/v3_0_0/Documentation.py index fb6cba1..016ee6b 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Documentation.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Documentation.py @@ -12,7 +12,7 @@ class Config: extra = "forbid" description: Description = Field( - None, **an.description.__dict__, json_schema_extra={"guidance": an.description.guidance} + ..., **an.description.__dict__, json_schema_extra={"guidance": an.description.guidance} ) associatedMedia: Optional[ diff --git a/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py b/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py index 9a6be6e..4ac4cdc 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py @@ -29,11 +29,6 @@ class Config: json_schema_extra={"guidance": an.language.guidance} ) -<<<<<<< HEAD format: List[Format] = Field( None, **an.format.__dict__, json_schema_extra={"guidance": an.format.guidance} -======= - format: Union[List[Format], Optional[CommaSeparatedValues]] = Field( - ..., **an.format.__dict__, json_schema_extra={"guidance": an.format.guidance} ->>>>>>> 5c24292 (debugging from onboarding form) ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Omics.py b/hdr_schemata/models/HDRUK/v3_0_0/Omics.py index 7068319..52200f9 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Omics.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Omics.py @@ -12,13 +12,13 @@ class Config: extra = "forbid" assay: Optional[Assay] = Field( - 
..., + None, **an.assay.__dict__, json_schema_extra={"guidance": an.assay.guidance} ) platform: Optional[Platform] = Field( - ..., + None, **an.platform.__dict__, json_schema_extra={"guidance": an.platform.guidance} ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py index 985826f..691dc7d 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py @@ -12,7 +12,7 @@ class Organisation(BaseOrganisation): identifier: Union[constr(min_length=2, max_length=50), int] = Field( - None, **an.identifier.__dict__, json_schema_extra={"guidance": an.identifier.guidance} + ..., **an.identifier.__dict__, json_schema_extra={"guidance": an.identifier.guidance} ) name: OneHundredFiftyCharacters = Field( diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Origin.py b/hdr_schemata/models/HDRUK/v3_0_0/Origin.py index 7e1b448..5798e11 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Origin.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Origin.py @@ -20,7 +20,7 @@ class Config: ) datasetSubType: Optional[List[DatasetSubType]] = Field( - ..., **an.datasetSubType.__dict__, json_schema_extra={"guidance": an.datasetSubType.guidance} + None, **an.datasetSubType.__dict__, json_schema_extra={"guidance": an.datasetSubType.guidance} ) source: Optional[List[SourceV2]] = Field( diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Provenance.py b/hdr_schemata/models/HDRUK/v3_0_0/Provenance.py index 44285d9..3457329 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Provenance.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Provenance.py @@ -17,5 +17,5 @@ class Config: ) temporal: Temporal = Field( - None, description=an.temporal.description, title=an.temporal.title + ..., description=an.temporal.description, title=an.temporal.title ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Revision.py b/hdr_schemata/models/HDRUK/v3_0_0/Revision.py index c31fef8..243a183 100644 --- 
a/hdr_schemata/models/HDRUK/v3_0_0/Revision.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Revision.py @@ -13,4 +13,4 @@ class Config: extra = "forbid" version: Semver = Field(..., **an.version.__dict__) - url: Optional[Url] = Field(..., **an.url.__dict__) + url: Optional[Url] = Field(None, **an.url.__dict__) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Summary.py b/hdr_schemata/models/HDRUK/v3_0_0/Summary.py index a7efd5c..e8d591a 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Summary.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Summary.py @@ -29,12 +29,8 @@ class Config: ..., **an.populationSize.__dict__, json_schema_extra={"guidance": an.populationSize.guidance} ) -<<<<<<< HEAD - keywords: Optional[List[OneHundredFiftyCharacters]] = ( -======= - keywords: Optional[Union[List[OneHundredFiftyCharacters], CommaSeparatedValues]] = ( ->>>>>>> 5c24292 (debugging from onboarding form) - Field(..., **an.keywords.__dict__, json_schema_extra={"guidance": an.keywords.guidance}) + keywords: Optional[Union[List[OneHundredFiftyCharacters], CommaSeparatedValues]] = Field( + None, **an.keywords.__dict__, json_schema_extra={"guidance": an.keywords.guidance} ) doiName: Optional[Doi] = Field( diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index 3cd4542..65b98e9 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -258,4 +258,4 @@ def remove_types(data): # create_markdown(Gwdm10, dir_path+"/../../docs/GWDM", "1.0") # create_markdown(Gwdm11, dir_path+"/../../docs/GWDM", "1.1") # create_markdown(Gwdm12, dir_path+"/../../docs/GWDM", "1.2") -# create_markdown(Gwdm20, dir_path+"/../../docs/GWDM", "2.0") +create_markdown(Gwdm20, dir_path+"/../../docs/GWDM", "2.0") From 5643041ecf5b20ee87c069128610208a9e2af1f2 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Mon, 19 Aug 2024 10:02:35 +0100 Subject: [PATCH 09/23] fixing up after rebase --- docs/HDRUK/3.0.0.form.json | 14 ++++++------- docs/HDRUK/3.0.0.md 
| 20 +++++++++---------- docs/HDRUK/3.0.0.structure.json | 18 ++++++++--------- hdr_schemata/models/HDRUK/3.0.0/schema.json | 20 ++++++++++--------- hdr_schemata/models/HDRUK/v3_0_0/Coverage.py | 6 +++--- .../models/HDRUK/v3_0_0/FormatAndStandards.py | 8 ++++---- hdr_schemata/models/HDRUK/v3_0_0/Temporal.py | 4 ++-- 7 files changed, 45 insertions(+), 45 deletions(-) diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 087cac4..58b0070 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -358,7 +358,7 @@ "location": "documentation.inPipeline" }, { - "required": true, + "required": false, "title": "Geographic coverage", "description": "The geographical area covered by the dataset. It is recommended that links are to entries in one of the recommended standards:\\n- For locations in the UK: [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about)\\n- For locations in other countries: [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes)", "guidance": "- The geographical area covered by the dataset.\\n- Please provide a valid location.\\n- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about).\\n- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes).", @@ -725,7 +725,7 @@ "location": "provenance.origin.imageContrast" }, { - "required": false, + "required": true, "title": "Publishing frequency", "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. 
If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", "guidance": "Please indicate the frequency of publishing.\\n- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.\\n- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.\\n- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.\\n- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.\\n- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.\\n- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/.\\n\\n Options:\\n- **Static**: Dataset published once.\\n- **Irregular**: Dataset published at uneven intervals.\\n- **Continuous**: Dataset published without interruption.\\n- **Biennial**: Dataset published every two years.\\n- **Annual**: Dataset published occurs once a year.\\n- **Biannual**: Dataset published twice a year.\\n- **Quarterly**: Dataset published every three months.\\n- **Bimonthly**: Dataset published every two months.\\n- **Monthly**: Dataset published once a month.\\n- **Biweekly**: Dataset published every two weeks.\\n- **Weekly**: Dataset published once a week.\\n- **Twice weekly**: Dataset published twice a week.\\n- **Daily**: Dataset 
published once a day.\\n- **Other**: Dataset published using other interval.", @@ -788,7 +788,7 @@ "location": "provenance.temporal.endDate" }, { - "required": false, + "required": true, "title": "Time lag", "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.", "guidance": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.\\n- **Less than 1 week**: Typical time lag of less than a week.\\n- **1-2 weeks**: Typical time-lag of one to two weeks.\\n- **2-4 weeks**: Typical time-lag of two to four weeks.\\n- **1-2 months**: Typical time-lag of one to two months.\\n- **2-6 months**: Typical time-lag of two to six months.\\n- **6 months plus**: Typical time-lag of more than six months.\\n- **Variable**: Variable time-lag.\\n- **Not applicable**: Not Applicable i.e. static dataset.\\n- **Other**: Other time-lag.", @@ -1026,7 +1026,7 @@ "location": "accessibility.access.dataProcessor" }, { - "required": false, + "required": true, "title": "Controlled vocabulary", "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", "guidance": "- List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.\\n- If the controlled vocabularies are local standards, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.\\n- Notes: More than one vocabulary may be provided.\\n- **Local**: Local Coding Standard.\\n- **OPCS4**: .\\n- **READ**: .\\n- **SNOMED CT**: .\\n- **SNOMED RT**: .\\n- **DM+D**: .\\n- **NHS National Codes**: .\\n- **ODS**: .\\n- **LOINC**: .\\n- **ICD10**: .\\n- **ICD10CM**: .\\n- **ICD10PCS**: .\\n- **ICD9CM**: .\\n- **ICD9**: .\\n- **ICDO3**: .\\n- **AMT**: .\\n- **APC**: .\\n- **ATC**: .\\n- **CIEL**: .\\n- **HPO**: .\\n- **CPT4**: .\\n- **DPD**: .\\n- **DRG**: .\\n- **HEMONC**: .\\n- **JMDC**: .\\n- **KCD7**: .\\n- **MULTUM**: .\\n- **NAACCR**: .\\n- **NDC**: .\\n- **NDFRT** <:https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.\\n- **OXMIS**: .\\n- **RXNORM**: .\\n- **RXNORM EXTENSION**: .\\n- **SPL**: .\\n- **Other**: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.\\n- **NHS Scotland National Codes**: .\\n- **NHS Wales National Codes**: ", @@ -1079,7 +1079,7 @@ "location": "accessibility.formatAndStandards.vocabularyEncodingScheme" }, { - "required": false, + "required": true, "title": "Alignment with standardised data models", "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", "guidance": "- List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.\\n- If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.\\n- **HL7 FHIR**: .\\n- **HL7 V2**: .\\n- **HL7 CDA**: .\\n- **HL7 CCOW**: .\\n- **DICOM**: .\\n- **I2B2**: .\\n- **IHE**: .\\n- **OMOP**: .\\n- **openEHR**: .\\n- **Sentinel**: .\\n- **PCORnet**: .\\n- **CDISC**: .\\n- **Local**: In-house developed data model.\\n- **Other**: Other standardised data model.\\n- **NHS Data Dictionary**: .\\n- **NHS Scotland Data Dictionary**: .\\n- **NHS Wales Data Dictionary**: .", @@ -1114,7 +1114,7 @@ "location": "accessibility.formatAndStandards.conformsTo" }, { - "required": false, + "required": true, "title": "Language", "description": "This should list all the languages in which the dataset metadata and underlying data is made available complaint with ISO 639.", "guidance": "https://www.iso.org/iso-639-language-code\\n- **aa**: Afar\\n- **ab**: Abkhazian\\n- **af**: Afrikaans\\n- **ak**: Akan\\n- **sq**: Albanian\\n- **am**: Amharic\\n- **ar**: Arabic\\n- **an**: Aragonese\\n- **hy**: Armenian\\n- **as**: Assamese\\n- **av**: Avaric\\n- **ae**: Avestan\\n- **ay**: Aymara\\n- **az**: Azerbaijani\\n- **ba**: Bashkir\\n- **bm**: Bambara\\n- **eu**: Basque\\n- **be**: Belarusian\\n- **bn**: Bengali\\n- **bh**: Bihari languages\\n- **bi**: Bislama\\n- **bo**: Tibetan\\n- **bs**: Bosnian\\n- **br**: Breton\\n- **bg**: Bulgarian\\n- **my**: Burmese\\n- **ca**: Catalan; Valencian\\n- **cs**: Czech\\n- **ch**: Chamorro\\n- **ce**: Chechen\\n- **zh**: Chinese\\n- **cu**: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic\\n- **cv**: Chuvash\\n- **kw**: Cornish\\n- **co**: Corsican\\n- **cr**: Cree\\n- **cy**: Welsh\\n- **cs**: Czech\\n- **da**: Danish\\n- **de**: German\\n- **dv**: Divehi; Dhivehi; Maldivian\\n- **nl**: Dutch; Flemish\\n- **dz**: Dzongkha\\n- **el**: Greek, Modern (1453-)\\n- **en**: English\\n- **eo**: Esperanto\\n- **et**: Estonian\\n- **eu**: 
Basque\\n- **ee**: Ewe\\n- **fo**: Faroese\\n- **fa**: Persian\\n- **fj**: Fijian\\n- **fi**: Finnish\\n- **fr**: French\\n- **fy**: Western Frisian\\n- **ff**: Fulah\\n- **ka**: Georgian\\n- **de**: German\\n- **gd**: Gaelic; Scottish Gaelic\\n- **ga**: Irish\\n- **gl**: Galician\\n- **gv**: Manx\\n- **el**: Greek, Modern (1453-)\\n- **gn**: Guarani\\n- **gu**: Gujarati\\n- **ht**: Haitian; Haitian Creole\\n- **ha**: Hausa\\n- **ho**: Hiri Motu\\n- **hr**: Croatian\\n- **hu**: Hungarian\\n- **hy**: Armenian\\n- **ig**: Igbo\\n- **is**: Icelandic\\n- **io**: Ido\\n- **ii**: Sichuan Yi; Nuosu\\n- **iu**: Inuktitut\\n- **ie**: Interlingue; Occidental\\n- **ia**: Interlingua (International Auxiliary Language Association)\\n- **id**: Indonesian\\n- **ik**: Inupiaq\\n- **is**: Icelandic\\n- **it**: Italian\\n- **jv**: Javanese\\n- **ja**: Japanese\\n- **kl**: Kalaallisut; Greenlandic\\n- **kn**: Kannada\\n- **ks**: Kashmiri\\n- **ka**: Georgian\\n- **kr**: Kanuri\\n- **kk**: Kazakh\\n- **km**: Central Khmer\\n- **ki**: Kikuyu; Gikuyu\\n- **rw**: Kinyarwanda\\n- **ky**: Kirghiz; Kyrgyz\\n- **kv**: Komi\\n- **kg**: Kongo\\n- **ko**: Korean\\n- **kj**: Kuanyama; Kwanyama\\n- **ku**: Kurdish\\n- **lo**: Lao\\n- **la**: Latin\\n- **lv**: Latvian\\n- **li**: Limburgan; Limburger; limburgish\\n- **ln**: Lingala\\n- **lt**: Lithuanian\\n- **lb**: Luxembourgish; Letzeburgesch\\n- **lu**: Luba-Katanga\\n- **lg**: Ganda\\n- **mk**: Macedonian\\n- **mh**: Marshallese\\n- **ml**: Malayalam\\n- **mi**: Maori\\n- **mr**: Marathi\\n- **ms**: Malay\\n- **mk**: Macedonian\\n- **mg**: Malagasy\\n- **mt**: Maltese\\n- **mn**: Mongolian\\n- **mi**: Maori\\n- **ms**: Malay\\n- **my**: Burmese\\n- **na**: Nauru\\n- **nv**: Navajo; Navaho\\n- **nr**: Ndebele, South; South Ndebele\\n- **nd**: Ndebele, North; North Ndebele\\n- **ng**: Ndonga\\n- **ne**: Nepali\\n- **nl**: Dutch; Flemish\\n- **nn**: Norwegian Nynorsk; Nynorsk, Norwegian\\n- **nb**: Bokm\u00e5l, Norwegian; Norwegian 
Bokm\u00e5l\\n- **no**: Norwegian\\n- **ny**: Chichewa; Chewa; Nyanja\\n- **oc**: Occitan (post 1500)\\n- **oj**: Ojibwa\\n- **or**: Oriya\\n- **om**: Oromo\\n- **os**: Ossetian; Ossetic\\n- **pa**: Panjabi; Punjabi\\n- **fa**: Persian\\n- **pi**: Pali\\n- **pl**: Polish\\n- **pt**: Portuguese\\n- **ps**: Pushto; Pashto\\n- **qu**: Quechua\\n- **rm**: Romansh\\n- **ro**: Romanian; Moldavian; Moldovan\\n- **rn**: Rundi\\n- **ru**: Russian\\n- **sg**: Sango\\n- **sa**: Sanskrit\\n- **si**: Sinhala; Sinhalese\\n- **sk**: Slovak\\n- **sl**: Slovenian\\n- **se**: Northern Sami\\n- **sm**: Samoan\\n- **sn**: Shona\\n- **sd**: Sindhi\\n- **so**: Somali\\n- **st**: Sotho, Southern\\n- **es**: Spanish; Castilian\\n- **sq**: Albanian\\n- **sc**: Sardinian\\n- **sr**: Serbian\\n- **ss**: Swati\\n- **su**: Sundanese\\n- **sw**: Swahili\\n- **sv**: Swedish\\n- **ty**: Tahitian\\n- **ta**: Tamil\\n- **tt**: Tatar\\n- **te**: Telugu\\n- **tg**: Tajik\\n- **tl**: Tagalog\\n- **th**: Thai\\n- **bo**: Tibetan\\n- **ti**: Tigrinya\\n- **to**: Tonga (Tonga Islands)\\n- **tn**: Tswana\\n- **ts**: Tsonga\\n- **tk**: Turkmen\\n- **tr**: Turkish\\n- **tw**: Twi\\n- **ug**: Uighur; Uyghur\\n- **uk**: Ukrainian\\n- **ur**: Urdu\\n- **uz**: Uzbek\\n- **ve**: Venda\\n- **vi**: Vietnamese\\n- **vo**: Volap\u00fck\\n- **cy**: Welsh\\n- **wa**: Walloon\\n- **wo**: Wolof\\n- **xh**: Xhosa\\n- **yi**: Yiddish\\n- **yo**: Yoruba\\n- **za**: Zhuang; Chuang\\n- **zh**: Chinese\\n- **zu**: Zulu", @@ -1313,7 +1313,7 @@ "location": "accessibility.formatAndStandards.language" }, { - "required": false, + "required": true, "title": "Format", "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. 
If you are using the HOP you will be directed to a service desk page where you can request your additional format. If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", "guidance": "- If multiple formats are available, please specify. See application, audio, image, message, model, multipart, text, video, .\\n- Please **enter one format type at a time** and click **Add New Field** to add further keywords.\\n- Note: If your file format is not included in the current list of formats, please indicate other.\\n- **Example**: text/tab-separated-values, application/sql, text/csv, image/diacom-rle", diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index 19378b8..0136587 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -319,7 +319,7 @@ The geographical area covered by the dataset. It is recommended that links are t | title | guidance | is_list | required | type | |:--------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------| -| Geographic coverage | - The geographical area covered by the dataset.
- Please provide a valid location.
- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about).
- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes). | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List'] | +| Geographic coverage | - The geographical area covered by the dataset.
- Please provide a valid location.
- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/ons::index-of-place-names-in-great-britain-november-2021/about).
- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes). | False | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List'] | Examples: @@ -508,7 +508,7 @@ Please indicate the frequency of distribution release. If a dataset is distribut | title | guidance | is_list | required | type | |:---------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
:----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Publishing frequency | Please indicate the frequency of publishing.
- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.
- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.
- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.
- If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null.
- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.
- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/.

Options:
- **Static**: Dataset published once.
- **Irregular**: Dataset published at uneven intervals.
- **Continuous**: Dataset published without interruption.
- **Biennial**: Dataset published every two years.
- **Annual**: Dataset published occurs once a year.
- **Biannual**: Dataset published twice a year.
- **Quarterly**: Dataset published every three months.
- **Bimonthly**: Dataset published every two months.
- **Monthly**: Dataset published once a month.
- **Biweekly**: Dataset published every two weeks.
- **Weekly**: Dataset published once a week.
- **Twice weekly**: Dataset published twice a week.
- **Daily**: Dataset published once a day.
- **Other**: Dataset published using other interval. | False | False | ["PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]"] | +| Publishing frequency | Please indicate the frequency of publishing.
- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.
- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.
- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.
- If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null.
- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.
- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/.

Options:
- **Static**: Dataset published once.
- **Irregular**: Dataset published at uneven intervals.
- **Continuous**: Dataset published without interruption.
- **Biennial**: Dataset published every two years.
- **Annual**: Dataset published once a year.
- **Biannual**: Dataset published twice a year.
- **Quarterly**: Dataset published every three months.
- **Bimonthly**: Dataset published every two months.
- **Monthly**: Dataset published once a month.
- **Biweekly**: Dataset published every two weeks.
- **Weekly**: Dataset published once a week.
- **Twice a week**: Dataset published twice a week.
- **Daily**: Dataset published once a day.
- **Other**: Dataset published using other interval. | False | True | ["PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]"] | @@ -552,7 +552,7 @@ Please indicate the typical time-lag between an event and the data for that even | title | guidance | is_list | required | type | |:---------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------| -| Time lag | Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.
- **Less than 1 week**: Typical time lag of less than a week.
- **1-2 weeks**: Typical time-lag of one to two weeks.
- **2-4 weeks**: Typical time-lag of two to four weeks.
- **1-2 months**: Typical time-lag of one to two months.
- **2-6 months**: Typical time-lag of two to six months.
- **6 months plus**: Typical time-lag of more than six months.
- **Variable**: Variable time-lag.
- **Not applicable**: Not Applicable i.e. static dataset.
- **Other**: Other time-lag. | False | False | ["TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']"] | +| Time lag | Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.
- **Less than 1 week**: Typical time lag of less than a week.
- **1-2 weeks**: Typical time-lag of one to two weeks.
- **2-4 weeks**: Typical time-lag of two to four weeks.
- **1-2 months**: Typical time-lag of one to two months.
- **2-6 months**: Typical time-lag of two to six months.
- **6 months plus**: Typical time-lag of more than six months.
- **Variable**: Variable time-lag.
- **Not applicable**: Not Applicable i.e. static dataset.
- **Other**: Other time-lag. | False | True | ["TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']"] | @@ -683,9 +683,9 @@ Please provide an indication of the typical processing times based on the types Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored. -| title | guidance | is_list | required | type | -|:-------------|:----------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------| -| Jurisdiction | A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/ | False | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'List', 'null'] | +| title | guidance | is_list | required | type | +|:-------------|:----------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------| +| Jurisdiction | A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/ | True | False | ["Isocountrycode[{'pattern': '^[A-Z]{2}(-[A-Z]{2,3})?$', 'type': 'string'}]"] | @@ -732,7 +732,7 @@ List any relevant terminologies / ontologies / controlled vocabularies, such as | title | guidance | is_list | required | type | 
|:----------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Controlled vocabulary | - List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.
- If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition.
- Notes: More than one vocabulary may be provided.
- **Local**: Local Coding Standard.
- **OPCS4**: .
- **READ**: .
- **SNOMED CT**: .
- **SNOMED RT**: .
- **DM+D**: .
- **NHS National Codes**: .
- **ODS**: .
- **LOINC**: .
- **ICD10**: .
- **ICD10CM**: .
- **ICD10PCS**: .
- **ICD9CM**: .
- **ICD9**: .
- **ICDO3**: .
- **AMT**: .
- **APC**: .
- **ATC**: .
- **CIEL**: .
- **HPO**: .
- **CPT4**: .
- **DPD**: .
- **DRG**: .
- **HEMONC**: .
- **JMDC**: .
- **KCD7**: .
- **MULTUM**: .
- **NAACCR**: .
- **NDC**: .
- **NDFRT** <:https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.
- **OXMIS**: .
- **RXNORM**: .
- **RXNORM EXTENSION**: .
- **SPL**: .
- **Other**: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.
- **NHS Scotland National Codes**: .
- **NHS Wales National Codes**: | True | False | ["ControlledVocabularyEnum['LOCAL','OPCS4','READ','SNOMED CT','SNOMED RT','DM PLUS D','DM+D','NHS NATIONAL CODES','NHS SCOTLAND NATIONAL CODES','NHS WALES NATIONAL CODES','ODS','LOINC','ICD10','ICD10CM','ICD10PCS','ICD9CM','ICD9','ICDO3','AMT','APC','ATC','CIEL','HPO','CPT4','DPD','DRG','HEMONC','JMDC','KCD7','MULTUM','NAACCR','NDC','NDFRT','OXMIS','RXNORM','RXNORM EXTENSION','SPL','OTHER']"] | +| Controlled vocabulary | - List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.
- If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition.
- Notes: More than one vocabulary may be provided.
- **Local**: Local Coding Standard.
- **OPCS4**: .
- **READ**: .
- **SNOMED CT**: .
- **SNOMED RT**: .
- **DM+D**: .
- **NHS National Codes**: .
- **ODS**: .
- **LOINC**: .
- **ICD10**: .
- **ICD10CM**: .
- **ICD10PCS**: .
- **ICD9CM**: .
- **ICD9**: .
- **ICDO3**: .
- **AMT**: .
- **APC**: .
- **ATC**: .
- **CIEL**: .
- **HPO**: .
- **CPT4**: .
- **DPD**: .
- **DRG**: .
- **HEMONC**: .
- **JMDC**: .
- **KCD7**: .
- **MULTUM**: .
- **NAACCR**: .
- **NDC**: .
- **NDFRT**: <https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.
- **OXMIS**: .
- **RXNORM**: .
- **RXNORM EXTENSION**: .
- **SPL**: .
- **Other**: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.
- **NHS Scotland National Codes**: .
- **NHS Wales National Codes**: | True | True | ["ControlledVocabularyEnum['LOCAL','OPCS4','READ','SNOMED CT','SNOMED RT','DM PLUS D','DM+D','NHS NATIONAL CODES','NHS SCOTLAND NATIONAL CODES','NHS WALES NATIONAL CODES','ODS','LOINC','ICD10','ICD10CM','ICD10PCS','ICD9CM','ICD9','ICDO3','AMT','APC','ATC','CIEL','HPO','CPT4','DPD','DRG','HEMONC','JMDC','KCD7','MULTUM','NAACCR','NDC','NDFRT','OXMIS','RXNORM','RXNORM EXTENSION','SPL','OTHER']"] | @@ -743,7 +743,7 @@ List standardised data models that the dataset has been stored in or transformed | title | guidance | is_list | required | type | |:----------------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Alignment with standardised data models | - List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.
- If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition.
- **HL7 FHIR**: .
- **HL7 V2**: .
- **HL7 CDA**: .
- **HL7 CCOW**: .
- **DICOM**: .
- **I2B2**: .
- **IHE**: .
- **OMOP**: .
- **openEHR**: .
- **Sentinel**: .
- **PCORnet**: .
- **CDISC**: .
- **Local**: In-house developed data model.
- **Other**: Other standardised data model.
- **NHS Data Dictionary**: .
- **NHS Scotland Data Dictionary**: .
- **NHS Wales Data Dictionary**: . | True | False | ["StandardisedDataModelsEnum['HL7 FHIR','HL7 V2','HL7 CDA','HL7 CCOW','LOINC','DICOM','I2B2','IHE','OMOP','OPENEHR','SENTINEL','PCORNET','CDISC','NHS DATA DICTIONARY','NHS SCOTLAND DATA DICTIONARY','NHS WALES DATA DICTIONARY','LOCAL','OTHER']"] | +| Alignment with standardised data models | - List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.
- If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition.
- **HL7 FHIR**: .
- **HL7 V2**: .
- **HL7 CDA**: .
- **HL7 CCOW**: .
- **DICOM**: .
- **I2B2**: .
- **IHE**: .
- **OMOP**: .
- **openEHR**: .
- **Sentinel**: .
- **PCORnet**: .
- **CDISC**: .
- **Local**: In-house developed data model.
- **Other**: Other standardised data model.
- **NHS Data Dictionary**: .
- **NHS Scotland Data Dictionary**: .
- **NHS Wales Data Dictionary**: . | True | True | ["StandardisedDataModelsEnum['HL7 FHIR','HL7 V2','HL7 CDA','HL7 CCOW','LOINC','DICOM','I2B2','IHE','OMOP','OPENEHR','SENTINEL','PCORNET','CDISC','NHS DATA DICTIONARY','NHS SCOTLAND DATA DICTIONARY','NHS WALES DATA DICTIONARY','LOCAL','OTHER']"] | Examples: @@ -756,7 +756,7 @@ This should list all the languages in which the dataset metadata and underlying | title | guidance | is_list | required | type | |:---------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Language | https://www.iso.org/iso-639-language-code
- **aa**: Afar
- **ab**: Abkhazian
- **af**: Afrikaans
- **ak**: Akan
- **sq**: Albanian
- **am**: Amharic
- **ar**: Arabic
- **an**: Aragonese
- **hy**: Armenian
- **as**: Assamese
- **av**: Avaric
- **ae**: Avestan
- **ay**: Aymara
- **az**: Azerbaijani
- **ba**: Bashkir
- **bm**: Bambara
- **eu**: Basque
- **be**: Belarusian
- **bn**: Bengali
- **bh**: Bihari languages
- **bi**: Bislama
- **bo**: Tibetan
- **bs**: Bosnian
- **br**: Breton
- **bg**: Bulgarian
- **my**: Burmese
- **ca**: Catalan; Valencian
- **cs**: Czech
- **ch**: Chamorro
- **ce**: Chechen
- **zh**: Chinese
- **cu**: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic
- **cv**: Chuvash
- **kw**: Cornish
- **co**: Corsican
- **cr**: Cree
- **cy**: Welsh
- **cs**: Czech
- **da**: Danish
- **de**: German
- **dv**: Divehi; Dhivehi; Maldivian
- **nl**: Dutch; Flemish
- **dz**: Dzongkha
- **el**: Greek, Modern (1453-)
- **en**: English
- **eo**: Esperanto
- **et**: Estonian
- **eu**: Basque
- **ee**: Ewe
- **fo**: Faroese
- **fa**: Persian
- **fj**: Fijian
- **fi**: Finnish
- **fr**: French
- **fy**: Western Frisian
- **ff**: Fulah
- **ka**: Georgian
- **de**: German
- **gd**: Gaelic; Scottish Gaelic
- **ga**: Irish
- **gl**: Galician
- **gv**: Manx
- **el**: Greek, Modern (1453-)
- **gn**: Guarani
- **gu**: Gujarati
- **ht**: Haitian; Haitian Creole
- **ha**: Hausa
- **ho**: Hiri Motu
- **hr**: Croatian
- **hu**: Hungarian
- **hy**: Armenian
- **ig**: Igbo
- **is**: Icelandic
- **io**: Ido
- **ii**: Sichuan Yi; Nuosu
- **iu**: Inuktitut
- **ie**: Interlingue; Occidental
- **ia**: Interlingua (International Auxiliary Language Association)
- **id**: Indonesian
- **ik**: Inupiaq
- **is**: Icelandic
- **it**: Italian
- **jv**: Javanese
- **ja**: Japanese
- **kl**: Kalaallisut; Greenlandic
- **kn**: Kannada
- **ks**: Kashmiri
- **ka**: Georgian
- **kr**: Kanuri
- **kk**: Kazakh
- **km**: Central Khmer
- **ki**: Kikuyu; Gikuyu
- **rw**: Kinyarwanda
- **ky**: Kirghiz; Kyrgyz
- **kv**: Komi
- **kg**: Kongo
- **ko**: Korean
- **kj**: Kuanyama; Kwanyama
- **ku**: Kurdish
- **lo**: Lao
- **la**: Latin
- **lv**: Latvian
- **li**: Limburgan; Limburger; limburgish
- **ln**: Lingala
- **lt**: Lithuanian
- **lb**: Luxembourgish; Letzeburgesch
- **lu**: Luba-Katanga
- **lg**: Ganda
- **mk**: Macedonian
- **mh**: Marshallese
- **ml**: Malayalam
- **mi**: Maori
- **mr**: Marathi
- **ms**: Malay
- **mk**: Macedonian
- **mg**: Malagasy
- **mt**: Maltese
- **mn**: Mongolian
- **mi**: Maori
- **ms**: Malay
- **my**: Burmese
- **na**: Nauru
- **nv**: Navajo; Navaho
- **nr**: Ndebele, South; South Ndebele
- **nd**: Ndebele, North; North Ndebele
- **ng**: Ndonga
- **ne**: Nepali
- **nl**: Dutch; Flemish
- **nn**: Norwegian Nynorsk; Nynorsk, Norwegian
- **nb**: Bokmål, Norwegian; Norwegian Bokmål
- **no**: Norwegian
- **ny**: Chichewa; Chewa; Nyanja
- **oc**: Occitan (post 1500)
- **oj**: Ojibwa
- **or**: Oriya
- **om**: Oromo
- **os**: Ossetian; Ossetic
- **pa**: Panjabi; Punjabi
- **fa**: Persian
- **pi**: Pali
- **pl**: Polish
- **pt**: Portuguese
- **ps**: Pushto; Pashto
- **qu**: Quechua
- **rm**: Romansh
- **ro**: Romanian; Moldavian; Moldovan
- **rn**: Rundi
- **ru**: Russian
- **sg**: Sango
- **sa**: Sanskrit
- **si**: Sinhala; Sinhalese
- **sk**: Slovak
- **sl**: Slovenian
- **se**: Northern Sami
- **sm**: Samoan
- **sn**: Shona
- **sd**: Sindhi
- **so**: Somali
- **st**: Sotho, Southern
- **es**: Spanish; Castilian
- **sq**: Albanian
- **sc**: Sardinian
- **sr**: Serbian
- **ss**: Swati
- **su**: Sundanese
- **sw**: Swahili
- **sv**: Swedish
- **ty**: Tahitian
- **ta**: Tamil
- **tt**: Tatar
- **te**: Telugu
- **tg**: Tajik
- **tl**: Tagalog
- **th**: Thai
- **bo**: Tibetan
- **ti**: Tigrinya
- **to**: Tonga (Tonga Islands)
- **tn**: Tswana
- **ts**: Tsonga
- **tk**: Turkmen
- **tr**: Turkish
- **tw**: Twi
- **ug**: Uighur; Uyghur
- **uk**: Ukrainian
- **ur**: Urdu
- **uz**: Uzbek
- **ve**: Venda
- **vi**: Vietnamese
- **vo**: Volapük
- **cy**: Welsh
- **wa**: Walloon
- **wo**: Wolof
- **xh**: Xhosa
- **yi**: Yiddish
- **yo**: Yoruba
- **za**: Zhuang; Chuang
- **zh**: Chinese
- **zu**: Zulu | True | False | ["LanguageEnum['aa','ab','ae','af','ak','am','an','ar','as','av','ay','az','ba','be','bg','bh','bi','bm','bn','bo','br','bs','ca','ce','ch','co','cr','cs','cu','cv','cy','da','de','dv','dz','ee','el','en','eo','es','et','eu','fa','ff','fi','fj','fo','fr','fy','ga','gd','gl','gn','gu','gv','ha','he','hi','ho','hr','ht','hu','hy','hz','ia','id','ie','ig','ii','ik','io','is','it','iu','ja','jv','ka','kg','ki','kj','kk','kl','km','kn','ko','kr','ks','ku','kv','kw','ky','la','lb','lg','li','ln','lo','lt','lu','lv','mg','mh','mi','mk','ml','mn','mr','ms','mt','my','na','nb','nd','ne','ng','nl','nn','no','nr','nv','ny','oc','oj','om','or','os','pa','pi','pl','ps','pt','qu','rm','rn','ro','ru','rw','sa','sc','sd','se','sg','si','sk','sl','sm','sn','so','sq','sr','ss','st','su','sv','sw','ta','te','tg','th','ti','tk','tl','tn','to','tr','ts','tt','tw','ty','ug','uk','ur','uz','ve','vi','vo','wa','wo','xh','yi','yo','za','zh','zu']"] | +| Language | https://www.iso.org/iso-639-language-code
- **aa**: Afar
- **ab**: Abkhazian
- **af**: Afrikaans
- **ak**: Akan
- **sq**: Albanian
- **am**: Amharic
- **ar**: Arabic
- **an**: Aragonese
- **hy**: Armenian
- **as**: Assamese
- **av**: Avaric
- **ae**: Avestan
- **ay**: Aymara
- **az**: Azerbaijani
- **ba**: Bashkir
- **bm**: Bambara
- **eu**: Basque
- **be**: Belarusian
- **bn**: Bengali
- **bh**: Bihari languages
- **bi**: Bislama
- **bo**: Tibetan
- **bs**: Bosnian
- **br**: Breton
- **bg**: Bulgarian
- **my**: Burmese
- **ca**: Catalan; Valencian
- **cs**: Czech
- **ch**: Chamorro
- **ce**: Chechen
- **zh**: Chinese
- **cu**: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic
- **cv**: Chuvash
- **kw**: Cornish
- **co**: Corsican
- **cr**: Cree
- **cy**: Welsh
- **cs**: Czech
- **da**: Danish
- **de**: German
- **dv**: Divehi; Dhivehi; Maldivian
- **nl**: Dutch; Flemish
- **dz**: Dzongkha
- **el**: Greek, Modern (1453-)
- **en**: English
- **eo**: Esperanto
- **et**: Estonian
- **eu**: Basque
- **ee**: Ewe
- **fo**: Faroese
- **fa**: Persian
- **fj**: Fijian
- **fi**: Finnish
- **fr**: French
- **fy**: Western Frisian
- **ff**: Fulah
- **ka**: Georgian
- **de**: German
- **gd**: Gaelic; Scottish Gaelic
- **ga**: Irish
- **gl**: Galician
- **gv**: Manx
- **el**: Greek, Modern (1453-)
- **gn**: Guarani
- **gu**: Gujarati
- **ht**: Haitian; Haitian Creole
- **ha**: Hausa
- **ho**: Hiri Motu
- **hr**: Croatian
- **hu**: Hungarian
- **hy**: Armenian
- **ig**: Igbo
- **is**: Icelandic
- **io**: Ido
- **ii**: Sichuan Yi; Nuosu
- **iu**: Inuktitut
- **ie**: Interlingue; Occidental
- **ia**: Interlingua (International Auxiliary Language Association)
- **id**: Indonesian
- **ik**: Inupiaq
- **is**: Icelandic
- **it**: Italian
- **jv**: Javanese
- **ja**: Japanese
- **kl**: Kalaallisut; Greenlandic
- **kn**: Kannada
- **ks**: Kashmiri
- **ka**: Georgian
- **kr**: Kanuri
- **kk**: Kazakh
- **km**: Central Khmer
- **ki**: Kikuyu; Gikuyu
- **rw**: Kinyarwanda
- **ky**: Kirghiz; Kyrgyz
- **kv**: Komi
- **kg**: Kongo
- **ko**: Korean
- **kj**: Kuanyama; Kwanyama
- **ku**: Kurdish
- **lo**: Lao
- **la**: Latin
- **lv**: Latvian
- **li**: Limburgan; Limburger; limburgish
- **ln**: Lingala
- **lt**: Lithuanian
- **lb**: Luxembourgish; Letzeburgesch
- **lu**: Luba-Katanga
- **lg**: Ganda
- **mk**: Macedonian
- **mh**: Marshallese
- **ml**: Malayalam
- **mi**: Maori
- **mr**: Marathi
- **ms**: Malay
- **mk**: Macedonian
- **mg**: Malagasy
- **mt**: Maltese
- **mn**: Mongolian
- **mi**: Maori
- **ms**: Malay
- **my**: Burmese
- **na**: Nauru
- **nv**: Navajo; Navaho
- **nr**: Ndebele, South; South Ndebele
- **nd**: Ndebele, North; North Ndebele
- **ng**: Ndonga
- **ne**: Nepali
- **nl**: Dutch; Flemish
- **nn**: Norwegian Nynorsk; Nynorsk, Norwegian
- **nb**: Bokmål, Norwegian; Norwegian Bokmål
- **no**: Norwegian
- **ny**: Chichewa; Chewa; Nyanja
- **oc**: Occitan (post 1500)
- **oj**: Ojibwa
- **or**: Oriya
- **om**: Oromo
- **os**: Ossetian; Ossetic
- **pa**: Panjabi; Punjabi
- **fa**: Persian
- **pi**: Pali
- **pl**: Polish
- **pt**: Portuguese
- **ps**: Pushto; Pashto
- **qu**: Quechua
- **rm**: Romansh
- **ro**: Romanian; Moldavian; Moldovan
- **rn**: Rundi
- **ru**: Russian
- **sg**: Sango
- **sa**: Sanskrit
- **si**: Sinhala; Sinhalese
- **sk**: Slovak
- **sl**: Slovenian
- **se**: Northern Sami
- **sm**: Samoan
- **sn**: Shona
- **sd**: Sindhi
- **so**: Somali
- **st**: Sotho, Southern
- **es**: Spanish; Castilian
- **sq**: Albanian
- **sc**: Sardinian
- **sr**: Serbian
- **ss**: Swati
- **su**: Sundanese
- **sw**: Swahili
- **sv**: Swedish
- **ty**: Tahitian
- **ta**: Tamil
- **tt**: Tatar
- **te**: Telugu
- **tg**: Tajik
- **tl**: Tagalog
- **th**: Thai
- **bo**: Tibetan
- **ti**: Tigrinya
- **to**: Tonga (Tonga Islands)
- **tn**: Tswana
- **ts**: Tsonga
- **tk**: Turkmen
- **tr**: Turkish
- **tw**: Twi
- **ug**: Uighur; Uyghur
- **uk**: Ukrainian
- **ur**: Urdu
- **uz**: Uzbek
- **ve**: Venda
- **vi**: Vietnamese
- **vo**: Volapük
- **cy**: Welsh
- **wa**: Walloon
- **wo**: Wolof
- **xh**: Xhosa
- **yi**: Yiddish
- **yo**: Yoruba
- **za**: Zhuang; Chuang
- **zh**: Chinese
- **zu**: Zulu | True | True | ["LanguageEnum['aa','ab','ae','af','ak','am','an','ar','as','av','ay','az','ba','be','bg','bh','bi','bm','bn','bo','br','bs','ca','ce','ch','co','cr','cs','cu','cv','cy','da','de','dv','dz','ee','el','en','eo','es','et','eu','fa','ff','fi','fj','fo','fr','fy','ga','gd','gl','gn','gu','gv','ha','he','hi','ho','hr','ht','hu','hy','hz','ia','id','ie','ig','ii','ik','io','is','it','iu','ja','jv','ka','kg','ki','kj','kk','kl','km','kn','ko','kr','ks','ku','kv','kw','ky','la','lb','lg','li','ln','lo','lt','lu','lv','mg','mh','mi','mk','ml','mn','mr','ms','mt','my','na','nb','nd','ne','ng','nl','nn','no','nr','nv','ny','oc','oj','om','or','os','pa','pi','pl','ps','pt','qu','rm','rn','ro','ru','rw','sa','sc','sd','se','sg','si','sk','sl','sm','sn','so','sq','sr','ss','st','su','sv','sw','ta','te','tg','th','ti','tk','tl','tn','to','tr','ts','tt','tw','ty','ug','uk','ur','uz','ve','vi','vo','wa','wo','xh','yi','yo','za','zh','zu']"] | @@ -767,7 +767,7 @@ If multiple formats are available please specify. See application, audio, image, | title | guidance | is_list | required | type | |:--------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------| -| Format | - If multiple formats are available, please specify. See application, audio, image, message, model, multipart, text, video, .
- Please **enter one format type at a time** and click **Add New Field** to add further keywords.
- Note: If your file format is not included in the current list of formats, please indicate other.
- **Example**: text/tab-separated-values, application/sql, text/csv, image/diacom-rle | True | False | ["Format[{'minLength': 1, 'type': 'string'}]"] | +| Format | - If multiple formats are available, please specify. See application, audio, image, message, model, multipart, text, video, .
- Please **enter one format type at a time** and click **Add New Field** to add further keywords.
- Note: If your file format is not included in the current list of formats, please indicate other.
- **Example**: text/tab-separated-values, application/sql, text/csv, image/diacom-rle | True | True | ["Format[{'minLength': 1, 'type': 'string'}]"] | Examples: diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index 4ea1506..d56ed37 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -419,7 +419,7 @@ "subItems": [ { "name": "spatial", - "required": true, + "required": false, "title": "Geographic coverage", "description": "The geographical area covered by the dataset. It is recommended that links are to entries in one of the recommended standards:- For locations in the UK: ONS standards- For locations in other countries: ISO 3166-1 & ISO 3166-2", "guidance": "The geographical area covered by the dataset.- Please provide a valid location.- For locations in the UK, this location should conform to ONS standards.- For locations in other countries we use ISO 3166-1 & ISO 3166-2.", @@ -664,7 +664,7 @@ "subItems": [ { "name": "publishingFrequency", - "required": false, + "required": true, "title": "Publishing frequency", "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", "guidance": "Please indicate the frequency of publishing.- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/. Options:- Static: Dataset published once.- Irregular: Dataset published at uneven intervals.- Continuous: Dataset published without interruption.- Biennial: Dataset published every two years.- Annual: Dataset published occurs once a year.- Biannual: Dataset published twice a year.- Quarterly: Dataset published every three months.- Bimonthly: Dataset published every two months.- Monthly: Dataset published once a month.- Biweekly: Dataset published every two weeks.- Weekly: Dataset published once a week.- Twice weekly: Dataset published twice a week.- Daily: Dataset published once a day.- Other: Dataset published using other interval.", @@ -722,7 +722,7 @@ }, { "name": "timeLag", - "required": false, + "required": true, "title": "Time lag", "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.", "guidance": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.- Less than 1 week: Typical time lag of less than a week.- 1-2 weeks: Typical time-lag of 
one to two weeks.- 2-4 weeks: Typical time-lag of two to four weeks.- 1-2 months: Typical time-lag of one to two months.- 2-6 months: Typical time-lag of two to six months.- 6 months plus: Typical time-lag of more than six months.- Variable: Variable time-lag.- Not applicable: Not Applicable i.e. static dataset.- Other: Other time-lag.", @@ -908,9 +908,7 @@ "guidance": "A full list of country codes can be found here (alpha-2 column): https://www.iso.org/obp/ui/#search/code/", "examples": null, "type": [ - "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", - "List", - "null" + "Isocountrycode[{'pattern': '^[A-Z]{2}(-[A-Z]{2,3})?$', 'type': 'string'}]" ], "is_list": true, "is_optional": true, @@ -969,7 +967,7 @@ "subItems": [ { "name": "vocabularyEncodingScheme", - "required": false, + "required": true, "title": "Controlled vocabulary", "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", "guidance": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.- If the controlled vocabularies are local standards, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.- Notes: More than one vocabulary may be provided.- Local: Local Coding Standard.- OPCS4: https://www.datadictionary.nhs.uk/web_site_content/supporting_information/clinical_coding/opcs_classification_of_interventions_and_procedures.asp.- READ: https://digital.nhs.uk/services/terminology-and-classifications/read-codes.- SNOMED CT: http://www.snomed.org/.- SNOMED RT: https://confluence.ihtsdotools.org/display/DOCGLOSS/SNOMED+RT.- DM+D: https://digital.nhs.uk/data-and-information/information-standards/information-standards-and-data-collections-including-extractions/publications-and-notifications/standards-and-collections/scci0052-dictionary-of-medicines-and-devices-dm-d.- NHS National Codes: https://www.datadictionary.nhs.uk/.- ODS: https://digital.nhs.uk/services/organisation-data-service.- LOINC: https://loinc.org/.- ICD10: https://www.who.int/classifications/icd/icdonlineversions/en/.- ICD10CM: https://www.cdc.gov/nchs/icd/icd10cm.htm.- ICD10PCS: https://ec.europa.eu/eip/ageing/standards/healthcare/e-health/icd-10-pcs_en.- ICD9CM: https://www.cdc.gov/nchs/icd/icd9cm.htm.- ICD9: https://www.cdc.gov/nchs/icd/icd9.htm.- ICDO3: https://www.who.int/classifications/icd/adaptations/oncology/en/.- AMT: https://www.digitalhealth.gov.au/about-the-agency/tenders-and-offers/community-pharmacy-software-industry-partnership-offer/Webinar%20-%20Australian%20Medicines%20Terminology%20(AMT)%20and%20Implementation%20Options%2001032017.pdf.- APC: https://www.acep.org/administration/reimbursement/reimbursement-faqs/apc-ambulatory-payment-classifications-faq/.- ATC: https://www.whocc.no/atc_ddd_index/.- CIEL: https://github.com/OpenConceptLab/ocl_web/wiki/CIEL.- HPO: https://hpo.jax.org/app/.- CPT4: https://www.cms.gov/Regulations-and-Guidance/Legislation/CLIA/Downloads/SubjecttoCLIA.pdf.- DPD: 
https://health-products.canada.ca/dpd-bdpp/index-eng.jsp.- DRG: http://www.euro.who.int/__data/assets/pdf_file/0004/162265/e96538.pdf.- HEMONC: https://hemonc.org/wiki/Main_Page.- JMDC: https://www.jmdc.co.jp/en/.- KCD7: https://forums.ohdsi.org/t/adding-kcd7-code-korean-icd-10-to-the-omop-vocabulary/7576.- MULTUM: https://www.cerner.com/solutions/drug-database.- NAACCR: https://www.naaccr.org/.- NDC: https://www.fda.gov/drugs/drug-approvals-and-databases/national-drug-code-directory.- NDFRT <:https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.- OXMIS: https://oxrisk.com/oxmis/.- RXNORM: https://www.nlm.nih.gov/research/umls/rxnorm/index.html.- RXNORM EXTENSION: https://www.nlm.nih.gov/research/umls/rxnorm/index.html.- SPL: https://www.fda.gov/industry/fda-resources-data-standards/structured-product-labeling-resources.- Other: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.- NHS Scotland National Codes: https://www.ndc.scot.nhs.uk/Data-Dictionary/.- NHS Wales National Codes: http://www.datadictionary.wales.nhs.uk/", @@ -982,7 +980,7 @@ }, { "name": "conformsTo", - "required": false, + "required": true, "title": "Alignment with standardised data models", "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", "guidance": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.- If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.- HL7 FHIR: https://www.hl7.org/fhir/.- HL7 V2: https://www.hl7.org/implement/standards/product_section.cfm?section=13.- HL7 CDA: https://www.hl7.org/implement/standards/product_section.cfm?section=10.- HL7 CCOW: https://www.hl7.org/implement/standards/product_section.cfm?section=16.- DICOM: https://www.dicomstandard.org/.- I2B2: https://www.i2b2.org/.- IHE: https://www.ihe.net/resources/profiles/.- OMOP: https://www.ohdsi.org/data-standardization/the-common-data-model/.- openEHR: https://www.openehr.org/.- Sentinel: https://www.sentinelinitiative.org/sentinel/data/distributed-database-common-data-model.- PCORnet: https://pcornet.org/data-driven-common-model/.- CDISC: https://www.cdisc.org/standards/data-exchange/odm.- Local: In-house developed data model.- Other: Other standardised data model.- NHS Data Dictionary: https://www.datadictionary.nhs.uk/.- NHS Scotland Data Dictionary: https://www.ndc.scot.nhs.uk/Data-Dictionary/.- NHS Wales Data Dictionary: https://www.datadictionary.wales.nhs.uk/.", @@ -997,7 +995,7 @@ }, { "name": "language", - "required": false, + "required": true, "title": "Language", "description": "This should list all the languages in which the dataset metadata and underlying data is made available complaint with ISO 639.", "guidance": "https://www.iso.org/iso-639-language-code- aa: Afar- ab: Abkhazian- af: Afrikaans- ak: Akan- sq: Albanian- am: Amharic- ar: Arabic- an: Aragonese- hy: Armenian- as: Assamese- av: Avaric- ae: Avestan- ay: Aymara- az: Azerbaijani- ba: Bashkir- bm: Bambara- eu: Basque- be: Belarusian- bn: Bengali- bh: Bihari languages- bi: Bislama- bo: Tibetan- bs: Bosnian- br: Breton- bg: Bulgarian- my: Burmese- ca: Catalan; Valencian- cs: Czech- ch: Chamorro- ce: Chechen- zh: Chinese- cu: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic- cv: Chuvash- kw: 
Cornish- co: Corsican- cr: Cree- cy: Welsh- cs: Czech- da: Danish- de: German- dv: Divehi; Dhivehi; Maldivian- nl: Dutch; Flemish- dz: Dzongkha- el: Greek, Modern (1453-)- en: English- eo: Esperanto- et: Estonian- eu: Basque- ee: Ewe- fo: Faroese- fa: Persian- fj: Fijian- fi: Finnish- fr: French- fy: Western Frisian- ff: Fulah- ka: Georgian- de: German- gd: Gaelic; Scottish Gaelic- ga: Irish- gl: Galician- gv: Manx- el: Greek, Modern (1453-)- gn: Guarani- gu: Gujarati- ht: Haitian; Haitian Creole- ha: Hausa- ho: Hiri Motu- hr: Croatian- hu: Hungarian- hy: Armenian- ig: Igbo- is: Icelandic- io: Ido- ii: Sichuan Yi; Nuosu- iu: Inuktitut- ie: Interlingue; Occidental- ia: Interlingua (International Auxiliary Language Association)- id: Indonesian- ik: Inupiaq- is: Icelandic- it: Italian- jv: Javanese- ja: Japanese- kl: Kalaallisut; Greenlandic- kn: Kannada- ks: Kashmiri- ka: Georgian- kr: Kanuri- kk: Kazakh- km: Central Khmer- ki: Kikuyu; Gikuyu- rw: Kinyarwanda- ky: Kirghiz; Kyrgyz- kv: Komi- kg: Kongo- ko: Korean- kj: Kuanyama; Kwanyama- ku: Kurdish- lo: Lao- la: Latin- lv: Latvian- li: Limburgan; Limburger; limburgish- ln: Lingala- lt: Lithuanian- lb: Luxembourgish; Letzeburgesch- lu: Luba-Katanga- lg: Ganda- mk: Macedonian- mh: Marshallese- ml: Malayalam- mi: Maori- mr: Marathi- ms: Malay- mk: Macedonian- mg: Malagasy- mt: Maltese- mn: Mongolian- mi: Maori- ms: Malay- my: Burmese- na: Nauru- nv: Navajo; Navaho- nr: Ndebele, South; South Ndebele- nd: Ndebele, North; North Ndebele- ng: Ndonga- ne: Nepali- nl: Dutch; Flemish- nn: Norwegian Nynorsk; Nynorsk, Norwegian- nb: Bokm\u00e5l, Norwegian; Norwegian Bokm\u00e5l- no: Norwegian- ny: Chichewa; Chewa; Nyanja- oc: Occitan (post 1500)- oj: Ojibwa- or: Oriya- om: Oromo- os: Ossetian; Ossetic- pa: Panjabi; Punjabi- fa: Persian- pi: Pali- pl: Polish- pt: Portuguese- ps: Pushto; Pashto- qu: Quechua- rm: Romansh- ro: Romanian; Moldavian; Moldovan- rn: Rundi- ru: Russian- sg: Sango- sa: Sanskrit- si: Sinhala; Sinhalese- sk: 
Slovak- sl: Slovenian- se: Northern Sami- sm: Samoan- sn: Shona- sd: Sindhi- so: Somali- st: Sotho, Southern- es: Spanish; Castilian- sq: Albanian- sc: Sardinian- sr: Serbian- ss: Swati- su: Sundanese- sw: Swahili- sv: Swedish- ty: Tahitian- ta: Tamil- tt: Tatar- te: Telugu- tg: Tajik- tl: Tagalog- th: Thai- bo: Tibetan- ti: Tigrinya- to: Tonga (Tonga Islands)- tn: Tswana- ts: Tsonga- tk: Turkmen- tr: Turkish- tw: Twi- ug: Uighur; Uyghur- uk: Ukrainian- ur: Urdu- uz: Uzbek- ve: Venda- vi: Vietnamese- vo: Volap\u00fck- cy: Welsh- wa: Walloon- wo: Wolof- xh: Xhosa- yi: Yiddish- yo: Yoruba- za: Zhuang; Chuang- zh: Chinese- zu: Zulu", @@ -1010,7 +1008,7 @@ }, { "name": "format", - "required": false, + "required": true, "title": "Format", "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", "guidance": "If multiple formats are available, please specify. 
See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml.- Please enter one format type at a time and click Add New Field to add further keywords.- Note: If your file format is not included in the current list of formats, please indicate other.- Example: text/tab-separated-values, application/sql, text/csv, image/diacom-rle", diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 5ca018f..48f113c 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -349,6 +349,7 @@ "type": "array" } ], + "default": null, "description": "The geographical area covered by the dataset. It is recommended that links are to entries in one of the recommended standards:- For locations in the UK: ONS standards- For locations in other countries: ISO 3166-1 & ISO 3166-2", "examples": [ "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" @@ -470,9 +471,6 @@ "title": "Gender" } }, - "required": [ - "spatial" - ], "title": "Coverage", "type": "object" }, @@ -1241,7 +1239,6 @@ "additionalProperties": false, "properties": { "vocabularyEncodingScheme": { - "default": null, "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. 
Notes: More than one vocabulary may be provided.", "guidance": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.- If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.- Notes: More than one vocabulary may be provided.- Local: Local Coding Standard.- OPCS4: https://www.datadictionary.nhs.uk/web_site_content/supporting_information/clinical_coding/opcs_classification_of_interventions_and_procedures.asp.- READ: https://digital.nhs.uk/services/terminology-and-classifications/read-codes.- SNOMED CT: http://www.snomed.org/.- SNOMED RT: https://confluence.ihtsdotools.org/display/DOCGLOSS/SNOMED+RT.- DM+D: https://digital.nhs.uk/data-and-information/information-standards/information-standards-and-data-collections-including-extractions/publications-and-notifications/standards-and-collections/scci0052-dictionary-of-medicines-and-devices-dm-d.- NHS National Codes: https://www.datadictionary.nhs.uk/.- ODS: https://digital.nhs.uk/services/organisation-data-service.- LOINC: https://loinc.org/.- ICD10: https://www.who.int/classifications/icd/icdonlineversions/en/.- ICD10CM: https://www.cdc.gov/nchs/icd/icd10cm.htm.- ICD10PCS: https://ec.europa.eu/eip/ageing/standards/healthcare/e-health/icd-10-pcs_en.- ICD9CM: https://www.cdc.gov/nchs/icd/icd9cm.htm.- ICD9: https://www.cdc.gov/nchs/icd/icd9.htm.- ICDO3: https://www.who.int/classifications/icd/adaptations/oncology/en/.- AMT: https://www.digitalhealth.gov.au/about-the-agency/tenders-and-offers/community-pharmacy-software-industry-partnership-offer/Webinar%20-%20Australian%20Medicines%20Terminology%20(AMT)%20and%20Implementation%20Options%2001032017.pdf.- APC: 
https://www.acep.org/administration/reimbursement/reimbursement-faqs/apc-ambulatory-payment-classifications-faq/.- ATC: https://www.whocc.no/atc_ddd_index/.- CIEL: https://github.com/OpenConceptLab/ocl_web/wiki/CIEL.- HPO: https://hpo.jax.org/app/.- CPT4: https://www.cms.gov/Regulations-and-Guidance/Legislation/CLIA/Downloads/SubjecttoCLIA.pdf.- DPD: https://health-products.canada.ca/dpd-bdpp/index-eng.jsp.- DRG: http://www.euro.who.int/__data/assets/pdf_file/0004/162265/e96538.pdf.- HEMONC: https://hemonc.org/wiki/Main_Page.- JMDC: https://www.jmdc.co.jp/en/.- KCD7: https://forums.ohdsi.org/t/adding-kcd7-code-korean-icd-10-to-the-omop-vocabulary/7576.- MULTUM: https://www.cerner.com/solutions/drug-database.- NAACCR: https://www.naaccr.org/.- NDC: https://www.fda.gov/drugs/drug-approvals-and-databases/national-drug-code-directory.- NDFRT <:https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.- OXMIS: https://oxrisk.com/oxmis/.- RXNORM: https://www.nlm.nih.gov/research/umls/rxnorm/index.html.- RXNORM EXTENSION: https://www.nlm.nih.gov/research/umls/rxnorm/index.html.- SPL: https://www.fda.gov/industry/fda-resources-data-standards/structured-product-labeling-resources.- Other: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.- NHS Scotland National Codes: https://www.ndc.scot.nhs.uk/Data-Dictionary/.- NHS Wales National Codes: http://www.datadictionary.wales.nhs.uk/", "items": { @@ -1251,7 +1248,6 @@ "type": "array" }, "conformsTo": { - "default": null, "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", "examples": [ "LOCAL,NHS DATA DICTIONARY" @@ -1264,7 +1260,6 @@ "type": "array" }, "language": { - "default": null, "description": "This should list all the languages in which the dataset metadata and underlying data is made available complaint with ISO 639.", "guidance": "https://www.iso.org/iso-639-language-code- aa: Afar- ab: Abkhazian- af: Afrikaans- ak: Akan- sq: Albanian- am: Amharic- ar: Arabic- an: Aragonese- hy: Armenian- as: Assamese- av: Avaric- ae: Avestan- ay: Aymara- az: Azerbaijani- ba: Bashkir- bm: Bambara- eu: Basque- be: Belarusian- bn: Bengali- bh: Bihari languages- bi: Bislama- bo: Tibetan- bs: Bosnian- br: Breton- bg: Bulgarian- my: Burmese- ca: Catalan; Valencian- cs: Czech- ch: Chamorro- ce: Chechen- zh: Chinese- cu: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic- cv: Chuvash- kw: Cornish- co: Corsican- cr: Cree- cy: Welsh- cs: Czech- da: Danish- de: German- dv: Divehi; Dhivehi; Maldivian- nl: Dutch; Flemish- dz: Dzongkha- el: Greek, Modern (1453-)- en: English- eo: Esperanto- et: Estonian- eu: Basque- ee: Ewe- fo: Faroese- fa: Persian- fj: Fijian- fi: Finnish- fr: French- fy: Western Frisian- ff: Fulah- ka: Georgian- de: German- gd: Gaelic; Scottish Gaelic- ga: Irish- gl: Galician- gv: Manx- el: Greek, Modern (1453-)- gn: Guarani- gu: Gujarati- ht: Haitian; Haitian Creole- ha: Hausa- ho: Hiri Motu- hr: Croatian- hu: Hungarian- hy: Armenian- ig: Igbo- is: Icelandic- io: Ido- ii: Sichuan Yi; Nuosu- iu: Inuktitut- ie: Interlingue; Occidental- ia: Interlingua (International Auxiliary Language Association)- id: Indonesian- ik: Inupiaq- is: Icelandic- it: Italian- jv: Javanese- ja: Japanese- kl: Kalaallisut; Greenlandic- kn: Kannada- ks: Kashmiri- ka: Georgian- kr: Kanuri- kk: Kazakh- km: Central Khmer- ki: Kikuyu; Gikuyu- rw: Kinyarwanda- ky: Kirghiz; Kyrgyz- kv: 
Komi- kg: Kongo- ko: Korean- kj: Kuanyama; Kwanyama- ku: Kurdish- lo: Lao- la: Latin- lv: Latvian- li: Limburgan; Limburger; limburgish- ln: Lingala- lt: Lithuanian- lb: Luxembourgish; Letzeburgesch- lu: Luba-Katanga- lg: Ganda- mk: Macedonian- mh: Marshallese- ml: Malayalam- mi: Maori- mr: Marathi- ms: Malay- mk: Macedonian- mg: Malagasy- mt: Maltese- mn: Mongolian- mi: Maori- ms: Malay- my: Burmese- na: Nauru- nv: Navajo; Navaho- nr: Ndebele, South; South Ndebele- nd: Ndebele, North; North Ndebele- ng: Ndonga- ne: Nepali- nl: Dutch; Flemish- nn: Norwegian Nynorsk; Nynorsk, Norwegian- nb: Bokm\u00e5l, Norwegian; Norwegian Bokm\u00e5l- no: Norwegian- ny: Chichewa; Chewa; Nyanja- oc: Occitan (post 1500)- oj: Ojibwa- or: Oriya- om: Oromo- os: Ossetian; Ossetic- pa: Panjabi; Punjabi- fa: Persian- pi: Pali- pl: Polish- pt: Portuguese- ps: Pushto; Pashto- qu: Quechua- rm: Romansh- ro: Romanian; Moldavian; Moldovan- rn: Rundi- ru: Russian- sg: Sango- sa: Sanskrit- si: Sinhala; Sinhalese- sk: Slovak- sl: Slovenian- se: Northern Sami- sm: Samoan- sn: Shona- sd: Sindhi- so: Somali- st: Sotho, Southern- es: Spanish; Castilian- sq: Albanian- sc: Sardinian- sr: Serbian- ss: Swati- su: Sundanese- sw: Swahili- sv: Swedish- ty: Tahitian- ta: Tamil- tt: Tatar- te: Telugu- tg: Tajik- tl: Tagalog- th: Thai- bo: Tibetan- ti: Tigrinya- to: Tonga (Tonga Islands)- tn: Tswana- ts: Tsonga- tk: Turkmen- tr: Turkish- tw: Twi- ug: Uighur; Uyghur- uk: Ukrainian- ur: Urdu- uz: Uzbek- ve: Venda- vi: Vietnamese- vo: Volap\u00fck- cy: Welsh- wa: Walloon- wo: Wolof- xh: Xhosa- yi: Yiddish- yo: Yoruba- za: Zhuang; Chuang- zh: Chinese- zu: Zulu", "items": { @@ -1274,7 +1269,6 @@ "type": "array" }, "format": { - "default": null, "description": "If multiple formats are available please specify. 
See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", "examples": [ "text/tab-separated-values", @@ -1290,6 +1284,12 @@ "type": "array" } }, + "required": [ + "vocabularyEncodingScheme", + "conformsTo", + "language", + "format" + ], "title": "FormatAndStandards", "type": "object" }, @@ -2303,7 +2303,6 @@ "$ref": "#/$defs/PeriodicityV2" } ], - "default": null, "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", "guidance": "Please indicate the frequency of publishing.- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/. Options:- Static: Dataset published once.- Irregular: Dataset published at uneven intervals.- Continuous: Dataset published without interruption.- Biennial: Dataset published every two years.- Annual: Dataset published occurs once a year.- Biannual: Dataset published twice a year.- Quarterly: Dataset published every three months.- Bimonthly: Dataset published every two months.- Monthly: Dataset published once a month.- Biweekly: Dataset published every two weeks.- Weekly: Dataset published once a week.- Twice weekly: Dataset published twice a week.- Daily: Dataset published once a day.- Other: Dataset published using other interval.", "title": "Publishing frequency" @@ -2371,12 +2370,15 @@ "$ref": "#/$defs/TimeLagV2" } ], - "default": null, "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.", "guidance": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.- Less than 1 week: Typical time lag of less than a week.- 1-2 weeks: Typical time-lag 
of one to two weeks.- 2-4 weeks: Typical time-lag of two to four weeks.- 1-2 months: Typical time-lag of one to two months.- 2-6 months: Typical time-lag of two to six months.- 6 months plus: Typical time-lag of more than six months.- Variable: Variable time-lag.- Not applicable: Not Applicable i.e. static dataset.- Other: Other time-lag.", "title": "Time lag" } }, + "required": [ + "publishingFrequency", + "timeLag" + ], "title": "Temporal", "type": "object" }, diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py b/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py index 06c99a3..8324b1e 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py @@ -41,10 +41,10 @@ class Config: json_schema_extra={"guidance": an.materialType.guidance} ) - followup: Optional[Followup] = Field( + followUp: Optional[Followup] = Field( "UNKNOWN", - **an.followup.__dict__, - json_schema_extra={"guidance": an.followup.guidance} + **an.followUp.__dict__, + json_schema_extra={"guidance": an.followUp.guidance} ) pathway: Optional[Description] = Field( diff --git a/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py b/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py index 4ac4cdc..0f3f726 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/FormatAndStandards.py @@ -12,23 +12,23 @@ class Config: extra = "forbid" vocabularyEncodingScheme: List[ControlledVocabularyEnum] = Field( - None, + ..., **an.vocabularyEncodingScheme.__dict__, json_schema_extra={"guidance": an.vocabularyEncodingScheme.guidance} ) conformsTo: List[StandardisedDataModelsEnum] = Field( - None, + ..., **an.conformsTo.__dict__, json_schema_extra={"guidance": an.conformsTo.guidance} ) language: List[LanguageEnum] = Field( - None, + ..., **an.language.__dict__, json_schema_extra={"guidance": an.language.guidance} ) format: List[Format] = Field( - None, **an.format.__dict__, json_schema_extra={"guidance": 
an.format.guidance} + ..., **an.format.__dict__, json_schema_extra={"guidance": an.format.guidance} ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Temporal.py b/hdr_schemata/models/HDRUK/v3_0_0/Temporal.py index 60f6c3a..0504b7a 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Temporal.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Temporal.py @@ -14,7 +14,7 @@ class Config: extra = "forbid" publishingFrequency: PeriodicityV2 = Field( - None, + ..., **an.publishingFrequency.__dict__, json_schema_extra={"guidance": an.publishingFrequency.guidance} ) @@ -36,5 +36,5 @@ class Config: ) timeLag: TimeLagV2 = Field( - None, **an.timeLag.__dict__, json_schema_extra={"guidance": an.timeLag.guidance} + ..., **an.timeLag.__dict__, json_schema_extra={"guidance": an.timeLag.guidance} ) From 808a30b64c6cb8aec9489d04ad4ce0be4f2eeda7 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Mon, 19 Aug 2024 14:06:19 +0100 Subject: [PATCH 10/23] use same enums in gwdm v2 as hdr v3 --- docs/GWDM/2.0.form.json | 67 ++++++++--------- docs/GWDM/2.0.md | 18 ++--- docs/GWDM/2.0.structure.json | 6 +- hdr_schemata/models/GWDM/2.0/schema.json | 90 +++++++++++------------ hdr_schemata/models/GWDM/v2_0/Access.py | 2 +- hdr_schemata/models/GWDM/v2_0/Temporal.py | 4 +- 6 files changed, 92 insertions(+), 95 deletions(-) diff --git a/docs/GWDM/2.0.form.json b/docs/GWDM/2.0.form.json index 4058d53..fde4d6c 100644 --- a/docs/GWDM/2.0.form.json +++ b/docs/GWDM/2.0.form.json @@ -504,17 +504,15 @@ "types": { "type": "string", "options": [ - "LESS 1 WEEK", - "1-2 WEEKS", - "2-4 WEEKS", - "1-2 MONTHS", - "2-6 MONTHS", - "MORE 6 MONTHS", - "VARIABLE", - "NO TIMELAG", - "NOT APPLICABLE", - "OTHER", - null + "Less than 1 week", + "1-2 weeks", + "2-4 weeks", + "1-2 months", + "2-6 months", + "6 months plus", + "Variable", + "Not applicable", + "Other" ] }, "location": "provenance.temporal.timeLag" @@ -530,20 +528,20 @@ "types": { "type": "string", "options": [ - "STATIC", - "IRREGULAR", - "CONTINUOUS", - "BIENNIAL", - 
"ANNUAL", - "BIANNUAL", - "QUARTERLY", - "BIMONTHLY", - "MONTHLY", - "BIWEEKLY", - "WEEKLY", - "SEMIWEEKLY", - "DAILY", - "OTHER", + "Static", + "Irregular", + "Continuous", + "Biennial", + "Annual", + "Biannual", + "Quarterly", + "Bimonthly", + "Monthly", + "Biweekly", + "Weekly", + "Twice a week", + "Daily", + "Other", null ] }, @@ -685,16 +683,15 @@ "types": { "type": "string", "options": [ - "LESS 1 WEEK", - "1-2 WEEKS", - "2-4 WEEKS", - "1-2 MONTHS", - "2-6 MONTHS", - "MORE 6 MONTHS", - "VARIABLE", - "NOT APPLICABLE", - "OTHER", - null + "Less than 1 week", + "1-2 weeks", + "2-4 weeks", + "1-2 months", + "2-6 months", + "More than 6 months", + "Variable", + "Not applicable", + "Other" ] }, "location": "accessibility.access.deliveryLeadTime" diff --git a/docs/GWDM/2.0.md b/docs/GWDM/2.0.md index 1f5b2b1..97f2de0 100644 --- a/docs/GWDM/2.0.md +++ b/docs/GWDM/2.0.md @@ -464,9 +464,9 @@ The end of the time period that the dataset provides coverage for. If the datase Please indicate the typical time-lag between an event and the data for that event appearing in the dataset -| title | guidance | is_list | required | type | -|:---------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------| -| Time Lag | Please indicate the typical time-lag between an 
event and the data for that event appearing in the dataset.
- **Less than 1 week**: Typical time lag of less than a week.
- **1-2 weeks**: Typical time-lag of one to two weeks.
- **2-4 weeks**: Typical time-lag of two to four weeks.
- **1-2 months**: Typical time-lag of one to two months.
- **2-6 months**: Typical time-lag of two to six months.
- **6 months plus**: Typical time-lag of more than six months.
- **Variable**: Variable time-lag.
- **Not applicable**: Not Applicable i.e. static dataset.
- **Other**: Other time-lag. | False | True | ["TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]"] | +| title | guidance | is_list | required | type | +|:---------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.
- **Less than 1 week**: Typical time lag of less than a week.
- **1-2 weeks**: Typical time-lag of one to two weeks.
- **2-4 weeks**: Typical time-lag of two to four weeks.
- **1-2 months**: Typical time-lag of one to two months.
- **2-6 months**: Typical time-lag of two to six months.
- **6 months plus**: Typical time-lag of more than six months.
- **Variable**: Variable time-lag.
- **Not applicable**: Not Applicable i.e. static dataset.
- **Other**: Other time-lag. | False | True | ["TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']"] | @@ -475,9 +475,9 @@ Please indicate the typical time-lag between an event and the data for that even Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/ -| title | guidance | is_list | required | type | -|:------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Periodicity | | False | True | ["Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]"] | +| title | guidance | is_list | required | type | +|:------------|:-----------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Periodicity | | False | True | 
["PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]"] | @@ -625,9 +625,9 @@ Please provide link(s) to a webpage detailing the commercial model for processin Please provide an indication of the typical processing times based on the types of requests typically received. -| title | guidance | is_list | required | type | -|:------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------| -| Access Request Duration | - **Less than 1 week**: Access request process typically processed in less than a week.
- **1-2 weeks**: Access request process typically processed in one to two weeks.
- **2-4 weeks**: Access request process typically processed in two to four weeks.
- **1-2 months**: Access request process typically processed in one to two months.
- **2-6 months**: Access request process typically processed in two to six months.
- **More than 6 months**: Access request process typically processed in more than six months.
- **Variable**: Access request lead time is variable.
- **Not applicable**: Access request process duration is not applicable.
- **Other**: If the typical timeframe does not fit into the broad ranges i.e. lightweight application vs linked data application, please choose “Other” and indicate the typical timeframe within the description for the dataset. | False | False | ["DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", 'null'] | +| title | guidance | is_list | required | type | +|:------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Access Request Duration | - **Less than 1 week**: Access request process typically processed in less than a week.
- **1-2 weeks**: Access request process typically processed in one to two weeks.
- **2-4 weeks**: Access request process typically processed in two to four weeks.
- **1-2 months**: Access request process typically processed in one to two months.
- **2-6 months**: Access request process typically processed in two to six months.
- **More than 6 months**: Access request process typically processed in more than six months.
- **Variable**: Access request lead time is variable.
- **Not applicable**: Access request process duration is not applicable.
- **Other**: If the typical timeframe does not fit into the broad ranges i.e. lightweight application vs linked data application, please choose “Other” and indicate the typical timeframe within the description for the dataset. | False | False | ["DeliveryLeadTimeV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']", 'null'] | diff --git a/docs/GWDM/2.0.structure.json b/docs/GWDM/2.0.structure.json index 14c2b76..ab053b1 100644 --- a/docs/GWDM/2.0.structure.json +++ b/docs/GWDM/2.0.structure.json @@ -626,7 +626,7 @@ "guidance": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.- Less than 1 week: Typical time lag of less than a week.- 1-2 weeks: Typical time-lag of one to two weeks.- 2-4 weeks: Typical time-lag of two to four weeks.- 1-2 months: Typical time-lag of one to two months.- 2-6 months: Typical time-lag of two to six months.- 6 months plus: Typical time-lag of more than six months.- Variable: Variable time-lag.- Not applicable: Not Applicable i.e. 
static dataset.- Other: Other time-lag.", "examples": null, "type": [ - "TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]" + "TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']" ], "is_list": false, "is_optional": false @@ -639,7 +639,7 @@ "guidance": "", "examples": null, "type": [ - "Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]" + "PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]" ], "is_list": false, "is_optional": false @@ -851,7 +851,7 @@ "guidance": "Less than 1 week: Access request process typically processed in less than a week.- 1-2 weeks: Access request process typically processed in one to two weeks.- 2-4 weeks: Access request process typically processed in two to four weeks.- 1-2 months: Access request process typically processed in one to two months.- 2-6 months: Access request process typically processed in two to six months.- More than 6 months: Access request process typically processed in more than six months.- Variable: Access request lead time is variable.- Not applicable: Access request process duration is not applicable.- Other: If the typical timeframe does not fit into the broad ranges i.e. 
lightweight application vs linked data application, please choose \u201cOther\u201d and indicate the typical timeframe within the description for the dataset.", "examples": null, "type": [ - "DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", + "DeliveryLeadTimeV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']", "null" ], "is_list": false, diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index 7221a7a..1cf68f4 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -61,7 +61,7 @@ "deliveryLeadTime": { "anyOf": [ { - "$ref": "#/$defs/DeliveryLeadTime" + "$ref": "#/$defs/DeliveryLeadTimeV2" }, { "type": "null" @@ -583,20 +583,20 @@ ], "title": "DatasetType" }, - "DeliveryLeadTime": { + "DeliveryLeadTimeV2": { "enum": [ - "LESS 1 WEEK", - "1-2 WEEKS", - "2-4 WEEKS", - "1-2 MONTHS", - "2-6 MONTHS", - "MORE 6 MONTHS", - "VARIABLE", - "NOT APPLICABLE", - "OTHER", - null + "Less than 1 week", + "1-2 weeks", + "2-4 weeks", + "1-2 months", + "2-6 months", + "More than 6 months", + "Variable", + "Not applicable", + "Other" ], - "title": "DeliveryLeadTime" + "title": "DeliveryLeadTimeV2", + "type": "string" }, "DemographicFrequency": { "additionalProperties": false, @@ -1277,25 +1277,25 @@ "title": "Origin", "type": "object" }, - "Periodicity": { + "PeriodicityV2": { "enum": [ - "STATIC", - "IRREGULAR", - "CONTINUOUS", - "BIENNIAL", - "ANNUAL", - "BIANNUAL", - "QUARTERLY", - "BIMONTHLY", - "MONTHLY", - "BIWEEKLY", - "WEEKLY", - "SEMIWEEKLY", - "DAILY", - "OTHER", + "Static", + "Irregular", + "Continuous", + "Biennial", + "Annual", + "Biannual", + "Quarterly", + "Bimonthly", + "Monthly", + "Biweekly", + "Weekly", + "Twice a week", + "Daily", + "Other", null ], - "title": "Periodicity" + "title": "PeriodicityV2" }, 
"Pipeline": { "enum": [ @@ -1756,7 +1756,7 @@ "timeLag": { "allOf": [ { - "$ref": "#/$defs/TimeLag" + "$ref": "#/$defs/TimeLagV2" } ], "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", @@ -1765,10 +1765,11 @@ "accrualPeriodicity": { "allOf": [ { - "$ref": "#/$defs/Periodicity" + "$ref": "#/$defs/PeriodicityV2" } ], - "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/" + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "title": "Periodicity" }, "distributionReleaseDate": { "anyOf": [ @@ -1805,21 +1806,20 @@ "title": "Ternary", "type": "string" }, - "TimeLag": { + "TimeLagV2": { "enum": [ - "LESS 1 WEEK", - "1-2 WEEKS", - "2-4 WEEKS", - "1-2 MONTHS", - "2-6 MONTHS", - "MORE 6 MONTHS", - "VARIABLE", - "NO TIMELAG", - "NOT APPLICABLE", - "OTHER", - null + "Less than 1 week", + "1-2 weeks", + "2-4 weeks", + "1-2 months", + "2-6 months", + "6 months plus", + "Variable", + "Not applicable", + "Other" ], - "title": "TimeLag" + "title": "TimeLagV2", + "type": "string" }, "TissueSampleMetadata": { "properties": { diff --git a/hdr_schemata/models/GWDM/v2_0/Access.py b/hdr_schemata/models/GWDM/v2_0/Access.py index dd6f57b..563f64f 100644 --- a/hdr_schemata/models/GWDM/v2_0/Access.py +++ b/hdr_schemata/models/GWDM/v2_0/Access.py @@ -21,7 +21,7 @@ class Config: None, **an.accessRequestCost.__dict__ ) - deliveryLeadTime: Optional[DeliveryLeadTime] = Field( + deliveryLeadTime: Optional[DeliveryLeadTimeV2] = Field( None, **an.deliveryLeadTime.__dict__ ) diff --git a/hdr_schemata/models/GWDM/v2_0/Temporal.py b/hdr_schemata/models/GWDM/v2_0/Temporal.py index 773f999..24cd72e 100644 --- a/hdr_schemata/models/GWDM/v2_0/Temporal.py +++ b/hdr_schemata/models/GWDM/v2_0/Temporal.py @@ -14,9 +14,9 @@ class Config: startDate: Optional[Union[date, datetime]] = Field(None, **an.startDate.__dict__) endDate: Optional[Union[date, datetime]] = Field(None, **an.endDate.__dict__) - timeLag: TimeLag = Field(..., **an.timeLag.__dict__) + timeLag: TimeLagV2 = Field(..., **an.timeLag.__dict__) - accrualPeriodicity: Periodicity = Field(..., **an.accrualPeriodicity.__dict__) + accrualPeriodicity: PeriodicityV2 = Field(..., **an.accrualPeriodicity.__dict__) distributionReleaseDate: Optional[Union[date, datetime]] = Field( None, **an.distributionReleaseDate.__dict__ From 8db84e8db958a51aebafe3284da136a5ad372bc0 Mon Sep 
17 00:00:00 2001 From: Branwen Snelling Date: Tue, 20 Aug 2024 09:40:11 +0100 Subject: [PATCH 11/23] update docs on data custodian --- docs/HDRUK/3.0.0.form.json | 2 +- docs/HDRUK/3.0.0.md | 2 +- docs/HDRUK/3.0.0.structure.json | 2 +- hdr_schemata/models/HDRUK/3.0.0/schema.json | 2 +- hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml | 8 ++++---- 5 files changed, 8 insertions(+), 8 deletions(-) diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 58b0070..1273f4a 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -193,7 +193,7 @@ { "required": true, "title": "contact point", - "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers.", + "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. If a contact point is not provided this will default to the contact point for the team submitting the metadata.", "guidance": "", "examples": null, "is_list": false, diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index 0136587..d776a89 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -174,7 +174,7 @@ Please provide a URL that describes the organisation. #### contactPoint -Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. +Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. If a contact point is not provided this will default to the contact point for the team submitting the metadata. 
| title | guidance | is_list | required | type | |:--------------|:-----------|:----------|:-----------|:-----------------------------------------------------------------------------------------------| diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index d56ed37..b42153b 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -229,7 +229,7 @@ "name": "contactPoint", "required": true, "title": "contact point", - "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers.", + "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. If a contact point is not provided this will default to the contact point for the team submitting the metadata.", "guidance": "", "examples": null, "type": [ diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 48f113c..7d43e57 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -1764,7 +1764,7 @@ "type": "array" } ], - "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers.", + "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. 
If a contact point is not provided this will default to the contact point for the team submitting the metadata.", "title": "contact point" }, "memberOf": { diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index f9b9f6a..83343bf 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -53,16 +53,16 @@ summary: description: "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata." logo: title: "Organisation Logo" - description: "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg." + description: "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. If an logo is not submitted this will default to the logo for the team submitting the metadata." description: title: "Organisation Description" - description: "Please provide a URL that describes the organisation." + description: "Please provide a URL that describes the organisation. If an description is not provided this will default to the description of the team submitting the metadata." contactPoint: title: "contact point" - description: "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers." + description: "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. If a contact point is not provided this will default to the contact point for the team submitting the metadata." memberOf: title: "Organisation Membership" - description: "Please indicate if the organisation is an Alliance Member or a Hub." 
+ description: "Please indicate if the organisation is an Alliance Member or a Hub. If this field is not submitted this will default to the membership for the team submitting the metadata." alternateIdentifiers: title: "Alternate dataset identifiers" description: "Alternate dataset identifiers or local identifiers." From 89e966878ab3d4b980506bbb7398878c9da7bd54 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Tue, 20 Aug 2024 09:46:57 +0100 Subject: [PATCH 12/23] correct spelling --- hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index 83343bf..e2875b9 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -53,10 +53,10 @@ summary: description: "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata." logo: title: "Organisation Logo" - description: "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. If an logo is not submitted this will default to the logo for the team submitting the metadata." + description: "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. If a logo is not submitted this will default to the logo for the team submitting the metadata." description: title: "Organisation Description" - description: "Please provide a URL that describes the organisation. If an description is not provided this will default to the description of the team submitting the metadata." + description: "Please provide a URL that describes the organisation. 
If a description is not provided this will default to the description of the team submitting the metadata." contactPoint: title: "contact point" description: "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. If a contact point is not provided this will default to the contact point for the team submitting the metadata." From 1d7180266f12b6689ccbe72471c312679d45755f Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Tue, 20 Aug 2024 10:14:48 +0100 Subject: [PATCH 13/23] update org with annotations --- docs/HDRUK/3.0.0.form.json | 6 ++--- docs/HDRUK/3.0.0.md | 6 ++--- docs/HDRUK/3.0.0.structure.json | 6 ++--- hdr_schemata/models/HDRUK/3.0.0/schema.json | 6 ++--- .../models/HDRUK/v3_0_0/Organisation.py | 23 +++++++++++++++---- 5 files changed, 31 insertions(+), 16 deletions(-) diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 1273f4a..e78cae5 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -161,7 +161,7 @@ { "required": false, "title": "Organisation Logo", - "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg.", + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. If a logo is not submitted this will default to the logo for the team submitting the metadata.", "guidance": "", "examples": null, "is_list": false, @@ -177,7 +177,7 @@ { "required": false, "title": "Organisation Description", - "description": "Please provide a URL that describes the organisation.", + "description": "Please provide a URL that describes the organisation. 
If a description is not provided this will default to the description of the team submitting the metadata.", "guidance": "", "examples": null, "is_list": false, @@ -208,7 +208,7 @@ { "required": false, "title": "Organisation Membership", - "description": "Please indicate if the organisation is an Alliance Member or a Hub.'", + "description": "Please indicate if the organisation is an Alliance Member or a Hub. If this field is not submitted this will default to the membership for the team submitting the metadata.", "guidance": "", "examples": null, "is_list": false, diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index d776a89..145f462 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -152,7 +152,7 @@ The organisation responsible for running or supporting the data access request p #### logo -Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. +Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. If a logo is not submitted this will default to the logo for the team submitting the metadata. | title | guidance | is_list | required | type | |:------------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| @@ -163,7 +163,7 @@ Please provide a logo associated with the Gateway Organisation using a valid URL #### description -Please provide a URL that describes the organisation. +Please provide a URL that describes the organisation. If a description is not provided this will default to the description of the team submitting the metadata. 
| title | guidance | is_list | required | type | |:-------------------------|:-----------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------| @@ -185,7 +185,7 @@ Organisation contact point(s) which will be used for receiving queries from HDR, #### memberOf -Please indicate if the organisation is an Alliance Member or a Hub.' +Please indicate if the organisation is an Alliance Member or a Hub. If this field is not submitted this will default to the membership for the team submitting the metadata. | title | guidance | is_list | required | type | |:------------------------|:-----------|:----------|:-----------|:-----------------------------------------------------| diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index b42153b..3c65d0c 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -199,7 +199,7 @@ "name": "logo", "required": false, "title": "Organisation Logo", - "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg.", + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. If a logo is not submitted this will default to the logo for the team submitting the metadata.", "guidance": "", "examples": null, "type": [ @@ -214,7 +214,7 @@ "name": "description", "required": false, "title": "Organisation Description", - "description": "Please provide a URL that describes the organisation.", + "description": "Please provide a URL that describes the organisation. 
If a description is not provided this will default to the description of the team submitting the metadata.", "guidance": "", "examples": null, "type": [ @@ -244,7 +244,7 @@ "name": "memberOf", "required": false, "title": "Organisation Membership", - "description": "Please indicate if the organisation is an Alliance Member or a Hub.'", + "description": "Please indicate if the organisation is an Alliance Member or a Hub. If this field is not submitted this will default to the membership for the team submitting the metadata.", "guidance": "", "examples": null, "type": [ diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 7d43e57..5bfe02a 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -1736,7 +1736,7 @@ } ], "default": null, - "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg.", + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. If a logo is not submitted this will default to the logo for the team submitting the metadata.", "title": "Organisation Logo" }, "description": { @@ -1749,7 +1749,7 @@ } ], "default": null, - "description": "Please provide a URL that describes the organisation.", + "description": "Please provide a URL that describes the organisation. If a description is not provided this will default to the description of the team submitting the metadata.", "title": "Organisation Description" }, "contactPoint": { @@ -1777,7 +1777,7 @@ } ], "default": null, - "description": "Please indicate if the organisation is an Alliance Member or a Hub.'", + "description": "Please indicate if the organisation is an Alliance Member or a Hub. 
If this field is not submitted this will default to the membership for the team submitting the metadata.", "title": "Organisation Membership" } }, diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py index 691dc7d..a000066 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py @@ -1,6 +1,5 @@ -from hdr_schemata.models.HDRUK.v2_1_2.Organisation import Organisation as BaseOrganisation from typing import Optional, Union, List -from pydantic import Field, constr +from pydantic import Field, constr, BaseModel from hdr_schemata.definitions.HDRUK import * @@ -8,8 +7,9 @@ an = annotations.summary.organisation - -class Organisation(BaseOrganisation): +class Organisation(BaseModel): + class Config: + extra = "forbid" identifier: Union[constr(min_length=2, max_length=50), int] = Field( ..., **an.identifier.__dict__, json_schema_extra={"guidance": an.identifier.guidance} @@ -21,7 +21,22 @@ class Organisation(BaseOrganisation): json_schema_extra={"guidance": an.name.guidance} ) + logo: Optional[Url] = Field( + None, + **an.logo.__dict__, + ) + + description: Optional[Description] = Field( + None, + **an.description.__dict__, + ) + contactPoint: Union[EmailAddress, List[EmailAddress]] = Field( ..., **an.contactPoint.__dict__ ) + + memberOf: Optional[MemberOf] = Field( + None, + **an.memberOf.__dict__, + ) From 92ed19730fba4d64e8564469ecff68910d0f8a89 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Tue, 20 Aug 2024 14:12:55 +0100 Subject: [PATCH 14/23] update capitalisations --- docs/GWDM/2.0.form.json | 14 +++--- docs/GWDM/2.0.md | 6 +-- docs/GWDM/2.0.structure.json | 2 +- docs/HDRUK/3.0.0.form.json | 38 +++++++------- docs/HDRUK/3.0.0.md | 18 +++---- docs/HDRUK/3.0.0.structure.json | 6 +-- hdr_schemata/definitions/HDRUK/DatasetType.py | 18 +++---- hdr_schemata/definitions/HDRUK/Followup.py | 9 ++++ hdr_schemata/definitions/HDRUK/MemberOf.py | 5 ++ 
hdr_schemata/definitions/HDRUK/__init__.py | 4 +- hdr_schemata/models/GWDM/2.0/schema.json | 20 ++++---- hdr_schemata/models/GWDM/v2_0/Coverage.py | 2 +- hdr_schemata/models/HDRUK/3.0.0/schema.json | 50 +++++++++---------- hdr_schemata/models/HDRUK/v3_0_0/Coverage.py | 2 +- .../models/HDRUK/v3_0_0/Organisation.py | 2 +- 15 files changed, 105 insertions(+), 91 deletions(-) diff --git a/docs/GWDM/2.0.form.json b/docs/GWDM/2.0.form.json index fde4d6c..d36c02b 100644 --- a/docs/GWDM/2.0.form.json +++ b/docs/GWDM/2.0.form.json @@ -361,13 +361,13 @@ "types": { "type": "string", "options": [ - "0 - 6 MONTHS", - "6 - 12 MONTHS", - "1 - 10 YEARS", - "> 10 YEARS", - "UNKNOWN", - "CONTINUOUS", - "OTHER", + "0 - 6 Months", + "6 - 12 Months", + "1 - 10 Years", + "> 10 Years", + "Unknown", + "Continuous", + "Other", null ] }, diff --git a/docs/GWDM/2.0.md b/docs/GWDM/2.0.md index 97f2de0..922cc8b 100644 --- a/docs/GWDM/2.0.md +++ b/docs/GWDM/2.0.md @@ -334,9 +334,9 @@ Please indicate if the dataset is representative of the patient pathway and any If known, what is the typical time span that a patient appears in the dataset (follow up period) -| title | guidance | is_list | required | type | -|:---------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------| -| 
Followup | If known, please indicate the typical time span that a patient appears in the dataset (follow up period).
-**0 - 6 MONTHS**: Data typically available for a patient over a 0-6 month period.
-**6 - 12 MONTHS**: Data typically available for a patient over a 6-12 month period.
-**1 - 10 YEARS**: Data typically available for a patient over a 1-10 year period.
-**> 10 YEARS**: Data typically available for a patient for over a 10 year period.
-**CONTINUOUS**: Data for patients is being regularly added to and updated.
-**UNKNOWN**: Timespan is Unknown.
-**OTHER**: Data available for a patient over another time period. | False | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | +| title | guidance | is_list | required | type | +|:---------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------------------| +| Followup | If known, please indicate the typical time span that a patient appears in the dataset (follow up period).
-**0 - 6 MONTHS**: Data typically available for a patient over a 0-6 month period.
-**6 - 12 MONTHS**: Data typically available for a patient over a 6-12 month period.
-**1 - 10 YEARS**: Data typically available for a patient over a 1-10 year period.
-**> 10 YEARS**: Data typically available for a patient for over a 10 year period.
-**CONTINUOUS**: Data for patients is being regularly added to and updated.
-**UNKNOWN**: Timespan is Unknown.
-**OTHER**: Data available for a patient over another time period. | False | False | ["FollowupV2['0 - 6 Months','6 - 12 Months','1 - 10 Years','> 10 Years','Unknown','Continuous','Other',null]", 'null'] | diff --git a/docs/GWDM/2.0.structure.json b/docs/GWDM/2.0.structure.json index ab053b1..138a89b 100644 --- a/docs/GWDM/2.0.structure.json +++ b/docs/GWDM/2.0.structure.json @@ -444,7 +444,7 @@ "guidance": "If known, please indicate the typical time span that a patient appears in the dataset (follow up period).-0 - 6 MONTHS: Data typically available for a patient over a 0-6 month period.-6 - 12 MONTHS: Data typically available for a patient over a 6-12 month period.-1 - 10 YEARS: Data typically available for a patient over a 1-10 year period.-> 10 YEARS: Data typically available for a patient for over a 10 year period.-CONTINUOUS: Data for patients is being regularly added to and updated.-UNKNOWN: Timespan is Unknown.-OTHER: Data available for a patient over another time period.", "examples": null, "type": [ - "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "FollowupV2['0 - 6 Months','6 - 12 Months','1 - 10 Years','> 10 Years','Unknown','Continuous','Other',null]", "null" ], "is_list": false, diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index e78cae5..0f7e4d1 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -216,9 +216,9 @@ "types": { "type": "string", "options": [ - "HUB", - "ALLIANCE", - "OTHER", + "Hub", + "Alliance", + "Other", "NCS" ] }, @@ -468,13 +468,13 @@ "types": { "type": "string", "options": [ - "0 - 6 MONTHS", - "6 - 12 MONTHS", - "1 - 10 YEARS", - "> 10 YEARS", - "UNKNOWN", - "CONTINUOUS", - "OTHER", + "0 - 6 Months", + "6 - 12 Months", + "1 - 10 Years", + "> 10 Years", + "Unknown", + "Continuous", + "Other", null ] }, @@ -582,7 +582,7 @@ "Cardiovascular", "Cancer", "Rare diseases", - "Metabolic and Endocrine", + "Metabolic and endocrine", 
"Neurological", "Reproductive", "Maternity and neonatology", @@ -591,8 +591,8 @@ "Musculoskeletal", "Vision", "Renal and urogenital", - "Oral and Gastrointestinal", - "Cognitive Function", + "Oral and gastrointestinal", + "Cognitive function", "Hearing", "Others", "Vaccines", @@ -619,9 +619,9 @@ "Genomics", "Lipidomics", "Education", - "Crime and Justice", + "Crime and justice", "Ethnicity", - "Housing ", + "Housing", "Labour", "Ageing ", "Economics", @@ -633,12 +633,12 @@ "Finances", "Family circumstance", "Smoking", - "Physical Activity", + "Physical activity", "Dietary habits", "Alcohol", - "Disease Registry (research)", - "National Disease Registries and Audits", - "Births and Deaths", + "Disease registry (research)", + "National disease registries and audits", + "Births and deaths", "Not applicable" ] }, diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index 145f462..b15be0a 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -187,9 +187,9 @@ Organisation contact point(s) which will be used for receiving queries from HDR, Please indicate if the organisation is an Alliance Member or a Hub. If this field is not submitted this will default to the membership for the team submitting the metadata. -| title | guidance | is_list | required | type | -|:------------------------|:-----------|:----------|:-----------|:-----------------------------------------------------| -| Organisation Membership | | False | False | ["MemberOf['HUB','ALLIANCE','OTHER','NCS']", 'null'] | +| title | guidance | is_list | required | type | +|:------------------------|:-----------|:----------|:-----------|:-------------------------------------------------------| +| Organisation Membership | | False | False | ["MemberOfV2['Hub','Alliance','Other','NCS']", 'null'] | @@ -380,9 +380,9 @@ The type of biospecimen saved from a biological entity. If known, what is the typical time span that a patient appears in the dataset (follow up period). 
In a prospective cohort study, after baseline information is collected, participants are followed “longitudinally” i.e. new information is collected about them for a period of time afterward. This is known as the “follow up period”. What is the typical time span of follow up, e.g. 1 year, 5 years? If there are multiple cohorts in the dataset with varying follow up periods, please provide the longest follow up period. -| title | guidance | is_list | required | type | -|:----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------| -| Follow-up | If known, please indicate the typical time span that a patient appears in the dataset (follow up period).
-**0 - 6 MONTHS**: Data typically available for a patient over a 0-6 month period.
-**6 - 12 MONTHS**: Data typically available for a patient over a 6-12 month period.
-**1 - 10 YEARS**: Data typically available for a patient over a 1-10 year period.
-**> 10 YEARS**: Data typically available for a patient for over a 10 year period.
-**CONTINUOUS**: Data for patients is being regularly added to and updated.
-**UNKNOWN**: Timespan is Unknown.
-**OTHER**: Data available for a patient over another time period. | False | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | +| title | guidance | is_list | required | type | +|:----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------------------| +| Follow-up | If known, please indicate the typical time span that a patient appears in the dataset (follow up period).
 -**0 - 6 Months**: Data typically available for a patient over a 0-6 month period.
 -**6 - 12 Months**: Data typically available for a patient over a 6-12 month period.
 -**1 - 10 Years**: Data typically available for a patient over a 1-10 year period.
 -**> 10 Years**: Data typically available for a patient for over a 10 year period.
 -**Continuous**: Data for patients is being regularly added to and updated.
 -**Unknown**: Timespan is Unknown.
-**OTHER**: Data available for a patient over another time period. | False | False | ["FollowupV2['0 - 6 Months','6 - 12 Months','1 - 10 Years','> 10 Years','Unknown','Continuous','Other',null]", 'null'] | @@ -453,9 +453,9 @@ The topic areas to which the dataset content relates. The sub-type of the dataset content. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType ('proteomics', 'transcriptomics', 'epigenomics', 'metabolomics', 'metagenomics', 'genomics', 'lipidomics') is selected -| title | guidance | is_list | required | type | -|:-----------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Dataset sub-type | Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- *Measurements/Tests**- Includes any data related to laboratory or other diagnostics.
- *Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- *Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.
- *Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.
- *Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- *Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- *Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.
- *Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- *Information and communication**: Includes any data related to the study or application of information and communication.
- *Politics**: Includes any data related to political views, activities, voting, etc. | True | False | ["DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']"] | +| title | guidance | is_list | required | type | 
+|:-----------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----
------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset sub-type | Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
 - **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.
 - **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
 - **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm, abdomen or leg imaging.
 - **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. Multiomics is selected on behalf of the submitter if more than one omics datasetSubType is selected.
 - **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
 - **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
 - **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and death records.
 - **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
 - **Information and communication**: Includes any data related to the study or application of information and communication.
- *Politics**: Includes any data related to political views, activities, voting, etc. | True | False | ["DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and gastrointestinal','Cognitive function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Metagenomics','Genomics','Lipidomics','Education','Crime and justice','Ethnicity','Housing','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical activity','Dietary habits','Alcohol','Disease registry (research)','National disease registries and audits','Births and deaths','Not applicable']"] | diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index 3c65d0c..3ce099d 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -248,7 +248,7 @@ "guidance": "", "examples": null, "type": [ - "MemberOf['HUB','ALLIANCE','OTHER','NCS']", + "MemberOfV2['Hub','Alliance','Other','NCS']", "null" ], "is_list": false, @@ -504,7 +504,7 @@ "guidance": "If known, please indicate the typical time span that a patient appears in the dataset (follow up period).-0 - 6 MONTHS: Data typically available for a patient over a 0-6 month period.-6 - 12 MONTHS: Data typically available for a patient over a 6-12 month period.-1 - 10 YEARS: Data typically available for a patient over a 1-10 year period.-> 10 YEARS: Data typically available for a patient for over a 10 year period.-CONTINUOUS: Data for patients is being regularly added to and updated.-UNKNOWN: Timespan is Unknown.-OTHER: Data available for a patient over another 
time period.", "examples": null, "type": [ - "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "FollowupV2['0 - 6 Months','6 - 12 Months','1 - 10 Years','> 10 Years','Unknown','Continuous','Other',null]", "null" ], "is_list": false, @@ -602,7 +602,7 @@ "guidance": "Sub-types include those listed below under each data type. Datasets can have more than one sub-type associated.- Health and disease: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.- Treatments/Interventions: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.- Measurements/Tests- Includes any data related to laboratory or other diagnostics.- Imaging types: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.- *Imaging area of the body: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.- Omics: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics. 
Multiomics is selected on behalf of the submitter if more that one omics datasetSubType is selected.- Socioeconomic: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.- *Lifestyle: Includes any data related to smoking, physical activity, dietary habits or alcohol.- Registry: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.- Environment and energy: Includes any data related to the monitoring or study of environmental or energy factors or events.- *Information and communication: Includes any data related to the study or application of information and communication.- Politics*: Includes any data related to political views, activities, voting, etc.", "examples": null, "type": [ - "DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Metagenomics','Genomics','Lipidomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable']" + "DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and endocrine','Neurological','Reproductive','Maternity and 
neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and gastrointestinal','Cognitive function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Metagenomics','Genomics','Lipidomics','Education','Crime and justice','Ethnicity','Housing','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical activity','Dietary habits','Alcohol','Disease registry (research)','National disease registries and audits','Births and deaths','Not applicable']" ], "is_list": true, "is_optional": true diff --git a/hdr_schemata/definitions/HDRUK/DatasetType.py b/hdr_schemata/definitions/HDRUK/DatasetType.py index 42e6c2b..b2aa074 100644 --- a/hdr_schemata/definitions/HDRUK/DatasetType.py +++ b/hdr_schemata/definitions/HDRUK/DatasetType.py @@ -26,7 +26,7 @@ class DatasetSubType(Enum): CARDIOVASCULAR = 'Cardiovascular' CANCER = 'Cancer' RARE_DISEASES = 'Rare diseases' - METABOLIC_AND_ENDOCRINE = 'Metabolic and Endocrine' + METABOLIC_AND_ENDOCRINE = 'Metabolic and endocrine' NEUROLOGICAL = 'Neurological' REPRODUCTIVE = 'Reproductive' MATERNITY_AND_NEONATOLOGY = 'Maternity and neonatology' @@ -35,8 +35,8 @@ class DatasetSubType(Enum): MUSCULOSKELETAL = 'Musculoskeletal' VISION = 'Vision' RENAL_AND_UROGENITAL = 'Renal and urogenital' - ORAL_AND_GASTROINTESTINAL = 'Oral and Gastrointestinal' - COGNITIVE_FUNCTION = 'Cognitive Function' + ORAL_AND_GASTROINTESTINAL = 'Oral and gastrointestinal' + COGNITIVE_FUNCTION = 'Cognitive function' HEARING = 'Hearing' OTHERS = 'Others' VACCINES = 'Vaccines' @@ -67,9 +67,9 @@ class DatasetSubType(Enum): LIPIDOMICS = 'Lipidomics' # OTHERS = 'Others' EDUCATION = 'Education' - CRIME_AND_JUSTICE = 'Crime and Justice' + CRIME_AND_JUSTICE = 
'Crime and justice' ETHNICITY = 'Ethnicity' - HOUSING_ = 'Housing ' + HOUSING_ = 'Housing' LABOUR = 'Labour' AGEING_ = 'Ageing ' ECONOMICS = 'Economics' @@ -82,12 +82,12 @@ class DatasetSubType(Enum): FAMILY_CIRCUMSTANCE = 'Family circumstance' # OTHERS = 'Others' SMOKING = 'Smoking' - PHYSICAL_ACTIVITY = 'Physical Activity' + PHYSICAL_ACTIVITY = 'Physical activity' DIETARY_HABITS = 'Dietary habits' ALCOHOL = 'Alcohol' # OTHERS = 'Others' - DISEASE_REGISTRY_RESEARCH = 'Disease Registry (research)' - NATIONAL_DISEASE_REGISTRIES_AND_AUDITS = 'National Disease Registries and Audits' - BIRTHS_AND_DEATHS = 'Births and Deaths' + DISEASE_REGISTRY_RESEARCH = 'Disease registry (research)' + NATIONAL_DISEASE_REGISTRIES_AND_AUDITS = 'National disease registries and audits' + BIRTHS_AND_DEATHS = 'Births and deaths' # OTHERS = 'Others' NOT_APPLICABLE = 'Not applicable' diff --git a/hdr_schemata/definitions/HDRUK/Followup.py b/hdr_schemata/definitions/HDRUK/Followup.py index 8f1ed7e..222b03f 100644 --- a/hdr_schemata/definitions/HDRUK/Followup.py +++ b/hdr_schemata/definitions/HDRUK/Followup.py @@ -10,4 +10,13 @@ class Followup(Enum): OTHER = 'OTHER' NoneType_None = None +class FollowupV2(Enum): + field_0___6_MONTHS = '0 - 6 Months' + field_6___12_MONTHS = '6 - 12 Months' + field_1___10_YEARS = '1 - 10 Years' + field__10_YEARS = '> 10 Years' + UNKNOWN = 'Unknown' + CONTINUOUS = 'Continuous' + OTHER = 'Other' + NoneType_None = None diff --git a/hdr_schemata/definitions/HDRUK/MemberOf.py b/hdr_schemata/definitions/HDRUK/MemberOf.py index 9d3d95e..745cf43 100644 --- a/hdr_schemata/definitions/HDRUK/MemberOf.py +++ b/hdr_schemata/definitions/HDRUK/MemberOf.py @@ -9,4 +9,9 @@ class MemberOf(Enum): OTHER = 'OTHER' NCS = 'NCS' +class MemberOfV2(Enum): + HUB = 'Hub' + ALLIANCE = 'Alliance' + OTHER = 'Other' + NCS = 'NCS' diff --git a/hdr_schemata/definitions/HDRUK/__init__.py b/hdr_schemata/definitions/HDRUK/__init__.py index 5e76eff..a7cad8a 100644 --- 
a/hdr_schemata/definitions/HDRUK/__init__.py +++ b/hdr_schemata/definitions/HDRUK/__init__.py @@ -18,7 +18,7 @@ from .EthnicityEnum import EthnicityEnum from .EmailAddress import EmailAddress from .EndDateEnum import EndDateEnum -from .Followup import Followup +from .Followup import Followup, FollowupV2 from .Format import Format from .GenderEnum import GenderEnum from .ICD_0_3 import ICD_0_3 @@ -30,7 +30,7 @@ from .LongDescription import LongDescription from .MaterialTypeCategories import MaterialTypeCategories, MaterialTypeCategoriesV2 from .MeasuredProperty import MeasuredProperty -from .MemberOf import MemberOf +from .MemberOf import MemberOf, MemberOfV2 from .Name import Name from .OneHundredFiftyCharacters import OneHundredFiftyCharacters from .Periodicity import Periodicity, PeriodicityV2 diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index 1cf68f4..f9e89b6 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -307,7 +307,7 @@ "followUp": { "anyOf": [ { - "$ref": "#/$defs/Followup" + "$ref": "#/$defs/FollowupV2" }, { "type": "null" @@ -770,18 +770,18 @@ "title": "EthnicityEnum", "type": "string" }, - "Followup": { + "FollowupV2": { "enum": [ - "0 - 6 MONTHS", - "6 - 12 MONTHS", - "1 - 10 YEARS", - "> 10 YEARS", - "UNKNOWN", - "CONTINUOUS", - "OTHER", + "0 - 6 Months", + "6 - 12 Months", + "1 - 10 Years", + "> 10 Years", + "Unknown", + "Continuous", + "Other", null ], - "title": "Followup" + "title": "FollowupV2" }, "FormatAndStandards": { "additionalProperties": false, diff --git a/hdr_schemata/models/GWDM/v2_0/Coverage.py b/hdr_schemata/models/GWDM/v2_0/Coverage.py index edddbe8..1c7d06d 100644 --- a/hdr_schemata/models/GWDM/v2_0/Coverage.py +++ b/hdr_schemata/models/GWDM/v2_0/Coverage.py @@ -17,7 +17,7 @@ class Coverage(BaseModel): pathway: Optional[LongDescription] = Field(None, **an.pathway.__dict__) - followUp: Optional[Followup] = Field(None, 
**an.followUp.__dict__) + followUp: Optional[FollowupV2] = Field(None, **an.followUp.__dict__) typicalAgeRange: Optional[AgeRange] = Field(None, **an.typicalAgeRange.__dict__) diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 5bfe02a..6e62a49 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -428,7 +428,7 @@ "followUp": { "anyOf": [ { - "$ref": "#/$defs/Followup" + "$ref": "#/$defs/FollowupV2" }, { "type": "null" @@ -707,7 +707,7 @@ "Cardiovascular", "Cancer", "Rare diseases", - "Metabolic and Endocrine", + "Metabolic and endocrine", "Neurological", "Reproductive", "Maternity and neonatology", @@ -716,8 +716,8 @@ "Musculoskeletal", "Vision", "Renal and urogenital", - "Oral and Gastrointestinal", - "Cognitive Function", + "Oral and gastrointestinal", + "Cognitive function", "Hearing", "Others", "Vaccines", @@ -744,9 +744,9 @@ "Genomics", "Lipidomics", "Education", - "Crime and Justice", + "Crime and justice", "Ethnicity", - "Housing ", + "Housing", "Labour", "Ageing ", "Economics", @@ -758,12 +758,12 @@ "Finances", "Family circumstance", "Smoking", - "Physical Activity", + "Physical activity", "Dietary habits", "Alcohol", - "Disease Registry (research)", - "National Disease Registries and Audits", - "Births and Deaths", + "Disease registry (research)", + "National disease registries and audits", + "Births and deaths", "Not applicable" ], "title": "DatasetSubType", @@ -1217,18 +1217,18 @@ "title": "EthnicityEnum", "type": "string" }, - "Followup": { + "FollowupV2": { "enum": [ - "0 - 6 MONTHS", - "6 - 12 MONTHS", - "1 - 10 YEARS", - "> 10 YEARS", - "UNKNOWN", - "CONTINUOUS", - "OTHER", + "0 - 6 Months", + "6 - 12 Months", + "1 - 10 Years", + "> 10 Years", + "Unknown", + "Continuous", + "Other", null ], - "title": "Followup" + "title": "FollowupV2" }, "Format": { "minLength": 1, @@ -1573,14 +1573,14 @@ "MeasuredProperty": { "title": "MeasuredProperty" 
}, - "MemberOf": { + "MemberOfV2": { "enum": [ - "HUB", - "ALLIANCE", - "OTHER", + "Hub", + "Alliance", + "Other", "NCS" ], - "title": "MemberOf", + "title": "MemberOfV2", "type": "string" }, "Name": { @@ -1770,7 +1770,7 @@ "memberOf": { "anyOf": [ { - "$ref": "#/$defs/MemberOf" + "$ref": "#/$defs/MemberOfV2" }, { "type": "null" diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py b/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py index 8324b1e..08c39b4 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py @@ -41,7 +41,7 @@ class Config: json_schema_extra={"guidance": an.materialType.guidance} ) - followUp: Optional[Followup] = Field( + followUp: Optional[FollowupV2] = Field( "UNKNOWN", **an.followUp.__dict__, json_schema_extra={"guidance": an.followUp.guidance} diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py index a000066..97a5882 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Organisation.py @@ -36,7 +36,7 @@ class Config: **an.contactPoint.__dict__ ) - memberOf: Optional[MemberOf] = Field( + memberOf: Optional[MemberOfV2] = Field( None, **an.memberOf.__dict__, ) From 5e72600f1444cfba7b50f8bd39b3f4aae04661b3 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Thu, 22 Aug 2024 13:05:19 +0100 Subject: [PATCH 15/23] update enums keep all values --- docs/HDRUK/3.0.0.form.json | 13 ++++++++++--- docs/HDRUK/3.0.0.md | 12 ++++++------ docs/HDRUK/3.0.0.structure.json | 4 ++-- hdr_schemata/definitions/HDRUK/DataUseLimitation.py | 6 ++++++ .../definitions/HDRUK/DataUseRequirements.py | 7 ++++--- hdr_schemata/models/HDRUK/3.0.0/schema.json | 13 ++++++++++--- 6 files changed, 38 insertions(+), 17 deletions(-) diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 0f7e4d1..82b6b68 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -823,10 +823,16 @@ "type": 
"string", "options": [ "General research use", + "Commercial research use", "Genetic studies only", "No general methods research", "No restriction", + "Geographical restrictions", + "Institution-specific restrictions", + "Not for profit use", + "Project-specific restrictions", "Research-specific restrictions", + "User-specific restrictions", "Research use only", "No linkage" ] @@ -845,15 +851,16 @@ "type": "string", "options": [ "Collaboration required", + "Project-specific restrictions", "Ethics approval required", - "Geographical restrictions", "Institution-specific restrictions", - "Not for profit use", - "Project-specific restrictions", + "Geographical restrictions", "Publication moratorium", "Publication required", "Return to database or resource", "Time limit on use", + "Disclosure control", + "Not for profit use", "User-specific restriction" ] }, diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index b15be0a..e2dcf91 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -579,9 +579,9 @@ This section includes information about how the data can be used and how it is c Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the Data Use Ontology to include a value for NO LINKAGE. 
-| title | guidance | is_list | required | type | -|:--------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Data use limitation | Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used.
- **General research use**: This data use limitation indicates that use is allowed for general research use for any research purpose.
- **Genetic studies only**: This data use limitation indicates that use is limited to genetic studies only (i.e., no phenotype-only research).
- **No general methods research**: This data use limitation indicates that use includes methods development research(e.g., development of software or algorithms) only within the bounds of other use limitations.
- **No restriction**: This data use limitation indicates there is no restriction on use.
- **Research-specific restrictions**: This data use limitation indicates that use is limited to studies of a certain research type.
- **Research use only**: This data use limitation indicates that use is limited to research purposes (e.g., does not include its use in clinical care).
- **No linkage**: This data use limitation indicates there is a restriction on linking to any other datasets | True | False | ["DataUseLimitationV2['General research use','Genetic studies only','No general methods research','No restriction','Research-specific restrictions','Research use only','No linkage']"] | +| title | guidance | is_list | required | type | +|:--------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data use limitation | Please provide an indication of 
consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used.
- **General research use**: This data use limitation indicates that use is allowed for general research use for any research purpose.
- **Genetic studies only**: This data use limitation indicates that use is limited to genetic studies only (i.e., no phenotype-only research).
- **No general methods research**: This data use limitation indicates that use includes methods development research (e.g., development of software or algorithms) only within the bounds of other use limitations.
- **No restriction**: This data use limitation indicates there is no restriction on use.
- **Research-specific restrictions**: This data use limitation indicates that use is limited to studies of a certain research type.
- **Research use only**: This data use limitation indicates that use is limited to research purposes (e.g., does not include its use in clinical care).
- **No linkage**: This data use limitation indicates there is a restriction on linking to any other datasets | True | False | ["DataUseLimitationV2['General research use','Commercial research use','Genetic studies only','No general methods research','No restriction','Geographical restrictions','Institution-specific restrictions','Not for profit use','Project-specific restrictions','Research-specific restrictions','User-specific restrictions','Research use only','No linkage']"] | @@ -590,9 +590,9 @@ Please provide an indication of consent permissions for datasets and/or material Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information. -| title | guidance | is_list | required | type | -|:----------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| -| Data use requirements | - Please indicate if there are any additional conditions set for use if any, multiple requirements may be provided.
- Please ensure that these restrictions are documented in access rights information.
- **Collaboration required**: This requirement indicates that the requestor must either agree to join a research consortium or collaborate with the primary study investigator(s).
- **Ethics approval required**: This requirement indicates that the requestor must provide documentation of local institutional review board (IRB)/ ethics review board (ERB) approval.
- **Geographical restrictions**: This requirement indicates that use is limited to within a specific geographic region.
- **Institution-specific restrictions**: This requirement indicates that use is limited to use within an approved institution.
- **Not for profit use**: This requirement indicates that use of the data is limited to not-for-profit organizations and not-for-profit use, non-commercial use.
- **Project-specific restrictions**: This requirement indicates that use is limited to use within an approved project.
- **Publication moratorium**: This requirement indicates that requestor agrees not to publish results of studies until a specific date.
- **Publication required**: This requirement indicates that requestor agrees to make results of studies using the data available to the larger scientific community.
- **Return to database or resource**: This requirement indicates that the requestor must return derived/enriched data to the database/resource.
- **Time limit on use**: This requirement indicates that use is approved for a specific number of months.
- **User-specific restriction**: This requirement indicates that use is limited to use by approved users. | True | False | ["DataUseRequirementsV2['Collaboration required','Ethics approval required','Geographical restrictions','Institution-specific restrictions','Not for profit use','Project-specific restrictions','Publication moratorium','Publication required','Return to database or resource','Time limit on use','User-specific restriction']"] | +| title | guidance | is_list | required | type | +|:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data use requirements | - Please indicate if there are any additional conditions set for use if any, multiple requirements may be provided.
- Please ensure that these restrictions are documented in access rights information.
- **Collaboration required**: This requirement indicates that the requestor must either agree to join a research consortium or collaborate with the primary study investigator(s).
- **Ethics approval required**: This requirement indicates that the requestor must provide documentation of local institutional review board (IRB)/ ethics review board (ERB) approval.
- **Geographical restrictions**: This requirement indicates that use is limited to within a specific geographic region.
- **Institution-specific restrictions**: This requirement indicates that use is limited to use within an approved institution.
- **Not for profit use**: This requirement indicates that use of the data is limited to not-for-profit organizations and to not-for-profit, non-commercial use.
- **Project-specific restrictions**: This requirement indicates that use is limited to use within an approved project.
- **Publication moratorium**: This requirement indicates that requestor agrees not to publish results of studies until a specific date.
- **Publication required**: This requirement indicates that requestor agrees to make results of studies using the data available to the larger scientific community.
- **Return to database or resource**: This requirement indicates that the requestor must return derived/enriched data to the database/resource.
- **Time limit on use**: This requirement indicates that use is approved for a specific number of months.
- **User-specific restriction**: This requirement indicates that use is limited to use by approved users. | True | False | ["DataUseRequirementsV2['Collaboration required','Project-specific restrictions','Ethics approval required','Institution-specific restrictions','Geographical restrictions','Publication moratorium','Publication required','Return to database or resource','Time limit on use','Disclosure control','Not for profit use','User-specific restriction']"] | diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index 3ce099d..5575803 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -772,7 +772,7 @@ "guidance": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used.- General research use: This data use limitation indicates that use is allowed for general research use for any research purpose.- Genetic studies only: This data use limitation indicates that use is limited to genetic studies only (i.e., no phenotype-only research).- No general methods research: This data use limitation indicates that use includes methods development research(e.g., development of software or algorithms) only within the bounds of other use limitations.- No restriction: This data use limitation indicates there is no restriction on use.- Research-specific restrictions: This data use limitation indicates that use is limited to studies of a certain research type.- Research use only: This data use limitation indicates that use is limited to research purposes (e.g., does not include its use in clinical care).- No linkage: This data use limitation indicates there is a restriction on linking to any other datasets", "examples": null, "type": [ - "DataUseLimitationV2['General research use','Genetic studies only','No general methods research','No restriction','Research-specific restrictions','Research use 
only','No linkage']" + "DataUseLimitationV2['General research use','Commercial research use','Genetic studies only','No general methods research','No restriction','Geographical restrictions','Institution-specific restrictions','Not for profit use','Project-specific restrictions','Research-specific restrictions','User-specific restrictions','Research use only','No linkage']" ], "is_list": true, "is_optional": true @@ -785,7 +785,7 @@ "guidance": "Please indicate if there are any additional conditions set for use if any, multiple requirements may be provided.- Please ensure that these restrictions are documented in access rights information.- Collaboration required: This requirement indicates that the requestor must either agree to join a research consortium or collaborate with the primary study investigator(s).- Ethics approval required: This requirement indicates that the requestor must provide documentation of local institutional review board (IRB)/ ethics review board (ERB) approval.- Geographical restrictions: This requirement indicates that use is limited to within a specific geographic region.- Institution-specific restrictions: This requirement indicates that use is limited to use within an approved institution.- Not for profit use: This requirement indicates that use of the data is limited to not-for-profit organizations and not-for-profit use, non-commercial use.- Project-specific restrictions: This requirement indicates that use is limited to use within an approved project.- Publication moratorium: This requirement indicates that requestor agrees not to publish results of studies until a specific date.- Publication required: This requirement indicates that requestor agrees to make results of studies using the data available to the larger scientific community.- Return to database or resource: This requirement indicates that the requestor must return derived/enriched data to the database/resource.- Time limit on use: This requirement indicates that use is 
approved for a specific number of months.- User-specific restriction: This requirement indicates that use is limited to use by approved users.", "examples": null, "type": [ - "DataUseRequirementsV2['Collaboration required','Ethics approval required','Geographical restrictions','Institution-specific restrictions','Not for profit use','Project-specific restrictions','Publication moratorium','Publication required','Return to database or resource','Time limit on use','User-specific restriction']" + "DataUseRequirementsV2['Collaboration required','Project-specific restrictions','Ethics approval required','Institution-specific restrictions','Geographical restrictions','Publication moratorium','Publication required','Return to database or resource','Time limit on use','Disclosure control','Not for profit use','User-specific restriction']" ], "is_list": true, "is_optional": true diff --git a/hdr_schemata/definitions/HDRUK/DataUseLimitation.py b/hdr_schemata/definitions/HDRUK/DataUseLimitation.py index 24a729a..ff61b85 100644 --- a/hdr_schemata/definitions/HDRUK/DataUseLimitation.py +++ b/hdr_schemata/definitions/HDRUK/DataUseLimitation.py @@ -17,9 +17,15 @@ class DataUseLimitation(Enum): class DataUseLimitationV2(Enum): GENERAL_RESEARCH_USE = 'General research use' + COMMERCIAL_RESEARCH_USE = 'Commercial research use' GENETIC_STUDIES_ONLY = 'Genetic studies only' NO_GENERAL_METHODS_RESEARCH = 'No general methods research' NO_RESTRICTION = 'No restriction' + GEOGRAPHICAL_RESTRICTIONS = 'Geographical restrictions' + INSTITUTION_SPECIFIC_RESTRICTIONS = 'Institution-specific restrictions' + NOT_FOR_PROFIT_USE = 'Not for profit use' + PROJECT_SPECIFIC_RESTRICTIONS = 'Project-specific restrictions' RESEARCH_SPECIFIC_RESTRICTIONS = 'Research-specific restrictions' + USER_SPECIFIC_RESTRICTIONS = 'User-specific restrictions' RESEARCH_USE_ONLY = 'Research use only' NO_LINKAGE = 'No linkage' diff --git a/hdr_schemata/definitions/HDRUK/DataUseRequirements.py 
b/hdr_schemata/definitions/HDRUK/DataUseRequirements.py index cd00430..55277f7 100644 --- a/hdr_schemata/definitions/HDRUK/DataUseRequirements.py +++ b/hdr_schemata/definitions/HDRUK/DataUseRequirements.py @@ -17,13 +17,14 @@ class DataUseRequirements(Enum): class DataUseRequirementsV2(Enum): COLLABORATION_REQUIRED = 'Collaboration required' + PROJECT_SPECIFIC_RESTRICTIONS = 'Project-specific restrictions' ETHICS_APPROVAL_REQUIRED = 'Ethics approval required' - GEOGRAPHICAL_RESTRICTIONS = 'Geographical restrictions' INSTITUTION_SPECIFIC_RESTRICTIONS = 'Institution-specific restrictions' - NOT_FOR_PROFIT_USE = 'Not for profit use' - PROJECT_SPECIFIC_RESTRICTIONS = 'Project-specific restrictions' + GEOGRAPHICAL_RESTRICTIONS = 'Geographical restrictions' PUBLICATION_MORATORIUM = 'Publication moratorium' PUBLICATION_REQUIRED = 'Publication required' RETURN_TO_DATABASE_OR_RESOURCE = 'Return to database or resource' TIME_LIMIT_ON_USE = 'Time limit on use' + DISCLOSURE_CONTROL = 'Disclosure control' + NOT_FOR_PROFIT_USE = 'Not for profit use' USER_SPECIFIC_RESTRICTION = 'User-specific restriction' diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 6e62a49..d858a0e 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -586,10 +586,16 @@ "DataUseLimitationV2": { "enum": [ "General research use", + "Commercial research use", "Genetic studies only", "No general methods research", "No restriction", + "Geographical restrictions", + "Institution-specific restrictions", + "Not for profit use", + "Project-specific restrictions", "Research-specific restrictions", + "User-specific restrictions", "Research use only", "No linkage" ], @@ -599,15 +605,16 @@ "DataUseRequirementsV2": { "enum": [ "Collaboration required", + "Project-specific restrictions", "Ethics approval required", - "Geographical restrictions", "Institution-specific restrictions", - "Not for profit use", - 
"Project-specific restrictions", + "Geographical restrictions", "Publication moratorium", "Publication required", "Return to database or resource", "Time limit on use", + "Disclosure control", + "Not for profit use", "User-specific restriction" ], "title": "DataUseRequirementsV2", From 78de3445787601df78cd42741713212cc46bf890 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Thu, 22 Aug 2024 15:26:32 +0100 Subject: [PATCH 16/23] update guidance on array fields --- docs/HDRUK/3.0.0.form.json | 6 +- docs/HDRUK/3.0.0.md | 18 +- docs/HDRUK/3.0.0.structure.json | 6 +- .../definitions/HDRUK/CountryCodeEnum.py | 252 ++++++++++++++++++ hdr_schemata/models/HDRUK/3.0.0/schema.json | 6 +- .../HDRUK/v3_0_0/annotations/config.yaml | 6 +- 6 files changed, 273 insertions(+), 21 deletions(-) create mode 100644 hdr_schemata/definitions/HDRUK/CountryCodeEnum.py diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 82b6b68..f0d506f 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -1535,7 +1535,7 @@ "required": false, "title": "Investigations", "description": "Please provide link to any active projects that are using the dataset.", - "guidance": "- Please provide the website address(es) which document information related to active projects utilising the dataset.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, please click on **'+' symbol** to enter each separate website.\\n- **Example**: ", + "guidance": "- Please provide the website address(es) which document information related to active projects utilising the dataset.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, type in each website and press enter to add it to the list.\\n- **Example**: ", "examples": null, "is_list": true, "is_optional": true, @@ -1567,7 +1567,7 @@ "required": false, "title": "Publication about the dataset", "description": "DOIs for publications which 
describe the dataset.", - "guidance": "- Please provide the DOIs for publications which describe the dataset.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, please click on **'+' symbol** to enter each separate citation.\\n- **Example**: ", + "guidance": "- Please provide the DOIs for publications which describe the dataset.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, type in each citation and press enter to add it to the list.\\n- **Example**: ", "examples": [ "https://doi.org/10.1093/ije/dyab028" ], @@ -1584,7 +1584,7 @@ "required": false, "title": "Publication using the dataset", "description": "DOIs for publications which use the dataset for analysis.", - "guidance": "- Please provide the DOIs for publications which have used the dataset in their analysis.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, please click on **'+' symbol** to enter each separate citation.\\n- **Example**: ", + "guidance": "- Please provide the DOIs for publications which have used the dataset in their analysis.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, type in each citation and press enter to add it to the list.\\n- **Example**: ", "examples": [ "https://doi.org/10.1001/jamapediatrics.2016.3633" ], diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index e2dcf91..d1ffca0 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -960,9 +960,9 @@ None Please provide link to any active projects that are using the dataset. 
-| title | guidance | is_list | required | type | -|:---------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| Investigations | - Please provide the website address(es) which document information related to active projects utilising the dataset.
- Please split your existing list of citations into separate fields.
- To add multiple entries, please click on **'+' symbol** to enter each separate website.
- **Example**: | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | +| title | guidance | is_list | required | type | +|:---------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------| +| Investigations | - Please provide the website address(es) which document information related to active projects utilising the dataset.
- Please split your existing list of website addresses into separate fields.
- To add multiple entries, type in each website and press enter to add it to the list.
- **Example**: | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | @@ -982,9 +982,9 @@ Please provide the URL of any analysis tools or models that have been created fo DOIs for publications which describe the dataset. -| title | guidance | is_list | required | type | -|:------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------| -| Publication about the dataset | - Please provide the DOIs for publications which describe the dataset.
- Please split your existing list of citations into separate fields.
- To add multiple entries, please click on **'+' symbol** to enter each separate citation.
- **Example**: | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]"] | +| title | guidance | is_list | required | type | +|:------------------------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Publication about the dataset | - Please provide the DOIs for publications which describe the dataset.
- Please split your existing list of citations into separate fields.
- To add multiple entries, type in each citation and press enter to add it to the list.
- **Example**: | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]"] | Examples: @@ -995,9 +995,9 @@ Examples: DOIs for publications which use the dataset for analysis. -| title | guidance | is_list | required | type | -|:------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------| -| Publication using the dataset | - Please provide the DOIs for publications which have used the dataset in their analysis.
- Please split your existing list of citations into separate fields.
- To add multiple entries, please click on **'+' symbol** to enter each separate citation.
- **Example**: | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]"] | +| title | guidance | is_list | required | type | +|:------------------------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Publication using the dataset | - Please provide the DOIs for publications which have used the dataset in their analysis.
- Please split your existing list of citations into separate fields.
- To add multiple entries, type in each citation and press enter to add it to the list.
- **Example**: | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]"] | Examples: diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index 5575803..c6e79bb 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -1290,7 +1290,7 @@ "required": false, "title": "Investigations", "description": "Please provide link to any active projects that are using the dataset.", - "guidance": "Please provide the website address(es) which document information related to active projects utilising the dataset.- Please split your existing list of citations into separate fields.- To add multiple entries, please click on '+' symbol to enter each separate website.- Example: https://dataloch.org/insights/projects-delivered/data-driven-innovation-multi-morbidity-report-partner-gps", + "guidance": "Please provide the website address(es) which document information related to active projects utilising the dataset.- Please split your existing list of citations into separate fields.- To add multiple entries, type in each website and press enter to add it to the list.- Example: https://dataloch.org/insights/projects-delivered/data-driven-innovation-multi-morbidity-report-partner-gps", "examples": null, "type": [ "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" @@ -1318,7 +1318,7 @@ "required": false, "title": "Publication about the dataset", "description": "DOIs for publications which describe the dataset.", - "guidance": "Please provide the DOIs for publications which describe the dataset.- Please split your existing list of citations into separate fields.- To add multiple entries, please click on '+' symbol to enter each separate citation.- Example: https://doi.org/10.1093/ije/dyab028", + "guidance": "Please provide the DOIs for publications which describe the dataset.- Please split your existing list of citations into 
separate fields.- To add multiple entries, type in each citation and press enter to add it to the list.- Example: https://doi.org/10.1093/ije/dyab028", "examples": [ "https://doi.org/10.1093/ije/dyab028" ], @@ -1334,7 +1334,7 @@ "required": false, "title": "Publication using the dataset", "description": "DOIs for publications which use the dataset for analysis.", - "guidance": "Please provide the DOIs for publications which have used the dataset in their analysis.- Please split your existing list of citations into separate fields.- To add multiple entries, please click on '+' symbol to enter each separate citation.- Example: https://doi.org/10.1001/jamapediatrics.2016.3633", + "guidance": "Please provide the DOIs for publications which have used the dataset in their analysis.- Please split your existing list of citations into separate fields.- To add multiple entries, type in each citation and press enter to add it to the list.- Example: https://doi.org/10.1001/jamapediatrics.2016.3633", "examples": [ "https://doi.org/10.1001/jamapediatrics.2016.3633" ], diff --git a/hdr_schemata/definitions/HDRUK/CountryCodeEnum.py b/hdr_schemata/definitions/HDRUK/CountryCodeEnum.py new file mode 100644 index 0000000..4fa78ac --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/CountryCodeEnum.py @@ -0,0 +1,252 @@ +from enum import Enum + +class CountryCodeEnum(Enum): + AF = 'AF' + AL = 'AL' + DZ = 'DZ' + AS = 'AS' + AD = 'AD' + AO = 'AO' + AI = 'AI' + AQ = 'AQ' + AG = 'AG' + AR = 'AR' + AM = 'AM' + AW = 'AW' + AU = 'AU' + AT = 'AT' + AZ = 'AZ' + BS = 'BS' + BH = 'BH' + BD = 'BD' + BB = 'BB' + BY = 'BY' + BE = 'BE' + BZ = 'BZ' + BJ = 'BJ' + BM = 'BM' + BT = 'BT' + BO = 'BO' + BQ = 'BQ' + BA = 'BA' + BW = 'BW' + BV = 'BV' + BR = 'BR' + IO = 'IO' + BN = 'BN' + BG = 'BG' + BF = 'BF' + BI = 'BI' + CV = 'CV' + KH = 'KH' + CM = 'CM' + CA = 'CA' + KY = 'KY' + CF = 'CF' + TD = 'TD' + CL = 'CL' + CN = 'CN' + CX = 'CX' + CC = 'CC' + CO = 'CO' + KM = 'KM' + CD = 'CD' + CG = 'CG' + CK = 'CK' + 
CR = 'CR' + HR = 'HR' + CU = 'CU' + CW = 'CW' + CY = 'CY' + CZ = 'CZ' + CI = 'CI' + DK = 'DK' + DJ = 'DJ' + DM = 'DM' + DO = 'DO' + EC = 'EC' + EG = 'EG' + SV = 'SV' + GQ = 'GQ' + ER = 'ER' + EE = 'EE' + SZ = 'SZ' + ET = 'ET' + FK = 'FK' + FO = 'FO' + FJ = 'FJ' + FI = 'FI' + FR = 'FR' + GF = 'GF' + PF = 'PF' + TF = 'TF' + GA = 'GA' + GM = 'GM' + GE = 'GE' + DE = 'DE' + GH = 'GH' + GI = 'GI' + GR = 'GR' + GL = 'GL' + GD = 'GD' + GP = 'GP' + GU = 'GU' + GT = 'GT' + GG = 'GG' + GN = 'GN' + GW = 'GW' + GY = 'GY' + HT = 'HT' + HM = 'HM' + VA = 'VA' + HN = 'HN' + HK = 'HK' + HU = 'HU' + IS = 'IS' + IN = 'IN' + ID = 'ID' + IR = 'IR' + IQ = 'IQ' + IE = 'IE' + IM = 'IM' + IL = 'IL' + IT = 'IT' + JM = 'JM' + JP = 'JP' + JE = 'JE' + JO = 'JO' + KZ = 'KZ' + KE = 'KE' + KI = 'KI' + KP = 'KP' + KR = 'KR' + KW = 'KW' + KG = 'KG' + LA = 'LA' + LV = 'LV' + LB = 'LB' + LS = 'LS' + LR = 'LR' + LY = 'LY' + LI = 'LI' + LT = 'LT' + LU = 'LU' + MO = 'MO' + MG = 'MG' + MW = 'MW' + MY = 'MY' + MV = 'MV' + ML = 'ML' + MT = 'MT' + MH = 'MH' + MQ = 'MQ' + MR = 'MR' + MU = 'MU' + YT = 'YT' + MX = 'MX' + FM = 'FM' + MD = 'MD' + MC = 'MC' + MN = 'MN' + ME = 'ME' + MS = 'MS' + MA = 'MA' + MZ = 'MZ' + MM = 'MM' + NA = 'NA' + NR = 'NR' + NP = 'NP' + NL = 'NL' + NC = 'NC' + NZ = 'NZ' + NI = 'NI' + NE = 'NE' + NG = 'NG' + NU = 'NU' + NF = 'NF' + MK = 'MK' + MP = 'MP' + NO = 'NO' + OM = 'OM' + PK = 'PK' + PW = 'PW' + PS = 'PS' + PA = 'PA' + PG = 'PG' + PY = 'PY' + PE = 'PE' + PH = 'PH' + PN = 'PN' + PL = 'PL' + PT = 'PT' + PR = 'PR' + QA = 'QA' + RO = 'RO' + RU = 'RU' + RW = 'RW' + RE = 'RE' + BL = 'BL' + SH = 'SH' + KN = 'KN' + LC = 'LC' + MF = 'MF' + PM = 'PM' + VC = 'VC' + WS = 'WS' + SM = 'SM' + ST = 'ST' + SA = 'SA' + SN = 'SN' + RS = 'RS' + SC = 'SC' + SL = 'SL' + SG = 'SG' + SX = 'SX' + SK = 'SK' + SI = 'SI' + SB = 'SB' + SO = 'SO' + ZA = 'ZA' + GS = 'GS' + SS = 'SS' + ES = 'ES' + LK = 'LK' + SD = 'SD' + SR = 'SR' + SJ = 'SJ' + SE = 'SE' + CH = 'CH' + SY = 'SY' + TW = 'TW' + TJ = 'TJ' + TZ = 
'TZ' + TH = 'TH' + TL = 'TL' + TG = 'TG' + TK = 'TK' + TO = 'TO' + TT = 'TT' + TN = 'TN' + TM = 'TM' + TC = 'TC' + TV = 'TV' + TR = 'TR' + UG = 'UG' + UA = 'UA' + AE = 'AE' + GB = 'GB' + UM = 'UM' + US = 'US' + UY = 'UY' + UZ = 'UZ' + VU = 'VU' + VE = 'VE' + VN = 'VN' + VG = 'VG' + VI = 'VI' + WF = 'WF' + EH = 'EH' + YE = 'YE' + ZM = 'ZM' + ZW = 'ZW' + AX = 'AX' diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index d858a0e..d77acca 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -1114,7 +1114,7 @@ ], "default": null, "description": "Please provide link to any active projects that are using the dataset.", - "guidance": "Please provide the website address(es) which document information related to active projects utilising the dataset.- Please split your existing list of citations into separate fields.- To add multiple entries, please click on '+' symbol to enter each separate website.- Example: https://dataloch.org/insights/projects-delivered/data-driven-innovation-multi-morbidity-report-partner-gps", + "guidance": "Please provide the website address(es) which document information related to active projects utilising the dataset.- Please split your existing list of citations into separate fields.- To add multiple entries, type in each website and press enter to add it to the list.- Example: https://dataloch.org/insights/projects-delivered/data-driven-innovation-multi-morbidity-report-partner-gps", "title": "Investigations" }, "tools": { @@ -1151,7 +1151,7 @@ "examples": [ "https://doi.org/10.1093/ije/dyab028" ], - "guidance": "Please provide the DOIs for publications which describe the dataset.- Please split your existing list of citations into separate fields.- To add multiple entries, please click on '+' symbol to enter each separate citation.- Example: https://doi.org/10.1093/ije/dyab028", + "guidance": "Please provide the DOIs for publications 
which describe the dataset.- Please split your existing list of citations into separate fields.- To add multiple entries, type in each citation and press enter to add it to the list.- Example: https://doi.org/10.1093/ije/dyab028", "title": "Publication about the dataset" }, "publicationUsingDataset": { @@ -1171,7 +1171,7 @@ "examples": [ "https://doi.org/10.1001/jamapediatrics.2016.3633" ], - "guidance": "Please provide the DOIs for publications which have used the dataset in their analysis.- Please split your existing list of citations into separate fields.- To add multiple entries, please click on '+' symbol to enter each separate citation.- Example: https://doi.org/10.1001/jamapediatrics.2016.3633", + "guidance": "Please provide the DOIs for publications which have used the dataset in their analysis.- Please split your existing list of citations into separate fields.- To add multiple entries, type in each citation and press enter to add it to the list.- Example: https://doi.org/10.1001/jamapediatrics.2016.3633", "title": "Publication using the dataset" } }, diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index e2875b9..81278a3 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -304,13 +304,13 @@ enrichmentAndLinkage: publicationAboutDataset: - guidance: "- Please provide the DOIs for publications which describe the dataset.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, please click on **'+' symbol** to enter each separate citation.\\n- **Example**: " + guidance: "- Please provide the DOIs for publications which describe the dataset.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, type in each citation and press enter to add it to the list.\\n- **Example**: " title: "Publication about the 
dataset" examples: - "https://doi.org/10.1093/ije/dyab028" description: "DOIs for publications which describe the dataset." publicationUsingDataset: - guidance: "- Please provide the DOIs for publications which have used the dataset in their analysis.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, please click on **'+' symbol** to enter each separate citation.\\n- **Example**: " + guidance: "- Please provide the DOIs for publications which have used the dataset in their analysis.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, type in each citation and press enter to add it to the list.\\n- **Example**: " title: "Publication using the dataset" examples: - "https://doi.org/10.1001/jamapediatrics.2016.3633" @@ -323,7 +323,7 @@ enrichmentAndLinkage: description: "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/" investigations: - guidance: "- Please provide the website address(es) which document information related to active projects utilising the dataset.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, please click on **'+' symbol** to enter each separate website.\\n- **Example**: " + guidance: "- Please provide the website address(es) which document information related to active projects utilising the dataset.\\n- Please split your existing list of citations into separate fields.\\n- To add multiple entries, type in each website and press enter to add it to the list.\\n- **Example**: " title: "Investigations" description: "Please provide link to any active projects that are using the dataset." 
documentation: From 6b90df29edb57166f3c6f7e2beab80e844230515 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Thu, 22 Aug 2024 15:38:13 +0100 Subject: [PATCH 17/23] rm gender fields --- docs/GWDM/2.0.form.json | 28 ------- docs/GWDM/2.0.md | 33 +-------- docs/GWDM/2.0.structure.json | 43 +---------- docs/HDRUK/3.0.0.form.json | 46 ------------ docs/HDRUK/3.0.0.md | 44 +---------- docs/HDRUK/3.0.0.structure.json | 56 +------------- .../models/GWDM/v2_0/DemographicFrequency.py | 8 -- .../models/GWDM/v2_0/GenderAssignedAtBirth.py | 11 --- .../models/GWDM/v2_0/annotations/config.yaml | 9 +-- hdr_schemata/models/HDRUK/2.2.0/schema.json | 1 - hdr_schemata/models/HDRUK/3.0.0/schema.json | 74 +------------------ hdr_schemata/models/HDRUK/v3_0_0/Coverage.py | 4 - .../HDRUK/v3_0_0/DemographicFrequency.py | 8 -- .../HDRUK/v3_0_0/GenderAssignedAtBirth.py | 11 --- .../HDRUK/v3_0_0/annotations/config.yaml | 13 +--- 15 files changed, 7 insertions(+), 382 deletions(-) delete mode 100644 hdr_schemata/models/GWDM/v2_0/GenderAssignedAtBirth.py delete mode 100644 hdr_schemata/models/HDRUK/v3_0_0/GenderAssignedAtBirth.py diff --git a/docs/GWDM/2.0.form.json b/docs/GWDM/2.0.form.json index d36c02b..9299032 100644 --- a/docs/GWDM/2.0.form.json +++ b/docs/GWDM/2.0.form.json @@ -1497,34 +1497,6 @@ "types": "int", "location": "demographicFrequency.ethnicity.count" }, - { - "required": true, - "title": "Gender assigned at birth bin", - "description": null, - "guidance": "", - "examples": null, - "is_list": false, - "is_optional": false, - "types": { - "type": "string", - "options": [ - "male", - "female" - ] - }, - "location": "demographicFrequency.genderAssignedAtBirth.bin" - }, - { - "required": true, - "title": "Gender assigned at birth count", - "description": null, - "guidance": "", - "examples": null, - "is_list": false, - "is_optional": false, - "types": "int", - "location": "demographicFrequency.genderAssignedAtBirth.count" - }, { "required": true, "title": "Disease code", diff 
--git a/docs/GWDM/2.0.md b/docs/GWDM/2.0.md index 922cc8b..c08d6d0 100644 --- a/docs/GWDM/2.0.md +++ b/docs/GWDM/2.0.md @@ -1369,7 +1369,7 @@ Restrictions on the use of the tissue sample ## demographicFrequency -An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes. +An object containing demographic frequency data categorised by age, ethnicity, and disease attributes. @@ -1438,37 +1438,6 @@ None -### genderAssignedAtBirth - -Gender assigned at birth, male or female, and their corresponding counts. - - - - - - -#### bin - -None - -| title | guidance | is_list | required | type | -|:-----------------------------|:-----------|:----------|:-----------|:--------------------------------| -| Gender assigned at birth bin | | False | True | ["GenderEnum['male','female']"] | - - - - -#### count - -None - -| title | guidance | is_list | required | type | -|:-------------------------------|:-----------|:----------|:-----------|:--------| -| Gender assigned at birth count | | False | True | ['int'] | - - - - ### disease Array of diseases and their corresponding counts. 
diff --git a/docs/GWDM/2.0.structure.json b/docs/GWDM/2.0.structure.json index 138a89b..75da9a4 100644 --- a/docs/GWDM/2.0.structure.json +++ b/docs/GWDM/2.0.structure.json @@ -1866,7 +1866,7 @@ "name": "demographicFrequency", "required": false, "title": "Demographic frequency", - "description": "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes.", + "description": "An object containing demographic frequency data categorised by age, ethnicity, and disease attributes.", "guidance": "", "examples": null, "type": [ @@ -1958,47 +1958,6 @@ } ] }, - { - "name": "genderAssignedAtBirth", - "required": false, - "title": "Gender assigned at birth", - "description": "Gender assigned at birth, male or female, and their corresponding counts.", - "guidance": "", - "examples": null, - "type": [ - "GenderAssignedAtBirth" - ], - "is_list": true, - "is_optional": true, - "subItems": [ - { - "name": "bin", - "required": true, - "title": "Gender assigned at birth bin", - "description": null, - "guidance": "", - "examples": null, - "type": [ - "GenderEnum['male','female']" - ], - "is_list": false, - "is_optional": false - }, - { - "name": "count", - "required": true, - "title": "Gender assigned at birth count", - "description": null, - "guidance": "", - "examples": null, - "type": [ - "int" - ], - "is_list": false, - "is_optional": false - } - ] - }, { "name": "disease", "required": false, diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index f0d506f..d85bff0 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -496,24 +496,6 @@ }, "location": "coverage.pathway" }, - { - "required": false, - "title": "Gender", - "description": "Male, Female, Other", - "guidance": "Options are limited to Male/Female/Other.", - "examples": null, - "is_list": true, - "is_optional": true, - "types": { - "type": "string", - "options": [ - "Male", - "Female", - "Other" - ] - }, - 
"location": "coverage.gender" - }, { "required": false, "title": "Purpose of dataset collection", @@ -1763,34 +1745,6 @@ "types": "int", "location": "demographicFrequency.ethnicity.count" }, - { - "required": true, - "title": "Gender assigned at birth", - "description": null, - "guidance": "", - "examples": null, - "is_list": false, - "is_optional": false, - "types": { - "type": "string", - "options": [ - "male", - "female" - ] - }, - "location": "demographicFrequency.genderAssignedAtBirth.bin" - }, - { - "required": true, - "title": "Gender assigned at birth count", - "description": null, - "guidance": "", - "examples": null, - "is_list": false, - "is_optional": false, - "types": "int", - "location": "demographicFrequency.genderAssignedAtBirth.count" - }, { "required": true, "title": "Disease code", diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index d1ffca0..4d03a84 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -398,17 +398,6 @@ Please indicate if the dataset is representative of the patient pathway and any -### gender - -Male, Female, Other - -| title | guidance | is_list | required | type | -|:--------|:------------------------------------------|:----------|:-----------|:----------------------------------------| -| Gender | Options are limited to Male/Female/Other. | True | False | ["GenderType['Male','Female','Other']"] | - - - - ## provenance Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness. @@ -1218,7 +1207,7 @@ Links to locations of information and or raw downloads of synthetic data associa ## demographicFrequency -An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes. +An object containing demographic frequency data categorised by age, ethnicity, and disease attributes. 
@@ -1287,37 +1276,6 @@ None -### genderAssignedAtBirth - -Gender assigned at birth, male or female, and their corresponding counts. - - - - - - -#### bin - -None - -| title | guidance | is_list | required | type | -|:-------------------------|:-----------|:----------|:-----------|:--------------------------------| -| Gender assigned at birth | | False | True | ["GenderEnum['male','female']"] | - - - - -#### count - -None - -| title | guidance | is_list | required | type | -|:-------------------------------|:-----------|:----------|:-----------|:--------| -| Gender assigned at birth count | | False | True | ['int'] | - - - - ### disease Array of diseases and their corresponding counts. diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index c6e79bb..e59d0cc 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -524,19 +524,6 @@ "is_list": false, "is_optional": true, "subItems": [] - }, - { - "name": "gender", - "required": false, - "title": "Gender", - "description": "Male, Female, Other", - "guidance": "Options are limited to Male/Female/Other.", - "examples": null, - "type": [ - "GenderType['Male','Female','Other']" - ], - "is_list": true, - "is_optional": true } ] }, @@ -1636,7 +1623,7 @@ "name": "demographicFrequency", "required": false, "title": "Demographic frequency", - "description": "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes.", + "description": "An object containing demographic frequency data categorised by age, ethnicity, and disease attributes.", "guidance": "", "examples": null, "type": [ @@ -1728,47 +1715,6 @@ } ] }, - { - "name": "genderAssignedAtBirth", - "required": false, - "title": "Gender assigned at birth", - "description": "Gender assigned at birth, male or female, and their corresponding counts.", - "guidance": "", - "examples": null, - "type": [ - "GenderAssignedAtBirth" - ], - "is_list": true, - 
"is_optional": true, - "subItems": [ - { - "name": "bin", - "required": true, - "title": "Gender assigned at birth", - "description": null, - "guidance": "", - "examples": null, - "type": [ - "GenderEnum['male','female']" - ], - "is_list": false, - "is_optional": false - }, - { - "name": "count", - "required": true, - "title": "Gender assigned at birth count", - "description": null, - "guidance": "", - "examples": null, - "type": [ - "int" - ], - "is_list": false, - "is_optional": false - } - ] - }, { "name": "disease", "required": false, diff --git a/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py b/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py index 173c56d..9efede8 100644 --- a/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py +++ b/hdr_schemata/models/GWDM/v2_0/DemographicFrequency.py @@ -6,7 +6,6 @@ from .Age import Age from .Disease import Disease from .Ethnicity import Ethnicity -from .GenderAssignedAtBirth import GenderAssignedAtBirth an = annotations.demographicFrequency @@ -29,13 +28,6 @@ class Config: # json_schema_extra={"guidance": an.ethnicity.guidance} ) - genderAssignedAtBirth: Optional[List[GenderAssignedAtBirth]] = Field( - None, - title=an.genderAssignedAtBirth.title, - description=an.genderAssignedAtBirth.description, - # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} - ) - disease: Optional[List[Disease]] = Field( None, title=an.disease.title, diff --git a/hdr_schemata/models/GWDM/v2_0/GenderAssignedAtBirth.py b/hdr_schemata/models/GWDM/v2_0/GenderAssignedAtBirth.py deleted file mode 100644 index b8c137e..0000000 --- a/hdr_schemata/models/GWDM/v2_0/GenderAssignedAtBirth.py +++ /dev/null @@ -1,11 +0,0 @@ -from pydantic import BaseModel, Field -from enum import Enum -from hdr_schemata.definitions.HDRUK import * - -from .annotations import annotations - -an = annotations.demographicFrequency.genderAssignedAtBirth - -class GenderAssignedAtBirth(BaseModel): - bin: GenderEnum = Field(..., **an.bin.__dict__) - 
count: int = Field(..., **an.count.__dict__) diff --git a/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml b/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml index 76f0ac2..1164e81 100644 --- a/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml +++ b/hdr_schemata/models/GWDM/v2_0/annotations/config.yaml @@ -561,7 +561,7 @@ tissuesSampleCollection: description: "Restrictions on the use of the tissue sample" demographicFrequency: title: "Demographic frequency" - description: "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes." + description: "An object containing demographic frequency data categorised by age, ethnicity, and disease attributes." age: title: "Age" description: "Array of age bins and their corresponding counts." @@ -576,13 +576,6 @@ demographicFrequency: title: "Ethnicity bin" count: title: "Ethnicity count" - genderAssignedAtBirth: - title: "Gender assigned at birth" - description: "Gender assigned at birth, male or female, and their corresponding counts." - bin: - title: "Gender assigned at birth bin" - count: - title: "Gender assigned at birth count" disease: title: "Disease" description: "Array of diseases and their corresponding counts." 
diff --git a/hdr_schemata/models/HDRUK/2.2.0/schema.json b/hdr_schemata/models/HDRUK/2.2.0/schema.json index 5c90046..08b4e5c 100644 --- a/hdr_schemata/models/HDRUK/2.2.0/schema.json +++ b/hdr_schemata/models/HDRUK/2.2.0/schema.json @@ -368,7 +368,6 @@ ], "default": null, "description": "Male, Female, Other", - "guidance": "Options are limited to Male/Female/Other.", "title": "Gender" }, "biologicalsamples": { diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index d77acca..3245b29 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -452,23 +452,6 @@ "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", "guidance": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage.- This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. 
primary and secondary care), or an integrated care record covering the whole patient pathway.", "title": "Patient pathway description" - }, - "gender": { - "anyOf": [ - { - "items": { - "$ref": "#/$defs/GenderType" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Male, Female, Other", - "guidance": "Options are limited to Male/Female/Other.", - "title": "Gender" } }, "title": "Coverage", @@ -844,22 +827,6 @@ "description": "Array of ethnicity bins and their corresponding counts.", "title": "Ethnicity" }, - "genderAssignedAtBirth": { - "anyOf": [ - { - "items": { - "$ref": "#/$defs/GenderAssignedAtBirth" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Gender assigned at birth, male or female, and their corresponding counts.", - "title": "Gender assigned at birth" - }, "disease": { "anyOf": [ { @@ -1300,45 +1267,6 @@ "title": "FormatAndStandards", "type": "object" }, - "GenderAssignedAtBirth": { - "properties": { - "bin": { - "allOf": [ - { - "$ref": "#/$defs/GenderEnum" - } - ], - "title": "Gender assigned at birth" - }, - "count": { - "title": "Gender assigned at birth count", - "type": "integer" - } - }, - "required": [ - "bin", - "count" - ], - "title": "GenderAssignedAtBirth", - "type": "object" - }, - "GenderEnum": { - "enum": [ - "male", - "female" - ], - "title": "GenderEnum", - "type": "string" - }, - "GenderType": { - "enum": [ - "Male", - "Female", - "Other" - ], - "title": "GenderType", - "type": "string" - }, "Isocountrycode": { "pattern": "^[A-Z]{2}(-[A-Z]{2,3})?$", "title": "Isocountrycode", @@ -2659,7 +2587,7 @@ } ], "default": null, - "description": "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes.", + "description": "An object containing demographic frequency data categorised by age, ethnicity, and disease attributes.", "title": "Demographic frequency" }, "omics": { diff --git 
a/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py b/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py index 08c39b4..482669a 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Coverage.py @@ -50,7 +50,3 @@ class Config: pathway: Optional[Description] = Field( None, **an.pathway.__dict__, json_schema_extra={"guidance": an.pathway.guidance} ) - - gender: Optional[List[GenderType]] = Field( - None, **an.gender.__dict__, json_schema_extra={"guidance": an.gender.guidance} - ) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py b/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py index 173c56d..9efede8 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/DemographicFrequency.py @@ -6,7 +6,6 @@ from .Age import Age from .Disease import Disease from .Ethnicity import Ethnicity -from .GenderAssignedAtBirth import GenderAssignedAtBirth an = annotations.demographicFrequency @@ -29,13 +28,6 @@ class Config: # json_schema_extra={"guidance": an.ethnicity.guidance} ) - genderAssignedAtBirth: Optional[List[GenderAssignedAtBirth]] = Field( - None, - title=an.genderAssignedAtBirth.title, - description=an.genderAssignedAtBirth.description, - # json_schema_extra={"guidance": an.genderAssignedAtBirth.guidance} - ) - disease: Optional[List[Disease]] = Field( None, title=an.disease.title, diff --git a/hdr_schemata/models/HDRUK/v3_0_0/GenderAssignedAtBirth.py b/hdr_schemata/models/HDRUK/v3_0_0/GenderAssignedAtBirth.py deleted file mode 100644 index b8c137e..0000000 --- a/hdr_schemata/models/HDRUK/v3_0_0/GenderAssignedAtBirth.py +++ /dev/null @@ -1,11 +0,0 @@ -from pydantic import BaseModel, Field -from enum import Enum -from hdr_schemata.definitions.HDRUK import * - -from .annotations import annotations - -an = annotations.demographicFrequency.genderAssignedAtBirth - -class GenderAssignedAtBirth(BaseModel): - bin: GenderEnum = Field(..., **an.bin.__dict__) - count: int = 
Field(..., **an.count.__dict__) diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index 81278a3..2cb5f7d 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -354,10 +354,6 @@ coverage: guidance: "- Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage.\\n- This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway." title: "Patient pathway description" description: "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway." - gender: - guidance: "Options are limited to Male/Female/Other." 
- title: "Gender" - description: "Male, Female, Other" materialType: guidance: "Indicate the specimen type, can be several values from the list below:\\n- Availability of physical samples associated with the dataset.\\n- If samples are available, please indicate the types of samples that are available.\\n- More than one type may be provided.\\n- If samples are not yet available, please provide **“Availability to be confirmed”**.\\n- If samples are not available, then please provide **“Not available”**.\\n- **Not available**: Samples associated with the dataset are not available.\\n- **Bone marrow**: Bone marrow samples associated with the data are available.\\n- **Cancer cell lines**: Cancer cell line samples associated with the data are available.\\n- **CDNA/MRNA**: CDNA/MRNA samples associated with the data are available.\\n- **Core biopsy**: Core biopsy samples associated with the data are available.\\n- **DNA**: DNA samples associated with the data are available.\\n- **Entire body organ**: Entire body organ associated with the data are available.\\n- **Faeces**: Faeces samples associated with the data are available.\\n- **Immortalized cell lines**: Immortalized cell line samples associated with the data are available.\\n- **Isolated pathogen**: Isolated pathogen associated with the data are available.\\n- **MicroRNA**: MicroRNA samples associated with the data are available.\\n- **Peripheral blood cells**: Peripheral blood cell samples associated with the data are available.\\n- **Plasma**: Plasma samples associated with the data are available.\\n- **PM Tissue**: PM Tissue samples associated with the data are available.\\n- **Primary cells**: Primary cell samples associated with the data are available.\\n- **RNA**: RNA samples associated with the data are available.\\n- **Saliva**: Saliva samples associated with the data are available.\\n- **Serum**: Serum samples associated with the data are available.\\n- **Swabs**: Swab samples associated with the data are 
available.\\n- **Tissue**: Tissue samples associated with the data are available.\\n- **Urine**: Urine samples associated with the data are available.\\n- **Whole blood**: Whole blood samples associated with the data are available.\\n- **Availability to be confirmed**: Availability of samples is currently being confirmed.\\n- **Other**: Other types of sample available." title: "Biological sample availability" @@ -406,7 +402,7 @@ datasetDescriptor: title: "Url of a dataset" demographicFrequency: title: "Demographic frequency" - description: "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes." + description: "An object containing demographic frequency data categorised by age, ethnicity, and disease attributes." age: title: "Age" description: "Array of age bins and their corresponding counts." @@ -421,13 +417,6 @@ demographicFrequency: title: "Ethnicity grouping" count: title: "Ethnicity count" - genderAssignedAtBirth: - title: "Gender assigned at birth" - description: "Gender assigned at birth, male or female, and their corresponding counts." - bin: - title: "Gender assigned at birth" - count: - title: "Gender assigned at birth count" disease: title: "Disease" description: "Array of diseases and their corresponding counts." 
From 0951389cb4f35a8097fadc8c9a23efbae70dff58 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Thu, 22 Aug 2024 16:27:55 +0100 Subject: [PATCH 18/23] update GWDM schema --- hdr_schemata/models/GWDM/2.0/schema.json | 48 +----------------------- 1 file changed, 1 insertion(+), 47 deletions(-) diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index f9e89b6..5ff150b 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -633,22 +633,6 @@ "description": "Array of ethnicity bins and their corresponding counts.", "title": "Ethnicity" }, - "genderAssignedAtBirth": { - "anyOf": [ - { - "items": { - "$ref": "#/$defs/GenderAssignedAtBirth" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Gender assigned at birth, male or female, and their corresponding counts.", - "title": "Gender assigned at birth" - }, "disease": { "anyOf": [ { @@ -854,36 +838,6 @@ "title": "FormatAndStandards", "type": "object" }, - "GenderAssignedAtBirth": { - "properties": { - "bin": { - "allOf": [ - { - "$ref": "#/$defs/GenderEnum" - } - ], - "title": "Gender assigned at birth bin" - }, - "count": { - "title": "Gender assigned at birth count", - "type": "integer" - } - }, - "required": [ - "bin", - "count" - ], - "title": "GenderAssignedAtBirth", - "type": "object" - }, - "GenderEnum": { - "enum": [ - "male", - "female" - ], - "title": "GenderEnum", - "type": "string" - }, "Linkage": { "additionalProperties": false, "properties": { @@ -2276,7 +2230,7 @@ } ], "default": null, - "description": "An object containing demographic frequency data categorised by age, ethnicity, gender assigned at birth, and disease attributes.", + "description": "An object containing demographic frequency data categorised by age, ethnicity, and disease attributes.", "title": "Demographic frequency" }, "omics": { From db53e2832ecf77caf7a0077cf904a21c4b2da6ae Mon Sep 17 00:00:00 2001 
From: Branwen Snelling Date: Fri, 23 Aug 2024 10:20:24 +0100 Subject: [PATCH 19/23] uncomment in create markdown --- hdr_schemata/utils/create_markdown.py | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index 65b98e9..b060d26 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -244,10 +244,10 @@ def remove_types(data): from hdr_schemata.models.GWDM.v2_0 import Gwdm20 -# create_markdown(Hdruk220, dir_path+"/../../docs/HDRUK", "2.2.0") -# create_markdown(Hdruk221, dir_path+"/../../docs/HDRUK", "2.2.1") -# create_markdown(Hdruk212, dir_path+"/../../docs/HDRUK", "2.1.2") -# create_markdown(Hdruk213, dir_path+"/../../docs/HDRUK", "2.1.3") +create_markdown(Hdruk220, dir_path+"/../../docs/HDRUK", "2.2.0") +create_markdown(Hdruk221, dir_path+"/../../docs/HDRUK", "2.2.1") +create_markdown(Hdruk212, dir_path+"/../../docs/HDRUK", "2.1.2") +create_markdown(Hdruk213, dir_path+"/../../docs/HDRUK", "2.1.3") create_markdown(Hdruk300, dir_path+"/../../docs/HDRUK", "3.0.0") from hdr_schemata.models.GWDM.v1_1 import Gwdm10 @@ -255,7 +255,7 @@ def remove_types(data): from hdr_schemata.models.GWDM.v1_2 import Gwdm12 from hdr_schemata.models.GWDM.v2_0 import Gwdm20 -# create_markdown(Gwdm10, dir_path+"/../../docs/GWDM", "1.0") -# create_markdown(Gwdm11, dir_path+"/../../docs/GWDM", "1.1") -# create_markdown(Gwdm12, dir_path+"/../../docs/GWDM", "1.2") +create_markdown(Gwdm10, dir_path+"/../../docs/GWDM", "1.0") +create_markdown(Gwdm11, dir_path+"/../../docs/GWDM", "1.1") +create_markdown(Gwdm12, dir_path+"/../../docs/GWDM", "1.2") create_markdown(Gwdm20, dir_path+"/../../docs/GWDM", "2.0") From f5cb0472d37373c7cf1d576660d1de2da558d69e Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Fri, 23 Aug 2024 13:22:10 +0100 Subject: [PATCH 20/23] udpate keywords type --- hdr_schemata/models/HDRUK/3.0.0/schema.json | 3 --- 
hdr_schemata/models/HDRUK/v3_0_0/Summary.py | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 3245b29..8c8c7da 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -2141,9 +2141,6 @@ }, "type": "array" }, - { - "$ref": "#/$defs/CommaSeparatedValues" - }, { "type": "null" } diff --git a/hdr_schemata/models/HDRUK/v3_0_0/Summary.py b/hdr_schemata/models/HDRUK/v3_0_0/Summary.py index e8d591a..894f7e0 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/Summary.py +++ b/hdr_schemata/models/HDRUK/v3_0_0/Summary.py @@ -29,7 +29,7 @@ class Config: ..., **an.populationSize.__dict__, json_schema_extra={"guidance": an.populationSize.guidance} ) - keywords: Optional[Union[List[OneHundredFiftyCharacters], CommaSeparatedValues]] = Field( + keywords: Optional[List[OneHundredFiftyCharacters]] = Field( None, **an.keywords.__dict__, json_schema_extra={"guidance": an.keywords.guidance} ) From ca06961185a94cdfb269d7fa880a712f44150e6c Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Wed, 28 Aug 2024 17:33:43 +0100 Subject: [PATCH 21/23] update timelag enum --- docs/GWDM/2.0.form.json | 2 +- docs/GWDM/2.0.md | 6 +++--- docs/GWDM/2.0.structure.json | 2 +- docs/HDRUK/3.0.0.form.json | 2 +- docs/HDRUK/3.0.0.md | 6 +++--- docs/HDRUK/3.0.0.structure.json | 2 +- hdr_schemata/definitions/HDRUK/TimeLag.py | 2 +- hdr_schemata/models/GWDM/2.0/schema.json | 2 +- hdr_schemata/models/HDRUK/3.0.0/schema.json | 2 +- 9 files changed, 13 insertions(+), 13 deletions(-) diff --git a/docs/GWDM/2.0.form.json b/docs/GWDM/2.0.form.json index 9299032..c5b82ce 100644 --- a/docs/GWDM/2.0.form.json +++ b/docs/GWDM/2.0.form.json @@ -509,7 +509,7 @@ "2-4 weeks", "1-2 months", "2-6 months", - "6 months plus", + "More than 6 months", "Variable", "Not applicable", "Other" diff --git a/docs/GWDM/2.0.md b/docs/GWDM/2.0.md index c08d6d0..2b3d5f0 100644 
--- a/docs/GWDM/2.0.md +++ b/docs/GWDM/2.0.md @@ -464,9 +464,9 @@ The end of the time period that the dataset provides coverage for. If the datase Please indicate the typical time-lag between an event and the data for that event appearing in the dataset -| title | guidance | is_list | required | type | -|:---------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------| -| Time Lag | Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.
- **Less than 1 week**: Typical time lag of less than a week.
- **1-2 weeks**: Typical time-lag of one to two weeks.
- **2-4 weeks**: Typical time-lag of two to four weeks.
- **1-2 months**: Typical time-lag of one to two months.
- **2-6 months**: Typical time-lag of two to six months.
- **6 months plus**: Typical time-lag of more than six months.
- **Variable**: Variable time-lag.
- **Not applicable**: Not Applicable i.e. static dataset.
- **Other**: Other time-lag. | False | True | ["TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']"] | +| title | guidance | is_list | required | type | +|:---------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.
- **Less than 1 week**: Typical time lag of less than a week.
- **1-2 weeks**: Typical time-lag of one to two weeks.
- **2-4 weeks**: Typical time-lag of two to four weeks.
- **1-2 months**: Typical time-lag of one to two months.
- **2-6 months**: Typical time-lag of two to six months.
- **6 months plus**: Typical time-lag of more than six months.
- **Variable**: Variable time-lag.
- **Not applicable**: Not Applicable i.e. static dataset.
- **Other**: Other time-lag. | False | True | ["TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']"] | diff --git a/docs/GWDM/2.0.structure.json b/docs/GWDM/2.0.structure.json index 75da9a4..09ab25e 100644 --- a/docs/GWDM/2.0.structure.json +++ b/docs/GWDM/2.0.structure.json @@ -626,7 +626,7 @@ "guidance": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.- Less than 1 week: Typical time lag of less than a week.- 1-2 weeks: Typical time-lag of one to two weeks.- 2-4 weeks: Typical time-lag of two to four weeks.- 1-2 months: Typical time-lag of one to two months.- 2-6 months: Typical time-lag of two to six months.- 6 months plus: Typical time-lag of more than six months.- Variable: Variable time-lag.- Not applicable: Not Applicable i.e. static dataset.- Other: Other time-lag.", "examples": null, "type": [ - "TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']" + "TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']" ], "is_list": false, "is_optional": false diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index d85bff0..873f941 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -785,7 +785,7 @@ "2-4 weeks", "1-2 months", "2-6 months", - "6 months plus", + "More than 6 months", "Variable", "Not applicable", "Other" diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index 4d03a84..0e05c89 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -539,9 +539,9 @@ The end of the time period that the dataset provides coverage for. If the datase Please indicate the typical time-lag between an event and the data for that event appearing in the dataset. 
-| title | guidance | is_list | required | type | -|:---------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------| -| Time lag | Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.
- **Less than 1 week**: Typical time lag of less than a week.
- **1-2 weeks**: Typical time-lag of one to two weeks.
- **2-4 weeks**: Typical time-lag of two to four weeks.
- **1-2 months**: Typical time-lag of one to two months.
- **2-6 months**: Typical time-lag of two to six months.
- **6 months plus**: Typical time-lag of more than six months.
- **Variable**: Variable time-lag.
- **Not applicable**: Not Applicable i.e. static dataset.
- **Other**: Other time-lag. | False | True | ["TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']"] | +| title | guidance | is_list | required | type | +|:---------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------| +| Time lag | Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.
- **Less than 1 week**: Typical time lag of less than a week.
- **1-2 weeks**: Typical time-lag of one to two weeks.
- **2-4 weeks**: Typical time-lag of two to four weeks.
- **1-2 months**: Typical time-lag of one to two months.
- **2-6 months**: Typical time-lag of two to six months.
- **6 months plus**: Typical time-lag of more than six months.
- **Variable**: Variable time-lag.
- **Not applicable**: Not Applicable i.e. static dataset.
- **Other**: Other time-lag. | False | True | ["TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']"] | diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index e59d0cc..aa60ec0 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -715,7 +715,7 @@ "guidance": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.- Less than 1 week: Typical time lag of less than a week.- 1-2 weeks: Typical time-lag of one to two weeks.- 2-4 weeks: Typical time-lag of two to four weeks.- 1-2 months: Typical time-lag of one to two months.- 2-6 months: Typical time-lag of two to six months.- 6 months plus: Typical time-lag of more than six months.- Variable: Variable time-lag.- Not applicable: Not Applicable i.e. static dataset.- Other: Other time-lag.", "examples": null, "type": [ - "TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']" + "TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']" ], "is_list": false, "is_optional": false diff --git a/hdr_schemata/definitions/HDRUK/TimeLag.py b/hdr_schemata/definitions/HDRUK/TimeLag.py index 35beb3c..5aa1756 100644 --- a/hdr_schemata/definitions/HDRUK/TimeLag.py +++ b/hdr_schemata/definitions/HDRUK/TimeLag.py @@ -19,7 +19,7 @@ class TimeLagV2(Enum): TWO_FOUR_WEEKS = '2-4 weeks' ONE_TWO_MONTHS = '1-2 months' TWO_SIX_MONTHS = '2-6 months' - SIX_MONTHS_PLUS = '6 months plus' + SIX_MONTHS_PLUS = 'More than 6 months' VARIABLE = 'Variable' NOT_APPLICABLE = 'Not applicable' OTHER = 'Other' diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index 5ff150b..1453e23 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ 
b/hdr_schemata/models/GWDM/2.0/schema.json @@ -1767,7 +1767,7 @@ "2-4 weeks", "1-2 months", "2-6 months", - "6 months plus", + "More than 6 months", "Variable", "Not applicable", "Other" diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index 8c8c7da..9310515 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -2330,7 +2330,7 @@ "2-4 weeks", "1-2 months", "2-6 months", - "6 months plus", + "More than 6 months", "Variable", "Not applicable", "Other" From 314b30f4dc9843394f150844dbca518caa64f56b Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Fri, 30 Aug 2024 10:04:24 +0100 Subject: [PATCH 22/23] update regex in hydration and revsions in GWDM --- docs/GWDM/2.0.form.json | 5 +++-- docs/GWDM/2.0.md | 6 +++--- docs/GWDM/2.0.structure.json | 5 +++-- docs/HDRUK/3.0.0.form.json | 3 ++- hdr_schemata/models/GWDM/2.0/schema.json | 5 ++++- hdr_schemata/models/GWDM/v2_0/Revision.py | 3 ++- hdr_schemata/utils/create_markdown.py | 1 + 7 files changed, 18 insertions(+), 10 deletions(-) diff --git a/docs/GWDM/2.0.form.json b/docs/GWDM/2.0.form.json index c5b82ce..82a72eb 100644 --- a/docs/GWDM/2.0.form.json +++ b/docs/GWDM/2.0.form.json @@ -66,7 +66,7 @@ "https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561" ], "is_list": false, - "is_optional": false, + "is_optional": true, "types": { "title": "Url", "format": "uri", @@ -1593,5 +1593,6 @@ }, "location": "omics.platform" } - ] + ], + "url_regex": "(https?:\\/\\/)?([a-zA-Z0-9-]+\\.)+[a-zA-Z]{2,}(:\\d+)?(\\/[^\\s]*)?$" } \ No newline at end of file diff --git a/docs/GWDM/2.0.md b/docs/GWDM/2.0.md index 2b3d5f0..84df875 100644 --- a/docs/GWDM/2.0.md +++ b/docs/GWDM/2.0.md @@ -78,9 +78,9 @@ Examples: Some url with a reference to the record of a previous version of this dataset -| title | guidance | is_list | required | type | 
-|:-------------|:-----------|:----------|:-----------|:--------------------------------------------------------------------------------------------| -| revision url | | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | +| title | guidance | is_list | required | type | +|:-------------|:-----------|:----------|:-----------|:----------------------------------------------------------------------------------------------------| +| revision url | | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: diff --git a/docs/GWDM/2.0.structure.json b/docs/GWDM/2.0.structure.json index 09ab25e..e7a7c47 100644 --- a/docs/GWDM/2.0.structure.json +++ b/docs/GWDM/2.0.structure.json @@ -102,10 +102,11 @@ "https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561" ], "type": [ - "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" ], "is_list": false, - "is_optional": false, + "is_optional": true, "subItems": [] } ] diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 873f941..c05a702 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -1841,5 +1841,6 @@ }, "location": "omics.platform" } - ] + ], + "url_regex": "(https?:\\/\\/)?([a-zA-Z0-9-]+\\.)+[a-zA-Z]{2,}(:\\d+)?(\\/[^\\s]*)?$" } \ No newline at end of file diff --git a/hdr_schemata/models/GWDM/2.0/schema.json b/hdr_schemata/models/GWDM/2.0/schema.json index 1453e23..809db11 100644 --- a/hdr_schemata/models/GWDM/2.0/schema.json +++ b/hdr_schemata/models/GWDM/2.0/schema.json @@ -1368,9 +1368,12 @@ "type": "string" }, "url": { - "allOf": [ + "anyOf": [ { "$ref": "#/$defs/Url" + }, + { + "type": "null" } ], "description": "Some url with a reference to the record of a 
previous version of this dataset", diff --git a/hdr_schemata/models/GWDM/v2_0/Revision.py b/hdr_schemata/models/GWDM/v2_0/Revision.py index 4ae6e01..96a04cd 100644 --- a/hdr_schemata/models/GWDM/v2_0/Revision.py +++ b/hdr_schemata/models/GWDM/v2_0/Revision.py @@ -1,5 +1,6 @@ from pydantic import AnyUrl, BaseModel, EmailStr, Field, constr from hdr_schemata.definitions.HDRUK import * +from typing import Optional from .annotations import annotations @@ -13,7 +14,7 @@ class Revision(BaseModel): **an.version.__dict__, ) - url: Url = Field( + url: Optional[Url] = Field( ..., **an.url.__dict__, ) diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index b060d26..4fcc695 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -219,6 +219,7 @@ def remove_types(data): form = {} form["schema_fields"] = [] + form["url_regex"] = "(https?:\/\/)?([a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(:\d+)?(\/[^\s]*)?$" form_structure(structure, form) with open(f"{path}/{name}.form.json", "w") as f: json.dump(form, f, indent=6) From b515134272718ba17f0a24e3220889dae418d61a Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Fri, 30 Aug 2024 13:53:11 +0100 Subject: [PATCH 23/23] update regex pattern --- docs/HDRUK/3.0.0.form.json | 2 +- hdr_schemata/utils/create_markdown.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index c05a702..258aa93 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -1842,5 +1842,5 @@ "location": "omics.platform" } ], - "url_regex": "(https?:\\/\\/)?([a-zA-Z0-9-]+\\.)+[a-zA-Z]{2,}(:\\d+)?(\\/[^\\s]*)?$" + "url_regex": "^\\s*((https?:\\/\\/)*([a-zA-Z0-9-]+\\.?)+[a-zA-Z]{2,}(:\\d+)?(\\/[^\\s]*)?(\n)?)+$" } \ No newline at end of file diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index 4fcc695..c339e10 100644 --- a/hdr_schemata/utils/create_markdown.py 
+++ b/hdr_schemata/utils/create_markdown.py @@ -219,7 +219,7 @@ def remove_types(data): form = {} form["schema_fields"] = [] - form["url_regex"] = "(https?:\/\/)?([a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(:\d+)?(\/[^\s]*)?$" + form["url_regex"] = "^\s*((https?:\/\/)*([a-zA-Z0-9-]+\.?)+[a-zA-Z]{2,}(:\d+)?(\/[^\s]*)?(\n)?)+$" form_structure(structure, form) with open(f"{path}/{name}.form.json", "w") as f: json.dump(form, f, indent=6)