From b6167cec49944a29f71577c06aa12359c872db94 Mon Sep 17 00:00:00 2001 From: Branwen Snelling Date: Mon, 28 Oct 2024 09:09:16 +0000 Subject: [PATCH] update examples and make example metadata file --- docs/HDRUK/3.0.0.example.json | 258 ++++++++++-------- docs/HDRUK/3.0.0.form.json | 111 ++++++-- docs/HDRUK/3.0.0.md | 61 ++++- docs/HDRUK/3.0.0.structure.json | 111 ++++++-- docs/HDRUK/3.0.0.template.json | 151 ++++++++++ hdr_schemata/models/HDRUK/2.1.3/schema.json | 3 + hdr_schemata/models/HDRUK/2.2.0/schema.json | 3 + hdr_schemata/models/HDRUK/2.2.1/schema.json | 6 +- hdr_schemata/models/HDRUK/3.0.0/schema.json | 85 +++++- .../HDRUK/v3_0_0/annotations/config.yaml | 61 ++++- hdr_schemata/utils/create_example.py | 31 ++- 11 files changed, 707 insertions(+), 174 deletions(-) create mode 100644 docs/HDRUK/3.0.0.template.json diff --git a/docs/HDRUK/3.0.0.example.json b/docs/HDRUK/3.0.0.example.json index fa8b3a8..ea6bac5 100644 --- a/docs/HDRUK/3.0.0.example.json +++ b/docs/HDRUK/3.0.0.example.json @@ -1,134 +1,178 @@ { - "identifier": "Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}] | Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null", - "version": "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]", - "revisions": { - "version": "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]", - "url": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null" - }, - "issued": "datetime", - "modified": "datetime", + "identifier": null, + "version": "1.1.0", + "revisions": [ + { + "version": "6.0.0", + "url": null + } + ], + "issued": "2024-10-24T00:00:00.000Z", + "modified": "2024-10-24T00:00:00.000Z", "summary": { - "title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", - "abstract": 
"AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", - "dataProvider": { - "identifier": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null", - "name": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", - "logo": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null", - "description": "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null", - "contactPoint": "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}] | List", - "memberOf": "MemberOf['HUB','ALLIANCE','OTHER','NCS'] | null" + "title": "North West London COVID-19 Patient Level Situation Report", + "abstract": "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice.", + "dataCustodian": { + "identifier": "30f16703-28bc-4f45-9ce5-625d2d3db27d", + "name": "Health Data Research UK", + "logo": null, + "description": null, + "contactPoint": "test@test.co.uk", + "memberOf": null }, - "populationSize": "int", - "keywords": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List | null", - "doiName": "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}] | null", - "contactPoint": "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", - "alternateIdentifiers": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List | null" + "populationSize": 1000, + "keywords": [ + "Outpatient Care", + "Socioeconomic Deprivation", + "Infant Morbidity", + "Learning disability", + "Primary Care Prescription", + "Accident and 
Emergency Admissions" + ], + "doiName": null, + "contactPoint": "gateway@hdruk.ac.uk", + "alternateIdentifiers": null }, "documentation": { - "description": "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", - "associatedMedia": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List | null", - "inPipeline": "Pipeline['Available','Not available'] | null" + "description": "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice.", + "associatedMedia": null, + "inPipeline": null }, "coverage": { - "spatial": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List", - "typicalAgeRangeMin": "int | null", - "typicalAgeRangeMax": "int | null", - "datasetCompleteness": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null", - "materialType": "MaterialTypeCategoriesV2['None/not available','Bone marrow','Cancer cell lines','CDNA/MRNA','Core biopsy','DNA','Entire body organ','Faeces','Immortalized cell lines','Isolated pathogen','MicroRNA','Peripheral blood cells','Plasma','PM Tissue','Primary cells','RNA','Saliva','Serum','Swabs','Tissue','Urine','Whole blood','Availability to be confirmed','Other']", - "followup": "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null] | null", - "pathway": "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null", - "gender": "GenderType['Male','Female','Other']" + "spatial": "United Kingdom", + "typicalAgeRangeMin": null, + "typicalAgeRangeMax": null, + "datasetCompleteness": null, + "materialType": null, + "followUp": null, + "pathway": null }, "provenance": { "origin": { 
- "purpose": "PurposeV2['Research cohort','Study','Disease registry','Trial','Care','Audit','Administrative','Financial','Statuatory','Other',null]", - "datasetType": "DatasetTypeV2['Health and disease','Treatments/Interventions','Measurements/Tests','Imaging types','Imaging area of the body','Omics','Socioeconomic','Lifestyle','Registry','Environment and energy','Information and communication','Politics']", - "datasetSubType": "DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and Endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and Gastrointestinal','Cognitive Function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Multiomics','Metagenomics','Genomics','Education','Crime and Justice','Ethnicity','Housing ','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical Activity','Dietary habits','Alcohol','Disease Registry (research)','National Disease Registries and Audits','Births and Deaths','Not applicable'] | null", - "source": "SourceV2['EPR','Electronic survey','LIMS','Paper-based','Free text NLP','Machine generated','Other']", - "collectionSource": "SettingV2['Cohort, study, trial','Clinic','Primary care - Referrals','Primary care - Clinic','Primary care - Out of hours','Secondary care - Accident and Emergency','Secondary care - Outpatients','Secondary care - In-patients','Secondary care - Ambulance','Secondary care - ICU','Prescribing - Community pharmacy','Prescribing - Hospital','Patient report outcome','Wearables','Local authority','National government','Community','Services','Home','Private','Social care - Health care at home','Social care 
- Other social data','Census','Other',null]", - "imageContrast": "Ternary['Yes','No','Not stated'] | null" + "purpose": null, + "datasetType": [ + "Health and disease" + ], + "datasetSubType": null, + "source": null, + "collectionSource": null, + "imageContrast": null }, "temporal": { - "publishingFrequency": "PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]", - "distributionReleaseDate": "date | datetime | null", - "startDate": "date | datetime", - "endDate": "date | datetime | EndDateEnum['CONTINUOUS',null] | null", - "timeLag": "TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','6 months plus','Variable','Not applicable','Other']" + "publishingFrequency": "Continuous", + "distributionReleaseDate": null, + "startDate": "2024-10-24T00:00:00.000Z", + "endDate": null, + "timeLag": "Not applicable" } }, "accessibility": { "usage": { - "dataUseLimitation": "DataUseLimitationV2['General research use','Genetic studies only','No general methods research','No restriction','Research-specific restrictions','Research use only','No linkage']", - "dataUseRequirements": "DataUseRequirementsV2['Collaboration required','Ethics approval required','Geographical restrictions','Institution-specific restrictions','Not for profit use','Project-specific restrictions','Publication moratorium','Publication required','Return to database or resource','Time limit on use','User-specific restriction']", - "resourceCreator": "ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | List | null" + "dataUseLimitation": null, + "dataUseRequirements": null, + "resourceCreator": null }, "access": { - "accessRights": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", - "accessServiceCategory": "AccessService['TRE/SDE','Direct access','Open 
access','Varies based on project'] | null", - "accessMode": "AccessMode['Join research consortium','New project'] | null", - "accessService": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null", - "accessRequestCost": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null", - "deliveryLeadTime": "DeliveryLeadTimeV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other'] | null", - "jurisdiction": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List | null", - "dataController": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null", - "dataProcessor": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null" + "accessRights": "In Progress", + "accessServiceCategory": null, + "accessService": null, + "accessRequestCost": null, + "deliveryLeadTime": null, + "jurisdiction": null, + "dataController": null, + "dataProcessor": null }, "formatAndStandards": { - "vocabularyEncodingScheme": "ControlledVocabulary[{'$defs': {'ControlledVocabularyEnum': {'enum': ['LOCAL', 'OPCS4', 'READ', 'SNOMED CT', 'SNOMED RT', 'DM PLUS D', 'DM+D', 'NHS NATIONAL CODES', 'NHS SCOTLAND NATIONAL CODES', 'NHS WALES NATIONAL CODES', 'ODS', 'LOINC', 'ICD10', 'ICD10CM', 'ICD10PCS', 'ICD9CM', 'ICD9', 'ICDO3', 'AMT', 'APC', 'ATC', 'CIEL', 'HPO', 'CPT4', 'DPD', 'DRG', 'HEMONC', 'JMDC', 'KCD7', 'MULTUM', 'NAACCR', 'NDC', 'NDFRT', 'OXMIS', 'RXNORM', 'RXNORM EXTENSION', 'SPL', 'OTHER'], 'title': 'ControlledVocabularyEnum', 'type': 'string'}}, 'anyOf': [{'$ref': '#/$defs/ControlledVocabularyEnum'}, {'type': 'null'}], 'default': null}]", - "conformsTo": "StandardisedDataModels[{'$defs': {'StandardisedDataModelsEnum': {'enum': ['HL7 FHIR', 'HL7 V2', 'HL7 CDA', 'HL7 
CCOW', 'LOINC', 'DICOM', 'I2B2', 'IHE', 'OMOP', 'OPENEHR', 'SENTINEL', 'PCORNET', 'CDISC', 'NHS DATA DICTIONARY', 'NHS SCOTLAND DATA DICTIONARY', 'NHS WALES DATA DICTIONARY', 'LOCAL', 'OTHER'], 'title': 'StandardisedDataModelsEnum', 'type': 'string'}}, 'anyOf': [{'$ref': '#/$defs/StandardisedDataModelsEnum'}, {'type': 'null'}], 'default': null}]", - "language": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List | null", - "format": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List | null" + "vocabularyEncodingScheme": [ + "LOCAL", + "ICD10" + ], + "conformsTo": [ + "LOCAL", + "NHS DATA DICTIONARY" + ], + "language": [ + "en" + ], + "format": [ + "text/tab-separated-values", + "application/sql", + "text/csv", + "image/diacom-rle" + ] } }, "enrichmentAndLinkage": { - "derivedFrom": { - "pid": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", - "title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", - "url": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null" - }, - "isPartOf": { - "pid": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", - "title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", - "url": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null" - }, - "linkableDatasets": { - "pid": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", - "title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", - "url": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null" - }, - "similarToDatasets": { - "pid": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", - 
"title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", - "url": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null" - }, - "investigations": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", - "tools": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", - "publicationAboutDataset": "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", - "publicationUsingDataset": "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]" - }, - "observations": { - "observedNode": "StatisticalPopulationConstrainedV2['Person','Event','Findings','Number of scans per modality']", - "measuredValue": "int", - "disambiguatingDescription": "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}] | null", - "observationDate": "date | datetime", - "measuredProperty": "MeasuredProperty[{}]" + "derivedFrom": [ + { + "pid": null, + "title": null, + "url": null + } + ], + "isPartOf": [ + { + "pid": null, + "title": null, + "url": null + } + ], + "linkableDatasets": [ + { + "pid": null, + "title": null, + "url": null + } + ], + "similarToDatasets": [ + { + "pid": null, + "title": null, + "url": null + } + ], + "investigations": null, + "tools": null, + "publicationAboutDataset": [ + "10.1093/ije/dyab028" + ], + "publicationUsingDataset": [ + "10.1001/jamapediatrics.2016.3633" + ] }, - "structuralMetadata": { - "tables": { - "name": "str | null", - "description": "str | null", - "columns": { - "name": "Name[{}]", - "dataType": "str", - "description": "str | null", - "sensitive": "bool", - "values": { - "name": "Name[{}]", - "description": "str | null", - "frequency": "int | null" - } + "observations": [ + { + "observedNode": "Persons", + "measuredValue": 1000, + 
"disambiguatingDescription": null, + "observationDate": "2024-10-24", + "measuredProperty": "Count" + } + ], + "structuralMetadata": null, + "demographicFrequency": { + "age": [ + { + "bin": "30-34 years", + "count": 1000 } - }, - "syntheticDataWebLink": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + ], + "ethnicity": [ + { + "bin": "Black or Black British - Any other Black background", + "count": 1000 + } + ], + "disease": [ + { + "diseaseCode": "J45", + "diseaseCodeVocabulary": "ICD10", + "count": 1000 + } + ] + }, + "omics": { + "assay": null, + "platform": null } } \ No newline at end of file diff --git a/docs/HDRUK/3.0.0.form.json b/docs/HDRUK/3.0.0.form.json index 9166800..4b575e9 100644 --- a/docs/HDRUK/3.0.0.form.json +++ b/docs/HDRUK/3.0.0.form.json @@ -76,7 +76,9 @@ "title": "Metadata Issued Datetime", "description": "Datetime stamp of when this metadata version was initially issued", "guidance": "", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "is_list": false, "is_optional": false, "types": "datetime", @@ -87,7 +89,9 @@ "title": "Last Modified Datetime", "description": "Datetime stamp of when this metadata was last modified", "guidance": "", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "is_list": false, "is_optional": false, "types": "datetime", @@ -135,7 +139,9 @@ "title": "identifier", "description": "Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation.", "guidance": "**Example**: https://ror.org/053fq8t95\\nIf your organisation does not have a ROR identifier please use the \u201csuggest and institute\u201d function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform", - "examples": null, + "examples": [ + "30f16703-28bc-4f45-9ce5-625d2d3db27d" + ], "is_list": false, "is_optional": false, "types": "str", @@ -146,7 +152,9 @@ "title": "Name of data 
provider", "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata.", "guidance": "In most this will be the same as the Team you have on the Gateway. However, in some cases this will be different. For example, Tissue Directory are a Team on the Gateway but coordinate activities across a number of Data Providers such as Cambridge Blood and Stem Cell Biobank.", - "examples": null, + "examples": [ + "Health Data Research UK" + ], "is_list": false, "is_optional": false, "types": { @@ -194,7 +202,9 @@ "title": "contact point", "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. If a contact point is not provided this will default to the contact point for the team submitting the metadata.", "guidance": "", - "examples": null, + "examples": [ + "test@test.co.uk" + ], "is_list": false, "is_optional": false, "types": { @@ -228,7 +238,9 @@ "title": "Dataset population size", "description": "Input the number of people captured within the dataset.", "guidance": "This number informs a filter for Researchers to differentiate dataset search results based on the number of people in the dataset, and does not pull from the **Observations** fields. The filter also allows for Researchers to search datasets which have no population size reported, but will **not** pull any population size captured in the **Observations** section.", - "examples": null, + "examples": [ + 1000 + ], "is_list": false, "is_optional": false, "types": "int", @@ -280,7 +292,7 @@ "description": "Please provide a valid email address that can be used to coordinate data access requests.", "guidance": "Organisations are expected to provide a dedicated email address associated with the data access request process. 
If no contact point is provided in this field, this field will be defaulted to the teams support email provided in the teams setting.\\n**Note:** An employee's email address can only be provided on a temporary basis and if one is provided, **you must obtain explicit consent for this purpose**.", "examples": [ - "SAILDatabank@swansea.ac.uk" + "gateway@hdruk.ac.uk" ], "is_list": false, "is_optional": false, @@ -311,7 +323,9 @@ "title": "Description", "description": "A free-text description of the dataset.\\nA URL can also be provided as the description of the dataset.\\nGateway Feature: Keywords and text may be extracted out of the description and indexed for search.", "guidance": "- An HTML account of the data that **provides context and scope** of the data, **limited to 10000 characters, and/or a resolvable URL** that describes the dataset.\\n- Additional information can be recorded and included using the Associated media field.", - "examples": null, + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." + ], "is_list": false, "is_optional": false, "types": { @@ -362,6 +376,7 @@ "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in one of the recommended standards:\\n- For locations in the UK: [ONS standards](https://geoportal.statistics.gov.uk/datasets/208d9884575647c29f0dd5a1184e711a/about)\\n- For locations in other countries: [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes)", "guidance": "- The geographical area covered by the dataset.\\n- Please provide a valid location.\\n- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/208d9884575647c29f0dd5a1184e711a/about).\\n- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes).", "examples": [ + "United Kingdom", "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" ], "is_list": false, @@ -526,7 +541,9 @@ "title": "Dataset type", "description": "The topic areas to which the dataset content relates.", "guidance": "Types include those listed below. 
Datasets can have more than one type associated.\\n- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.\\n- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.\\n- **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.\\n- **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.\\n- **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.\\n- **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics.\\n- **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.\\n- **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.\\n- **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.\\n- **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.\\n- **Information and communication**: Includes any data related to the study or application of information and communication.\\n- **Politics**: Includes any data related to political views, activities, voting, etc.", - "examples": null, + "examples": [ + "Health and disease" + ], "is_list": true, "is_optional": false, "types": { @@ -710,7 +727,9 @@ "title": "Publishing frequency", "description": "Please 
indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", "guidance": "Please indicate the frequency of publishing.\\n- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.\\n- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.\\n- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.\\n- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.\\n- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.\\n- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/.\\n\\n Options:\\n- **Static**: Dataset published once.\\n- **Irregular**: Dataset published at uneven intervals.\\n- **Continuous**: Dataset published without interruption.\\n- **Biennial**: Dataset published every two years.\\n- **Annual**: Dataset published occurs once a year.\\n- **Biannual**: Dataset published twice a year.\\n- **Quarterly**: 
Dataset published every three months.\\n- **Bimonthly**: Dataset published every two months.\\n- **Monthly**: Dataset published once a month.\\n- **Biweekly**: Dataset published every two weeks.\\n- **Weekly**: Dataset published once a week.\\n- **Twice weekly**: Dataset published twice a week.\\n- **Daily**: Dataset published once a day.\\n- **Other**: Dataset published using other interval.", - "examples": null, + "examples": [ + "Continuous" + ], "is_list": false, "is_optional": false, "types": { @@ -740,7 +759,9 @@ "title": "Distribution release date", "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", "guidance": "- Please indicate the frequency the dataset is published.\\n- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.\\n- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.\\n- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.\\n- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.\\n- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.\\n- Notes: see 
[https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/](https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/)", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "is_list": false, "is_optional": true, "types": "date", @@ -751,7 +772,9 @@ "title": "Start date", "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", "guidance": "- The start of the time period that the dataset provides coverage for.\\n- If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "is_list": false, "is_optional": false, "types": "date", @@ -762,7 +785,9 @@ "title": "End date", "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. 
If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", "guidance": "- The end of the time period that the dataset provides coverage for.\\n- If the dataset is **\u201cContinuous\u201d** and has no known end date, **please leave blank**.\\n- If there are **multiple cohorts** in the dataset with varying end dates, please provide the **latest date**.", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "is_list": false, "is_optional": true, "types": "date", @@ -773,7 +798,9 @@ "title": "Time lag", "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.", "guidance": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.\\n- **Less than 1 week**: Typical time lag of less than a week.\\n- **1-2 weeks**: Typical time-lag of one to two weeks.\\n- **2-4 weeks**: Typical time-lag of two to four weeks.\\n- **1-2 months**: Typical time-lag of one to two months.\\n- **2-6 months**: Typical time-lag of two to six months.\\n- **6 months plus**: Typical time-lag of more than six months.\\n- **Variable**: Variable time-lag.\\n- **Not applicable**: Not Applicable i.e. static dataset.\\n- **Other**: Other time-lag.", - "examples": null, + "examples": [ + "Not applicable" + ], "is_list": false, "is_optional": false, "types": { @@ -870,7 +897,9 @@ "title": "Access rights", "description": "Please provide details for the data access rights.", "guidance": "- The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. 
industry vs academic please provide both separated by a comma.\\n- If such a resource or the underlying process doesn\u2019t exist, please provide \u201cIn Progress\u201d, until both the process and the documentation are ready.", - "examples": null, + "examples": [ + "In Progress" + ], "is_list": false, "is_optional": false, "types": { @@ -1018,7 +1047,10 @@ "title": "Controlled vocabulary", "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", "guidance": "- List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.\\n- If the controlled vocabularies are local standards, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.\\n- Notes: More than one vocabulary may be provided.\\n- **Local**: Local Coding Standard.\\n- **OPCS4**: .\\n- **READ**: .\\n- **SNOMED CT**: .\\n- **SNOMED RT**: .\\n- **DM+D**: .\\n- **NHS National Codes**: .\\n- **ODS**: .\\n- **LOINC**: .\\n- **ICD10**: .\\n- **ICD10CM**: .\\n- **ICD10PCS**: .\\n- **ICD9CM**: .\\n- **ICD9**: .\\n- **ICDO3**: .\\n- **AMT**: .\\n- **APC**: .\\n- **ATC**: .\\n- **CIEL**: .\\n- **HPO**: .\\n- **CPT4**: .\\n- **DPD**: .\\n- **DRG**: .\\n- **HEMONC**: .\\n- **JMDC**: .\\n- **KCD7**: .\\n- **MULTUM**: .\\n- **NAACCR**: .\\n- **NDC**: .\\n- **NDFRT** <:https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.\\n- **OXMIS**: .\\n- **RXNORM**: .\\n- **RXNORM EXTENSION**: .\\n- **SPL**: .\\n- **Other**: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.\\n- **NHS Scotland National Codes**: .\\n- **NHS Wales National Codes**: ", - "examples": null, + "examples": [ + "LOCAL", + "ICD10" + ], "is_list": true, "is_optional": false, "types": { @@ -1072,7 +1104,8 @@ "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", "guidance": "- List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.\\n- If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.\\n- **HL7 FHIR**: .\\n- **HL7 V2**: .\\n- **HL7 CDA**: .\\n- **HL7 CCOW**: .\\n- **DICOM**: .\\n- **I2B2**: .\\n- **IHE**: .\\n- **OMOP**: .\\n- **openEHR**: .\\n- **Sentinel**: .\\n- **PCORnet**: .\\n- **CDISC**: .\\n- **Local**: In-house developed data model.\\n- **Other**: Other standardised data model.\\n- **NHS Data Dictionary**: .\\n- **NHS Scotland Data Dictionary**: .\\n- **NHS Wales Data Dictionary**: .", "examples": [ - "LOCAL,NHS DATA DICTIONARY" + "LOCAL", + "NHS DATA DICTIONARY" ], "is_list": true, "is_optional": false, @@ -1106,7 +1139,9 @@ "title": "Language", "description": "This should list all the languages in which the dataset metadata and underlying data is made available complaint with ISO 639.", "guidance": "https://www.iso.org/iso-639-language-code\\n- **aa**: Afar\\n- **ab**: Abkhazian\\n- **af**: Afrikaans\\n- **ak**: Akan\\n- **sq**: Albanian\\n- **am**: Amharic\\n- **ar**: Arabic\\n- **an**: Aragonese\\n- **hy**: Armenian\\n- **as**: Assamese\\n- **av**: Avaric\\n- **ae**: Avestan\\n- **ay**: Aymara\\n- **az**: Azerbaijani\\n- **ba**: Bashkir\\n- **bm**: Bambara\\n- **eu**: Basque\\n- **be**: Belarusian\\n- **bn**: Bengali\\n- **bh**: Bihari languages\\n- **bi**: Bislama\\n- **bo**: Tibetan\\n- **bs**: Bosnian\\n- **br**: Breton\\n- **bg**: Bulgarian\\n- **my**: Burmese\\n- **ca**: Catalan; Valencian\\n- **cs**: Czech\\n- **ch**: Chamorro\\n- **ce**: Chechen\\n- **zh**: Chinese\\n- **cu**: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic\\n- **cv**: Chuvash\\n- **kw**: Cornish\\n- **co**: Corsican\\n- **cr**: Cree\\n- **cy**: Welsh\\n- **cs**: Czech\\n- **da**: Danish\\n- **de**: German\\n- **dv**: Divehi; Dhivehi; Maldivian\\n- **nl**: Dutch; Flemish\\n- **dz**: Dzongkha\\n- **el**: Greek, Modern (1453-)\\n- **en**: English\\n- **eo**: Esperanto\\n- **et**: 
Estonian\\n- **eu**: Basque\\n- **ee**: Ewe\\n- **fo**: Faroese\\n- **fa**: Persian\\n- **fj**: Fijian\\n- **fi**: Finnish\\n- **fr**: French\\n- **fy**: Western Frisian\\n- **ff**: Fulah\\n- **ka**: Georgian\\n- **de**: German\\n- **gd**: Gaelic; Scottish Gaelic\\n- **ga**: Irish\\n- **gl**: Galician\\n- **gv**: Manx\\n- **el**: Greek, Modern (1453-)\\n- **gn**: Guarani\\n- **gu**: Gujarati\\n- **ht**: Haitian; Haitian Creole\\n- **ha**: Hausa\\n- **ho**: Hiri Motu\\n- **hr**: Croatian\\n- **hu**: Hungarian\\n- **hy**: Armenian\\n- **ig**: Igbo\\n- **is**: Icelandic\\n- **io**: Ido\\n- **ii**: Sichuan Yi; Nuosu\\n- **iu**: Inuktitut\\n- **ie**: Interlingue; Occidental\\n- **ia**: Interlingua (International Auxiliary Language Association)\\n- **id**: Indonesian\\n- **ik**: Inupiaq\\n- **is**: Icelandic\\n- **it**: Italian\\n- **jv**: Javanese\\n- **ja**: Japanese\\n- **kl**: Kalaallisut; Greenlandic\\n- **kn**: Kannada\\n- **ks**: Kashmiri\\n- **ka**: Georgian\\n- **kr**: Kanuri\\n- **kk**: Kazakh\\n- **km**: Central Khmer\\n- **ki**: Kikuyu; Gikuyu\\n- **rw**: Kinyarwanda\\n- **ky**: Kirghiz; Kyrgyz\\n- **kv**: Komi\\n- **kg**: Kongo\\n- **ko**: Korean\\n- **kj**: Kuanyama; Kwanyama\\n- **ku**: Kurdish\\n- **lo**: Lao\\n- **la**: Latin\\n- **lv**: Latvian\\n- **li**: Limburgan; Limburger; limburgish\\n- **ln**: Lingala\\n- **lt**: Lithuanian\\n- **lb**: Luxembourgish; Letzeburgesch\\n- **lu**: Luba-Katanga\\n- **lg**: Ganda\\n- **mk**: Macedonian\\n- **mh**: Marshallese\\n- **ml**: Malayalam\\n- **mi**: Maori\\n- **mr**: Marathi\\n- **ms**: Malay\\n- **mk**: Macedonian\\n- **mg**: Malagasy\\n- **mt**: Maltese\\n- **mn**: Mongolian\\n- **mi**: Maori\\n- **ms**: Malay\\n- **my**: Burmese\\n- **na**: Nauru\\n- **nv**: Navajo; Navaho\\n- **nr**: Ndebele, South; South Ndebele\\n- **nd**: Ndebele, North; North Ndebele\\n- **ng**: Ndonga\\n- **ne**: Nepali\\n- **nl**: Dutch; Flemish\\n- **nn**: Norwegian Nynorsk; Nynorsk, Norwegian\\n- **nb**: Bokm\u00e5l, Norwegian; 
Norwegian Bokm\u00e5l\\n- **no**: Norwegian\\n- **ny**: Chichewa; Chewa; Nyanja\\n- **oc**: Occitan (post 1500)\\n- **oj**: Ojibwa\\n- **or**: Oriya\\n- **om**: Oromo\\n- **os**: Ossetian; Ossetic\\n- **pa**: Panjabi; Punjabi\\n- **fa**: Persian\\n- **pi**: Pali\\n- **pl**: Polish\\n- **pt**: Portuguese\\n- **ps**: Pushto; Pashto\\n- **qu**: Quechua\\n- **rm**: Romansh\\n- **ro**: Romanian; Moldavian; Moldovan\\n- **rn**: Rundi\\n- **ru**: Russian\\n- **sg**: Sango\\n- **sa**: Sanskrit\\n- **si**: Sinhala; Sinhalese\\n- **sk**: Slovak\\n- **sl**: Slovenian\\n- **se**: Northern Sami\\n- **sm**: Samoan\\n- **sn**: Shona\\n- **sd**: Sindhi\\n- **so**: Somali\\n- **st**: Sotho, Southern\\n- **es**: Spanish; Castilian\\n- **sq**: Albanian\\n- **sc**: Sardinian\\n- **sr**: Serbian\\n- **ss**: Swati\\n- **su**: Sundanese\\n- **sw**: Swahili\\n- **sv**: Swedish\\n- **ty**: Tahitian\\n- **ta**: Tamil\\n- **tt**: Tatar\\n- **te**: Telugu\\n- **tg**: Tajik\\n- **tl**: Tagalog\\n- **th**: Thai\\n- **bo**: Tibetan\\n- **ti**: Tigrinya\\n- **to**: Tonga (Tonga Islands)\\n- **tn**: Tswana\\n- **ts**: Tsonga\\n- **tk**: Turkmen\\n- **tr**: Turkish\\n- **tw**: Twi\\n- **ug**: Uighur; Uyghur\\n- **uk**: Ukrainian\\n- **ur**: Urdu\\n- **uz**: Uzbek\\n- **ve**: Venda\\n- **vi**: Vietnamese\\n- **vo**: Volap\u00fck\\n- **cy**: Welsh\\n- **wa**: Walloon\\n- **wo**: Wolof\\n- **xh**: Xhosa\\n- **yi**: Yiddish\\n- **yo**: Yoruba\\n- **za**: Zhuang; Chuang\\n- **zh**: Chinese\\n- **zu**: Zulu", - "examples": null, + "examples": [ + "en" + ], "is_list": true, "is_optional": false, "types": { @@ -1604,7 +1639,9 @@ "title": "Measured value", "description": "An integer value size of the measured property, such as \u20181000\u2019 for 1000 people in the study or \u201887\u2019 for 87 MRI scans in the dataset.", "guidance": "An integer value size of the measured property, such as \u20181000\u2019 for 1000 people in the study or \u201887\u2019 for 87 MRI scans in the dataset.", - "examples": 
null, + "examples": [ + 1000 + ], "is_list": false, "is_optional": false, "types": "int", @@ -1631,7 +1668,9 @@ "title": "Observation date", "description": "Provide the date, or datetime that the observation was made. Multiple observations of the same property can be provided, for example an observation of cumulative COVID positive cases by specimen on the 1/1/2021 with a measuredValue of 2000000, and a second observation entry on 8/2/2021 recording a measuredValue of as 3100000.", "guidance": "Provide the date, or datetime that the observation was made. Multiple observations of the same property can be provided, for example an observation of cumulative COVID positive cases by specimen on the 1/1/2021 with a measuredValue of 2000000, and a second observation entry on 8/2/2021 recording a measuredValue of as 3100000.", - "examples": null, + "examples": [ + "2024-10-24" + ], "is_list": false, "is_optional": false, "types": "date", @@ -1642,7 +1681,9 @@ "title": "Measured property", "description": "Descriptive term for the observation property measured. For example, people, procedures, x-rays, or diagnosis of type 1 diabetes. 
This could also be a specific SNOMED CT term.", "guidance": "Descriptive term for the observation property measured.", - "examples": null, + "examples": [ + "Count" + ], "is_list": false, "is_optional": false, "types": { @@ -1655,7 +1696,9 @@ "title": "Age grouping", "description": null, "guidance": "", - "examples": null, + "examples": [ + "30-34 years" + ], "is_list": false, "is_optional": false, "types": { @@ -1694,7 +1737,9 @@ "title": "Age count", "description": null, "guidance": "", - "examples": null, + "examples": [ + 1000 + ], "is_list": false, "is_optional": false, "types": "int", @@ -1705,7 +1750,9 @@ "title": "Ethnicity grouping", "description": null, "guidance": "", - "examples": null, + "examples": [ + "Black or Black British - Any other Black background" + ], "is_list": false, "is_optional": false, "types": { @@ -1738,7 +1785,9 @@ "title": "Ethnicity count", "description": null, "guidance": "", - "examples": null, + "examples": [ + 1000 + ], "is_list": false, "is_optional": false, "types": "int", @@ -1749,7 +1798,9 @@ "title": "Disease code", "description": null, "guidance": "", - "examples": null, + "examples": [ + "J45" + ], "is_list": false, "is_optional": false, "types": "str", @@ -1760,7 +1811,9 @@ "title": "Disease code vocabulary", "description": null, "guidance": "", - "examples": null, + "examples": [ + "ICD10" + ], "is_list": false, "is_optional": false, "types": { @@ -1778,7 +1831,9 @@ "title": "Disease count", "description": null, "guidance": "", - "examples": null, + "examples": [ + 1000 + ], "is_list": false, "is_optional": false, "types": "int", diff --git a/docs/HDRUK/3.0.0.md b/docs/HDRUK/3.0.0.md index b6ed61a..5ff9602 100644 --- a/docs/HDRUK/3.0.0.md +++ b/docs/HDRUK/3.0.0.md @@ -69,7 +69,9 @@ Datetime stamp of when this metadata version was initially issued |:-------------------------|:-----------|:----------|:-----------|:-------------| | Metadata Issued Datetime | | False | True | ['datetime'] | +Examples: + * 
2024-10-24T00:00:00.000Z ## modified @@ -80,7 +82,9 @@ Datetime stamp of when this metadata was last modified |:-----------------------|:-----------|:----------|:-----------|:-------------| | Last Modified Datetime | | False | True | ['datetime'] | +Examples: + * 2024-10-24T00:00:00.000Z ## summary @@ -136,7 +140,9 @@ Please provide a Research Organization Registry (ROR) identifier (see https://ro |:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------| | identifier | **Example**: https://ror.org/053fq8t95
If your organisation does not have a ROR identifier please use the “suggest and institute” function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform | False | True | ['str', 'int'] | +Examples: + * 30f16703-28bc-4f45-9ce5-625d2d3db27d #### name @@ -147,7 +153,9 @@ The organisation responsible for running or supporting the data access request p |:----------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------| | Name of data provider | In most this will be the same as the Team you have on the Gateway. However, in some cases this will be different. For example, Tissue Directory are a Team on the Gateway but coordinate activities across a number of Data Providers such as Cambridge Blood and Stem Cell Biobank. | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | +Examples: + * Health Data Research UK #### logo @@ -180,7 +188,9 @@ Organisation contact point(s) which will be used for receiving queries from HDR, |:--------------|:-----------|:----------|:-----------|:-----------------------------------------------------------------------------------------------| | contact point | | False | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'List'] | +Examples: + * test@test.co.uk #### memberOf @@ -202,7 +212,9 @@ Input the number of people captured within the dataset. 
|:------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------| | Dataset population size | This number informs a filter for Researchers to differentiate dataset search results based on the number of people in the dataset, and does not pull from the **Observations** fields. The filter also allows for Researchers to search datasets which have no population size reported, but will **not** pull any population size captured in the **Observations** section. | False | True | ['int'] | +Examples: + * 1000 ### keywords @@ -246,7 +258,7 @@ Please provide a valid email address that can be used to coordinate data access Examples: - * SAILDatabank@swansea.ac.uk + * gateway@hdruk.ac.uk ### alternateIdentifiers @@ -277,7 +289,9 @@ A free-text description of the dataset.
A URL can also be provided as the des |:------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-------------------------------------------------------------------------------------------------------| | Description | - An HTML account of the data that **provides context and scope** of the data, **limited to 10000 characters, and/or a resolvable URL** that describes the dataset.
- Additional information can be recorded and included using the Associated media field. | False | True | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]"] | +Examples: + * CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web® including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice. ### associatedMedia @@ -323,7 +337,8 @@ The geographical area covered by the dataset. It is recommended that links are t Examples: - * https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html + * United Kingdom + * https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html ### typicalAgeRangeMin @@ -435,7 +450,9 @@ The topic areas to which the dataset content relates. |:-------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Dataset type | Types include those listed below. Datasets can have more than one type associated.
- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.
- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.
- **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.
- **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.
- **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm, abdomen or leg imaging.
- **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics.
- **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.
- **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.
- **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.
- **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.
- **Information and communication**: Includes any data related to the study or application of information and communication.
- **Politics**: Includes any data related to political views, activities, voting, etc. | True | True | ["DatasetTypeV2['Health and disease','Treatments/Interventions','Measurements/Tests','Imaging types','Imaging area of the body','Omics','Socioeconomic','Lifestyle','Registry','Environment and energy','Information and communication','Politics']"] | +Examples: + * Health and disease #### datasetSubType @@ -499,7 +516,9 @@ Please indicate the frequency of distribution release. If a dataset is distribut |:---------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Publishing frequency | Please indicate the frequency of publishing.
- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.
- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.
- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.
- If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null.
- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.
- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/.

Options:
- **Static**: Dataset published once.
- **Irregular**: Dataset published at uneven intervals.
- **Continuous**: Dataset published without interruption.
- **Biennial**: Dataset published every two years.
- **Annual**: Dataset published once a year.
- **Biannual**: Dataset published twice a year.
- **Quarterly**: Dataset published every three months.
- **Bimonthly**: Dataset published every two months.
- **Monthly**: Dataset published once a month.
- **Biweekly**: Dataset published every two weeks.
- **Weekly**: Dataset published once a week.
- **Twice weekly**: Dataset published twice a week.
- **Daily**: Dataset published once a day.
- **Other**: Dataset published using other interval. | False | True | ["PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]"] | +Examples: + * Continuous #### distributionReleaseDate @@ -510,7 +529,9 @@ Date of the latest release of the dataset. If this is a regular release i.e. qua |:--------------------------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------| | Distribution release date | - Please indicate the frequency the dataset is published.
- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.
- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.
- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.
- If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null.
- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.
- Notes: see [https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/](https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/) | False | False | ['date', 'datetime', 'null'] | +Examples: + * 2024-10-24T00:00:00.000Z #### startDate @@ -521,7 +542,9 @@ The start of the time period that the dataset provides coverage for. If there ar |:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------| | Start date | - The start of the time period that the dataset provides coverage for.
- If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. | False | True | ['date', 'datetime'] | +Examples: + * 2024-10-24T00:00:00.000Z #### endDate @@ -532,7 +555,9 @@ The end of the time period that the dataset provides coverage for. If the datase |:---------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------| | End date | - The end of the time period that the dataset provides coverage for.
- If the dataset is **“Continuous”** and has no known end date, **please leave blank**.
- If there are **multiple cohorts** in the dataset with varying end dates, please provide the **latest date**. | False | False | ['date', 'datetime', "EndDateEnum['CONTINUOUS',null]", 'null'] | +Examples: + * 2024-10-24T00:00:00.000Z #### timeLag @@ -543,7 +568,9 @@ Please indicate the typical time-lag between an event and the data for that even |:---------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------| | Time lag | Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.
- **Less than 1 week**: Typical time-lag of less than a week.
- **1-2 weeks**: Typical time-lag of one to two weeks.
- **2-4 weeks**: Typical time-lag of two to four weeks.
- **1-2 months**: Typical time-lag of one to two months.
- **2-6 months**: Typical time-lag of two to six months.
- **6 months plus**: Typical time-lag of more than six months.
- **Variable**: Variable time-lag.
- **Not applicable**: Not Applicable i.e. static dataset.
- **Other**: Other time-lag. | False | True | ["TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']"] | +Examples: + * Not applicable ## accessibility @@ -616,7 +643,9 @@ Please provide details for the data access rights. |:--------------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------| | Access rights | - The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both separated by a comma.
- If such a resource or the underlying process doesn’t exist, please provide “In Progress”, until both the process and the documentation are ready. | False | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]"] | +Examples: + * In Progress #### accessServiceCategory @@ -723,7 +752,10 @@ List any relevant terminologies / ontologies / controlled vocabularies, such as |:----------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Controlled vocabulary | - List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.
- If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition.
- Notes: More than one vocabulary may be provided.
- **Local**: Local Coding Standard.
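- **OPCS4**: <https://www.datadictionary.nhs.uk/web_site_content/supporting_information/clinical_coding/opcs_classification_of_interventions_and_procedures.asp>.
- **READ**: <https://digital.nhs.uk/services/terminology-and-classifications/read-codes>.
- **SNOMED CT**: <http://www.snomed.org/>.
- **SNOMED RT**: <https://confluence.ihtsdotools.org/display/DOCGLOSS/SNOMED+RT>.
- **DM+D**: <https://digital.nhs.uk/data-and-information/information-standards/information-standards-and-data-collections-including-extractions/publications-and-notifications/standards-and-collections/scci0052-dictionary-of-medicines-and-devices-dm-d>.
- **NHS National Codes**: <https://www.datadictionary.nhs.uk/>.
- **ODS**: <https://digital.nhs.uk/services/organisation-data-service>.
- **LOINC**: <https://loinc.org/>.
- **ICD10**: <https://www.who.int/classifications/icd/icdonlineversions/en/>.
- **ICD10CM**: <https://www.cdc.gov/nchs/icd/icd10cm.htm>.
- **ICD10PCS**: <https://ec.europa.eu/eip/ageing/standards/healthcare/e-health/icd-10-pcs_en>.
- **ICD9CM**: <https://www.cdc.gov/nchs/icd/icd9cm.htm>.
- **ICD9**: <https://www.cdc.gov/nchs/icd/icd9.htm>.
- **ICDO3**: <https://www.who.int/classifications/icd/adaptations/oncology/en/>.
- **AMT**: <https://www.digitalhealth.gov.au/about-the-agency/tenders-and-offers/community-pharmacy-software-industry-partnership-offer/Webinar%20-%20Australian%20Medicines%20Terminology%20(AMT)%20and%20Implementation%20Options%2001032017.pdf>.
- **APC**: <https://www.acep.org/administration/reimbursement/reimbursement-faqs/apc-ambulatory-payment-classifications-faq/>.
- **ATC**: <https://www.whocc.no/atc_ddd_index/>.
- **CIEL**: <https://github.com/OpenConceptLab/ocl_web/wiki/CIEL>.
- **HPO**: <https://hpo.jax.org/app/>.
- **CPT4**: <https://www.cms.gov/Regulations-and-Guidance/Legislation/CLIA/Downloads/SubjecttoCLIA.pdf>.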
- **DPD**: .
- **DRG**: .
- **HEMONC**: .
- **JMDC**: .
- **KCD7**: .
- **MULTUM**: .
- **NAACCR**: .
- **NDC**: .
- **NDFRT**: <https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.
- **OXMIS**: .
- **RXNORM**: .
- **RXNORM EXTENSION**: .
- **SPL**: .
- **Other**: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.
- **NHS Scotland National Codes**: .
- **NHS Wales National Codes**: | True | True | ["ControlledVocabularyEnum['LOCAL','OPCS4','READ','SNOMED CT','SNOMED RT','DM PLUS D','DM+D','NHS NATIONAL CODES','NHS SCOTLAND NATIONAL CODES','NHS WALES NATIONAL CODES','ODS','LOINC','ICD10','ICD10CM','ICD10PCS','ICD9CM','ICD9','ICDO3','AMT','APC','ATC','CIEL','HPO','CPT4','DPD','DRG','HEMONC','JMDC','KCD7','MULTUM','NAACCR','NDC','NDFRT','OXMIS','RXNORM','RXNORM EXTENSION','SPL','OTHER']"] | +Examples: + * LOCAL + * ICD10 #### conformsTo @@ -736,7 +768,8 @@ List standardised data models that the dataset has been stored in or transformed Examples: - * LOCAL,NHS DATA DICTIONARY + * LOCAL + * NHS DATA DICTIONARY #### language @@ -747,7 +780,9 @@ This should list all the languages in which the dataset metadata and underlying |:---------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Language | https://www.iso.org/iso-639-language-code
- **aa**: Afar
- **ab**: Abkhazian
- **af**: Afrikaans
- **ak**: Akan
- **sq**: Albanian
- **am**: Amharic
- **ar**: Arabic
- **an**: Aragonese
- **hy**: Armenian
- **as**: Assamese
- **av**: Avaric
- **ae**: Avestan
- **ay**: Aymara
- **az**: Azerbaijani
- **ba**: Bashkir
- **bm**: Bambara
- **eu**: Basque
- **be**: Belarusian
- **bn**: Bengali
- **bh**: Bihari languages
- **bi**: Bislama
- **bo**: Tibetan
- **bs**: Bosnian
- **br**: Breton
- **bg**: Bulgarian
- **my**: Burmese
- **ca**: Catalan; Valencian
- **cs**: Czech
- **ch**: Chamorro
- **ce**: Chechen
- **zh**: Chinese
- **cu**: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic
- **cv**: Chuvash
- **kw**: Cornish
- **co**: Corsican
- **cr**: Cree
- **cy**: Welsh
- **cs**: Czech
- **da**: Danish
- **de**: German
- **dv**: Divehi; Dhivehi; Maldivian
- **nl**: Dutch; Flemish
- **dz**: Dzongkha
- **el**: Greek, Modern (1453-)
- **en**: English
- **eo**: Esperanto
- **et**: Estonian
- **eu**: Basque
- **ee**: Ewe
- **fo**: Faroese
- **fa**: Persian
- **fj**: Fijian
- **fi**: Finnish
- **fr**: French
- **fy**: Western Frisian
- **ff**: Fulah
- **ka**: Georgian
- **de**: German
- **gd**: Gaelic; Scottish Gaelic
- **ga**: Irish
- **gl**: Galician
- **gv**: Manx
- **el**: Greek, Modern (1453-)
- **gn**: Guarani
- **gu**: Gujarati
- **ht**: Haitian; Haitian Creole
- **ha**: Hausa
- **ho**: Hiri Motu
- **hr**: Croatian
- **hu**: Hungarian
- **hy**: Armenian
- **ig**: Igbo
- **is**: Icelandic
- **io**: Ido
- **ii**: Sichuan Yi; Nuosu
- **iu**: Inuktitut
- **ie**: Interlingue; Occidental
- **ia**: Interlingua (International Auxiliary Language Association)
- **id**: Indonesian
- **ik**: Inupiaq
- **is**: Icelandic
- **it**: Italian
- **jv**: Javanese
- **ja**: Japanese
- **kl**: Kalaallisut; Greenlandic
- **kn**: Kannada
- **ks**: Kashmiri
- **ka**: Georgian
- **kr**: Kanuri
- **kk**: Kazakh
- **km**: Central Khmer
- **ki**: Kikuyu; Gikuyu
- **rw**: Kinyarwanda
- **ky**: Kirghiz; Kyrgyz
- **kv**: Komi
- **kg**: Kongo
- **ko**: Korean
- **kj**: Kuanyama; Kwanyama
- **ku**: Kurdish
- **lo**: Lao
- **la**: Latin
- **lv**: Latvian
- **li**: Limburgan; Limburger; Limburgish
- **ln**: Lingala
- **lt**: Lithuanian
- **lb**: Luxembourgish; Letzeburgesch
- **lu**: Luba-Katanga
- **lg**: Ganda
- **mk**: Macedonian
- **mh**: Marshallese
- **ml**: Malayalam
- **mi**: Maori
- **mr**: Marathi
- **ms**: Malay
- **mk**: Macedonian
- **mg**: Malagasy
- **mt**: Maltese
- **mn**: Mongolian
- **mi**: Maori
- **ms**: Malay
- **my**: Burmese
- **na**: Nauru
- **nv**: Navajo; Navaho
- **nr**: Ndebele, South; South Ndebele
- **nd**: Ndebele, North; North Ndebele
- **ng**: Ndonga
- **ne**: Nepali
- **nl**: Dutch; Flemish
- **nn**: Norwegian Nynorsk; Nynorsk, Norwegian
- **nb**: Bokmål, Norwegian; Norwegian Bokmål
- **no**: Norwegian
- **ny**: Chichewa; Chewa; Nyanja
- **oc**: Occitan (post 1500)
- **oj**: Ojibwa
- **or**: Oriya
- **om**: Oromo
- **os**: Ossetian; Ossetic
- **pa**: Panjabi; Punjabi
- **fa**: Persian
- **pi**: Pali
- **pl**: Polish
- **pt**: Portuguese
- **ps**: Pushto; Pashto
- **qu**: Quechua
- **rm**: Romansh
- **ro**: Romanian; Moldavian; Moldovan
- **rn**: Rundi
- **ru**: Russian
- **sg**: Sango
- **sa**: Sanskrit
- **si**: Sinhala; Sinhalese
- **sk**: Slovak
- **sl**: Slovenian
- **se**: Northern Sami
- **sm**: Samoan
- **sn**: Shona
- **sd**: Sindhi
- **so**: Somali
- **st**: Sotho, Southern
- **es**: Spanish; Castilian
- **sq**: Albanian
- **sc**: Sardinian
- **sr**: Serbian
- **ss**: Swati
- **su**: Sundanese
- **sw**: Swahili
- **sv**: Swedish
- **ty**: Tahitian
- **ta**: Tamil
- **tt**: Tatar
- **te**: Telugu
- **tg**: Tajik
- **tl**: Tagalog
- **th**: Thai
- **bo**: Tibetan
- **ti**: Tigrinya
- **to**: Tonga (Tonga Islands)
- **tn**: Tswana
- **ts**: Tsonga
- **tk**: Turkmen
- **tr**: Turkish
- **tw**: Twi
- **ug**: Uighur; Uyghur
- **uk**: Ukrainian
- **ur**: Urdu
- **uz**: Uzbek
- **ve**: Venda
- **vi**: Vietnamese
- **vo**: Volapük
- **cy**: Welsh
- **wa**: Walloon
- **wo**: Wolof
- **xh**: Xhosa
- **yi**: Yiddish
- **yo**: Yoruba
- **za**: Zhuang; Chuang
- **zh**: Chinese
- **zu**: Zulu | True | True | ["LanguageEnum['aa','ab','ae','af','ak','am','an','ar','as','av','ay','az','ba','be','bg','bh','bi','bm','bn','bo','br','bs','ca','ce','ch','co','cr','cs','cu','cv','cy','da','de','dv','dz','ee','el','en','eo','es','et','eu','fa','ff','fi','fj','fo','fr','fy','ga','gd','gl','gn','gu','gv','ha','he','hi','ho','hr','ht','hu','hy','hz','ia','id','ie','ig','ii','ik','io','is','it','iu','ja','jv','ka','kg','ki','kj','kk','kl','km','kn','ko','kr','ks','ku','kv','kw','ky','la','lb','lg','li','ln','lo','lt','lu','lv','mg','mh','mi','mk','ml','mn','mr','ms','mt','my','na','nb','nd','ne','ng','nl','nn','no','nr','nv','ny','oc','oj','om','or','os','pa','pi','pl','ps','pt','qu','rm','rn','ro','ru','rw','sa','sc','sd','se','sg','si','sk','sl','sm','sn','so','sq','sr','ss','st','su','sv','sw','ta','te','tg','th','ti','tk','tl','tn','to','tr','ts','tt','tw','ty','ug','uk','ur','uz','ve','vi','vo','wa','wo','xh','yi','yo','za','zh','zu']"] | +Examples: + * en #### format @@ -1023,7 +1058,9 @@ An integer value size of the measured property, such as ‘1000’ for 1000 peop |:---------------|:-------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:--------| | Measured value | An integer value size of the measured property, such as ‘1000’ for 1000 people in the study or ‘87’ for 87 MRI scans in the dataset. | False | True | ['int'] | +Examples: + * 1000 ### disambiguatingDescription @@ -1045,7 +1082,9 @@ Provide the date, or datetime that the observation was made. 
Multiple observatio |:-----------------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------|:-----------|:---------------------| | Observation date | Provide the date, or datetime that the observation was made. Multiple observations of the same property can be provided, for example an observation of cumulative COVID positive cases by specimen on the 1/1/2021 with a measuredValue of 2000000, and a second observation entry on 8/2/2021 recording a measuredValue of as 3100000. | False | True | ['date', 'datetime'] | +Examples: + * 2024-10-24 ### measuredProperty @@ -1056,7 +1095,9 @@ Descriptive term for the observation property measured. For example, people, pro |:------------------|:--------------------------------------------------------|:----------|:-----------|:-------------------------| | Measured property | Descriptive term for the observation property measured. 
| False | True | ['MeasuredProperty[{}]'] | +Examples: + * Count ## structuralMetadata @@ -1231,7 +1272,9 @@ None |:-------------|:-----------|:----------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Age grouping | | False | True | ["AgeEnum['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-24 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','95-99 years','100+ years']"] | +Examples: + * 30-34 years #### count @@ -1242,7 +1285,9 @@ None |:----------|:-----------|:----------|:-----------|:--------| | Age count | | False | True | ['int'] | +Examples: + * 1000 ### ethnicity @@ -1262,7 +1307,9 @@ None |:-------------------|:-----------|:----------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| | Ethnicity grouping | | False | True | ["EthnicityEnum['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - 
White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']"] | +Examples: + * Black or Black British - Any other Black background #### count @@ -1273,7 +1320,9 @@ None |:----------------|:-----------|:----------|:-----------|:--------| | Ethnicity count | | False | True | ['int'] | +Examples: + * 1000 ### disease @@ -1293,7 +1342,9 @@ None |:-------------|:-----------|:----------|:-----------|:---------------| | Disease code | | False | True | ['str', 'int'] | +Examples: + * J45 #### diseaseCodeVocabulary @@ -1304,7 +1355,9 @@ None |:------------------------|:-----------|:----------|:-----------|:------------------------------------------------| | Disease code vocabulary | | False | True | ["DiseaseCodeEnum['ICD10','SNOMED CT','MeSH']"] | +Examples: + * ICD10 #### count @@ -1315,7 +1368,9 @@ None |:--------------|:-----------|:----------|:-----------|:--------| | Disease count | | False | True | ['int'] | +Examples: + * 1000 ## omics diff --git a/docs/HDRUK/3.0.0.structure.json b/docs/HDRUK/3.0.0.structure.json index 0159302..6c7812c 100644 --- a/docs/HDRUK/3.0.0.structure.json +++ b/docs/HDRUK/3.0.0.structure.json @@ -88,7 +88,9 @@ "title": "Metadata Issued Datetime", "description": "Datetime stamp of when this metadata version was initially issued", "guidance": "", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "type": [ "datetime" ], @@ -101,7 +103,9 @@ "title": "Last Modified Datetime", "description": "Datetime stamp of when this metadata was last modified", "guidance": "", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "type": [ 
"datetime" ], @@ -173,7 +177,9 @@ "title": "identifier", "description": "Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation.", "guidance": "Example: https://ror.org/053fq8t95If your organisation does not have a ROR identifier please use the \u201csuggest and institute\u201d function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform", - "examples": null, + "examples": [ + "30f16703-28bc-4f45-9ce5-625d2d3db27d" + ], "type": [ "str", "int" @@ -187,7 +193,9 @@ "title": "Name of data provider", "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata.", "guidance": "In most this will be the same as the Team you have on the Gateway. However, in some cases this will be different. For example, Tissue Directory are a Team on the Gateway but coordinate activities across a number of Data Providers such as Cambridge Blood and Stem Cell Biobank.", - "examples": null, + "examples": [ + "Health Data Research UK" + ], "type": [ "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" ], @@ -231,7 +239,9 @@ "title": "contact point", "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. 
If a contact point is not provided this will default to the contact point for the team submitting the metadata.", "guidance": "", - "examples": null, + "examples": [ + "test@test.co.uk" + ], "type": [ "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", "List" @@ -262,7 +272,9 @@ "title": "Dataset population size", "description": "Input the number of people captured within the dataset.", "guidance": "This number informs a filter for Researchers to differentiate dataset search results based on the number of people in the dataset, and does not pull from the Observations fields. The filter also allows for Researchers to search datasets which have no population size reported, but will not pull any population size captured in the Observations section.", - "examples": null, + "examples": [ + 1000 + ], "type": [ "int" ], @@ -314,7 +326,7 @@ "description": "Please provide a valid email address that can be used to coordinate data access requests.", "guidance": "Organisations are expected to provide a dedicated email address associated with the data access request process. 
If no contact point is provided in this field, this field will be defaulted to the teams support email provided in the teams setting.Note: An employee's email address can only be provided on a temporary basis and if one is provided, you must obtain explicit consent for this purpose.", "examples": [ - "SAILDatabank@swansea.ac.uk" + "gateway@hdruk.ac.uk" ], "type": [ "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]" @@ -361,7 +373,9 @@ "title": "Description", "description": "A free-text description of the dataset.A URL can also be provided as the description of the dataset.Gateway Feature: Keywords and text may be extracted out of the description and indexed for search.", "guidance": "An HTML account of the data that provides context and scope of the data, limited to 10000 characters, and/or a resolvable URL that describes the dataset.- Additional information can be recorded and included using the Associated media field.", - "examples": null, + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." + ], "type": [ "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]" ], @@ -424,6 +438,7 @@ "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in one of the recommended standards:- For locations in the UK: ONS standards- For locations in other countries: ISO 3166-1 & ISO 3166-2", "guidance": "The geographical area covered by the dataset.- Please provide a valid location.- For locations in the UK, this location should conform to ONS standards.- For locations in other countries we use ISO 3166-1 & ISO 3166-2.", "examples": [ + "United Kingdom", "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" ], "type": [ @@ -574,7 +589,9 @@ "title": "Dataset type", "description": "The topic areas to which the dataset content relates.", "guidance": "Types include those listed below. Datasets can have more than one type associated.- Health and disease: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.- Treatments/Interventions: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.- Measurements/Tests: Includes any data related to laboratory or other diagnostics.- Imaging types: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.- Imaging area of the body: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.- Omics: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics.- Socioeconomic: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.- Lifestyle: Includes any data related to smoking, physical activity, dietary habits or alcohol.- Registry: Includes 
any data related to disease registries for research, national disease registries, audits, or birth and deaths records.- Environment and energy: Includes any data related to the monitoring or study of environmental or energy factors or events.- Information and communication: Includes any data related to the study or application of information and communication.- Politics: Includes any data related to political views, activities, voting, etc.", - "examples": null, + "examples": [ + "Health and disease" + ], "type": [ "DatasetTypeV2['Health and disease','Treatments/Interventions','Measurements/Tests','Imaging types','Imaging area of the body','Omics','Socioeconomic','Lifestyle','Registry','Environment and energy','Information and communication','Politics']" ], @@ -655,7 +672,9 @@ "title": "Publishing frequency", "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", "guidance": "Please indicate the frequency of publishing.- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/. Options:- Static: Dataset published once.- Irregular: Dataset published at uneven intervals.- Continuous: Dataset published without interruption.- Biennial: Dataset published every two years.- Annual: Dataset published occurs once a year.- Biannual: Dataset published twice a year.- Quarterly: Dataset published every three months.- Bimonthly: Dataset published every two months.- Monthly: Dataset published once a month.- Biweekly: Dataset published every two weeks.- Weekly: Dataset published once a week.- Twice weekly: Dataset published twice a week.- Daily: Dataset published once a day.- Other: Dataset published using other interval.", - "examples": null, + "examples": [ + "Continuous" + ], "type": [ "PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]" ], @@ -668,7 +687,9 @@ "title": "Distribution release date", "description": "Date of the latest release of the dataset. If this is a regular release i.e. 
quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", "guidance": "Please indicate the frequency the dataset is published.- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "type": [ "date", "datetime", @@ -683,7 +704,9 @@ "title": "Start date", "description": "The start of the time period that the dataset provides coverage for. 
If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", "guidance": "The start of the time period that the dataset provides coverage for.- If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "type": [ "date", "datetime" @@ -697,7 +720,9 @@ "title": "End date", "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", "guidance": "The end of the time period that the dataset provides coverage for.- If the dataset is \u201cContinuous\u201d and has no known end date, please leave blank.- If there are multiple cohorts in the dataset with varying end dates, please provide the latest date.", - "examples": null, + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "type": [ "date", "datetime", @@ -713,7 +738,9 @@ "title": "Time lag", "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.", "guidance": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.- Less than 1 week: Typical time lag of less than a week.- 1-2 weeks: Typical time-lag of one to two weeks.- 2-4 weeks: Typical time-lag of two to four weeks.- 1-2 months: Typical time-lag of one to two months.- 2-6 months: Typical time-lag of two to six months.- 6 months plus: Typical time-lag of more than six months.- Variable: Variable time-lag.- Not applicable: Not Applicable i.e. 
static dataset.- Other: Other time-lag.", - "examples": null, + "examples": [ + "Not applicable" + ], "type": [ "TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']" ], @@ -816,7 +843,9 @@ "title": "Access rights", "description": "Please provide details for the data access rights.", "guidance": "The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both separated by a comma.- If such a resource or the underlying process doesn\u2019t exist, please provide \u201cIn Progress\u201d, until both the process and the documentation are ready.", - "examples": null, + "examples": [ + "In Progress" + ], "type": [ "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]" ], @@ -958,7 +987,10 @@ "title": "Controlled vocabulary", "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", "guidance": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.- If the controlled vocabularies are local standards, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.- Notes: More than one vocabulary may be provided.- Local: Local Coding Standard.- OPCS4: https://www.datadictionary.nhs.uk/web_site_content/supporting_information/clinical_coding/opcs_classification_of_interventions_and_procedures.asp.- READ: https://digital.nhs.uk/services/terminology-and-classifications/read-codes.- SNOMED CT: http://www.snomed.org/.- SNOMED RT: https://confluence.ihtsdotools.org/display/DOCGLOSS/SNOMED+RT.- DM+D: https://digital.nhs.uk/data-and-information/information-standards/information-standards-and-data-collections-including-extractions/publications-and-notifications/standards-and-collections/scci0052-dictionary-of-medicines-and-devices-dm-d.- NHS National Codes: https://www.datadictionary.nhs.uk/.- ODS: https://digital.nhs.uk/services/organisation-data-service.- LOINC: https://loinc.org/.- ICD10: https://www.who.int/classifications/icd/icdonlineversions/en/.- ICD10CM: https://www.cdc.gov/nchs/icd/icd10cm.htm.- ICD10PCS: https://ec.europa.eu/eip/ageing/standards/healthcare/e-health/icd-10-pcs_en.- ICD9CM: https://www.cdc.gov/nchs/icd/icd9cm.htm.- ICD9: https://www.cdc.gov/nchs/icd/icd9.htm.- ICDO3: https://www.who.int/classifications/icd/adaptations/oncology/en/.- AMT: https://www.digitalhealth.gov.au/about-the-agency/tenders-and-offers/community-pharmacy-software-industry-partnership-offer/Webinar%20-%20Australian%20Medicines%20Terminology%20(AMT)%20and%20Implementation%20Options%2001032017.pdf.- APC: https://www.acep.org/administration/reimbursement/reimbursement-faqs/apc-ambulatory-payment-classifications-faq/.- ATC: https://www.whocc.no/atc_ddd_index/.- CIEL: https://github.com/OpenConceptLab/ocl_web/wiki/CIEL.- HPO: https://hpo.jax.org/app/.- CPT4: https://www.cms.gov/Regulations-and-Guidance/Legislation/CLIA/Downloads/SubjecttoCLIA.pdf.- DPD: 
https://health-products.canada.ca/dpd-bdpp/index-eng.jsp.- DRG: http://www.euro.who.int/__data/assets/pdf_file/0004/162265/e96538.pdf.- HEMONC: https://hemonc.org/wiki/Main_Page.- JMDC: https://www.jmdc.co.jp/en/.- KCD7: https://forums.ohdsi.org/t/adding-kcd7-code-korean-icd-10-to-the-omop-vocabulary/7576.- MULTUM: https://www.cerner.com/solutions/drug-database.- NAACCR: https://www.naaccr.org/.- NDC: https://www.fda.gov/drugs/drug-approvals-and-databases/national-drug-code-directory.- NDFRT <:https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.- OXMIS: https://oxrisk.com/oxmis/.- RXNORM: https://www.nlm.nih.gov/research/umls/rxnorm/index.html.- RXNORM EXTENSION: https://www.nlm.nih.gov/research/umls/rxnorm/index.html.- SPL: https://www.fda.gov/industry/fda-resources-data-standards/structured-product-labeling-resources.- Other: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.- NHS Scotland National Codes: https://www.ndc.scot.nhs.uk/Data-Dictionary/.- NHS Wales National Codes: http://www.datadictionary.wales.nhs.uk/", - "examples": null, + "examples": [ + "LOCAL", + "ICD10" + ], "type": [ "ControlledVocabularyEnum['LOCAL','OPCS4','READ','SNOMED CT','SNOMED RT','DM PLUS D','DM+D','NHS NATIONAL CODES','NHS SCOTLAND NATIONAL CODES','NHS WALES NATIONAL CODES','ODS','LOINC','ICD10','ICD10CM','ICD10PCS','ICD9CM','ICD9','ICDO3','AMT','APC','ATC','CIEL','HPO','CPT4','DPD','DRG','HEMONC','JMDC','KCD7','MULTUM','NAACCR','NDC','NDFRT','OXMIS','RXNORM','RXNORM EXTENSION','SPL','OTHER']" ], @@ -972,7 +1004,8 @@ "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", "guidance": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.- If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.- HL7 FHIR: https://www.hl7.org/fhir/.- HL7 V2: https://www.hl7.org/implement/standards/product_section.cfm?section=13.- HL7 CDA: https://www.hl7.org/implement/standards/product_section.cfm?section=10.- HL7 CCOW: https://www.hl7.org/implement/standards/product_section.cfm?section=16.- DICOM: https://www.dicomstandard.org/.- I2B2: https://www.i2b2.org/.- IHE: https://www.ihe.net/resources/profiles/.- OMOP: https://www.ohdsi.org/data-standardization/the-common-data-model/.- openEHR: https://www.openehr.org/.- Sentinel: https://www.sentinelinitiative.org/sentinel/data/distributed-database-common-data-model.- PCORnet: https://pcornet.org/data-driven-common-model/.- CDISC: https://www.cdisc.org/standards/data-exchange/odm.- Local: In-house developed data model.- Other: Other standardised data model.- NHS Data Dictionary: https://www.datadictionary.nhs.uk/.- NHS Scotland Data Dictionary: https://www.ndc.scot.nhs.uk/Data-Dictionary/.- NHS Wales Data Dictionary: https://www.datadictionary.wales.nhs.uk/.", "examples": [ - "LOCAL,NHS DATA DICTIONARY" + "LOCAL", + "NHS DATA DICTIONARY" ], "type": [ "StandardisedDataModelsEnum['HL7 FHIR','HL7 V2','HL7 CDA','HL7 CCOW','LOINC','DICOM','I2B2','IHE','OMOP','OPENEHR','SENTINEL','PCORNET','CDISC','NHS DATA DICTIONARY','NHS SCOTLAND DATA DICTIONARY','NHS WALES DATA DICTIONARY','LOCAL','OTHER']" @@ -986,7 +1019,9 @@ "title": "Language", "description": "This should list all the languages in which the dataset metadata and underlying data is made available 
complaint with ISO 639.", "guidance": "https://www.iso.org/iso-639-language-code- aa: Afar- ab: Abkhazian- af: Afrikaans- ak: Akan- sq: Albanian- am: Amharic- ar: Arabic- an: Aragonese- hy: Armenian- as: Assamese- av: Avaric- ae: Avestan- ay: Aymara- az: Azerbaijani- ba: Bashkir- bm: Bambara- eu: Basque- be: Belarusian- bn: Bengali- bh: Bihari languages- bi: Bislama- bo: Tibetan- bs: Bosnian- br: Breton- bg: Bulgarian- my: Burmese- ca: Catalan; Valencian- cs: Czech- ch: Chamorro- ce: Chechen- zh: Chinese- cu: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic- cv: Chuvash- kw: Cornish- co: Corsican- cr: Cree- cy: Welsh- cs: Czech- da: Danish- de: German- dv: Divehi; Dhivehi; Maldivian- nl: Dutch; Flemish- dz: Dzongkha- el: Greek, Modern (1453-)- en: English- eo: Esperanto- et: Estonian- eu: Basque- ee: Ewe- fo: Faroese- fa: Persian- fj: Fijian- fi: Finnish- fr: French- fy: Western Frisian- ff: Fulah- ka: Georgian- de: German- gd: Gaelic; Scottish Gaelic- ga: Irish- gl: Galician- gv: Manx- el: Greek, Modern (1453-)- gn: Guarani- gu: Gujarati- ht: Haitian; Haitian Creole- ha: Hausa- ho: Hiri Motu- hr: Croatian- hu: Hungarian- hy: Armenian- ig: Igbo- is: Icelandic- io: Ido- ii: Sichuan Yi; Nuosu- iu: Inuktitut- ie: Interlingue; Occidental- ia: Interlingua (International Auxiliary Language Association)- id: Indonesian- ik: Inupiaq- is: Icelandic- it: Italian- jv: Javanese- ja: Japanese- kl: Kalaallisut; Greenlandic- kn: Kannada- ks: Kashmiri- ka: Georgian- kr: Kanuri- kk: Kazakh- km: Central Khmer- ki: Kikuyu; Gikuyu- rw: Kinyarwanda- ky: Kirghiz; Kyrgyz- kv: Komi- kg: Kongo- ko: Korean- kj: Kuanyama; Kwanyama- ku: Kurdish- lo: Lao- la: Latin- lv: Latvian- li: Limburgan; Limburger; limburgish- ln: Lingala- lt: Lithuanian- lb: Luxembourgish; Letzeburgesch- lu: Luba-Katanga- lg: Ganda- mk: Macedonian- mh: Marshallese- ml: Malayalam- mi: Maori- mr: Marathi- ms: Malay- mk: Macedonian- mg: Malagasy- mt: Maltese- mn: Mongolian- mi: Maori- ms: 
Malay- my: Burmese- na: Nauru- nv: Navajo; Navaho- nr: Ndebele, South; South Ndebele- nd: Ndebele, North; North Ndebele- ng: Ndonga- ne: Nepali- nl: Dutch; Flemish- nn: Norwegian Nynorsk; Nynorsk, Norwegian- nb: Bokm\u00e5l, Norwegian; Norwegian Bokm\u00e5l- no: Norwegian- ny: Chichewa; Chewa; Nyanja- oc: Occitan (post 1500)- oj: Ojibwa- or: Oriya- om: Oromo- os: Ossetian; Ossetic- pa: Panjabi; Punjabi- fa: Persian- pi: Pali- pl: Polish- pt: Portuguese- ps: Pushto; Pashto- qu: Quechua- rm: Romansh- ro: Romanian; Moldavian; Moldovan- rn: Rundi- ru: Russian- sg: Sango- sa: Sanskrit- si: Sinhala; Sinhalese- sk: Slovak- sl: Slovenian- se: Northern Sami- sm: Samoan- sn: Shona- sd: Sindhi- so: Somali- st: Sotho, Southern- es: Spanish; Castilian- sq: Albanian- sc: Sardinian- sr: Serbian- ss: Swati- su: Sundanese- sw: Swahili- sv: Swedish- ty: Tahitian- ta: Tamil- tt: Tatar- te: Telugu- tg: Tajik- tl: Tagalog- th: Thai- bo: Tibetan- ti: Tigrinya- to: Tonga (Tonga Islands)- tn: Tswana- ts: Tsonga- tk: Turkmen- tr: Turkish- tw: Twi- ug: Uighur; Uyghur- uk: Ukrainian- ur: Urdu- uz: Uzbek- ve: Venda- vi: Vietnamese- vo: Volap\u00fck- cy: Welsh- wa: Walloon- wo: Wolof- xh: Xhosa- yi: Yiddish- yo: Yoruba- za: Zhuang; Chuang- zh: Chinese- zu: Zulu", - "examples": null, + "examples": [ + "en" + ], "type": [ 
"LanguageEnum['aa','ab','ae','af','ak','am','an','ar','as','av','ay','az','ba','be','bg','bh','bi','bm','bn','bo','br','bs','ca','ce','ch','co','cr','cs','cu','cv','cy','da','de','dv','dz','ee','el','en','eo','es','et','eu','fa','ff','fi','fj','fo','fr','fy','ga','gd','gl','gn','gu','gv','ha','he','hi','ho','hr','ht','hu','hy','hz','ia','id','ie','ig','ii','ik','io','is','it','iu','ja','jv','ka','kg','ki','kj','kk','kl','km','kn','ko','kr','ks','ku','kv','kw','ky','la','lb','lg','li','ln','lo','lt','lu','lv','mg','mh','mi','mk','ml','mn','mr','ms','mt','my','na','nb','nd','ne','ng','nl','nn','no','nr','nv','ny','oc','oj','om','or','os','pa','pi','pl','ps','pt','qu','rm','rn','ro','ru','rw','sa','sc','sd','se','sg','si','sk','sl','sm','sn','so','sq','sr','ss','st','su','sv','sw','ta','te','tg','th','ti','tk','tl','tn','to','tr','ts','tt','tw','ty','ug','uk','ur','uz','ve','vi','vo','wa','wo','xh','yi','yo','za','zh','zu']" ], @@ -1368,7 +1403,9 @@ "title": "Measured value", "description": "An integer value size of the measured property, such as \u20181000\u2019 for 1000 people in the study or \u201887\u2019 for 87 MRI scans in the dataset.", "guidance": "An integer value size of the measured property, such as \u20181000\u2019 for 1000 people in the study or \u201887\u2019 for 87 MRI scans in the dataset.", - "examples": null, + "examples": [ + 1000 + ], "type": [ "int" ], @@ -1396,7 +1433,9 @@ "title": "Observation date", "description": "Provide the date, or datetime that the observation was made. Multiple observations of the same property can be provided, for example an observation of cumulative COVID positive cases by specimen on the 1/1/2021 with a measuredValue of 2000000, and a second observation entry on 8/2/2021 recording a measuredValue of as 3100000.", "guidance": "Provide the date, or datetime that the observation was made. 
Multiple observations of the same property can be provided, for example an observation of cumulative COVID positive cases by specimen on the 1/1/2021 with a measuredValue of 2000000, and a second observation entry on 8/2/2021 recording a measuredValue of as 3100000.", - "examples": null, + "examples": [ + "2024-10-24" + ], "type": [ "date", "datetime" @@ -1410,7 +1449,9 @@ "title": "Measured property", "description": "Descriptive term for the observation property measured. For example, people, procedures, x-rays, or diagnosis of type 1 diabetes. This could also be a specific SNOMED CT term.", "guidance": "Descriptive term for the observation property measured.", - "examples": null, + "examples": [ + "Count" + ], "type": [ "MeasuredProperty[{}]" ], @@ -1652,7 +1693,9 @@ "title": "Age grouping", "description": null, "guidance": "", - "examples": null, + "examples": [ + "30-34 years" + ], "type": [ "AgeEnum['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-24 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 years','80-84 years','85-89 years','90-94 years','95-99 years','100+ years']" ], @@ -1665,7 +1708,9 @@ "title": "Age count", "description": null, "guidance": "", - "examples": null, + "examples": [ + 1000 + ], "type": [ "int" ], @@ -1693,7 +1738,9 @@ "title": "Ethnicity grouping", "description": null, "guidance": "", - "examples": null, + "examples": [ + "Black or Black British - Any other Black background" + ], "type": [ "EthnicityEnum['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black 
British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']" ], @@ -1706,7 +1753,9 @@ "title": "Ethnicity count", "description": null, "guidance": "", - "examples": null, + "examples": [ + 1000 + ], "type": [ "int" ], @@ -1734,7 +1783,9 @@ "title": "Disease code", "description": null, "guidance": "", - "examples": null, + "examples": [ + "J45" + ], "type": [ "str", "int" @@ -1748,7 +1799,9 @@ "title": "Disease code vocabulary", "description": null, "guidance": "", - "examples": null, + "examples": [ + "ICD10" + ], "type": [ "DiseaseCodeEnum['ICD10','SNOMED CT','MeSH']" ], @@ -1761,7 +1814,9 @@ "title": "Disease count", "description": null, "guidance": "", - "examples": null, + "examples": [ + 1000 + ], "type": [ "int" ], diff --git a/docs/HDRUK/3.0.0.template.json b/docs/HDRUK/3.0.0.template.json new file mode 100644 index 0000000..41a27c0 --- /dev/null +++ b/docs/HDRUK/3.0.0.template.json @@ -0,0 +1,151 @@ +{ + "identifier": "Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}] | Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null", + "version": "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]", + "revisions": { + "version": "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]", + "url": "UrlV2[{'anyOf': [{'pattern': '^\\\\s*((https?:\\\\/\\\\/)*([a-zA-Z0-9-]+\\\\.?)+[a-zA-Z]{2,}(:\\\\d+)?(\\\\/[^\\\\s]*)?(\\\\n)?)+$', 'type': 'string'}, {'type': 'null'}]}] | null" + }, + "issued": "datetime", + "modified": "datetime", + "summary": { + "title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "abstract": "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, 
{'type': 'null'}]}]", + "dataCustodian": { + "identifier": "str | int", + "name": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "logo": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null", + "description": "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null", + "contactPoint": "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}] | List", + "memberOf": "MemberOfV2['Hub','Alliance','Other','NCS'] | null" + }, + "populationSize": "int", + "keywords": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]", + "doiName": "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}] | null", + "contactPoint": "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "alternateIdentifiers": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List | null" + }, + "documentation": { + "description": "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "associatedMedia": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List | null", + "inPipeline": "Pipeline['Available','Not available'] | null" + }, + "coverage": { + "spatial": "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}] | List", + "typicalAgeRangeMin": "int | null", + "typicalAgeRangeMax": "int | null", + "datasetCompleteness": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null", + "materialType": "MaterialTypeCategoriesV2['None/not available','Bone marrow','Cancer cell lines','CDNA/MRNA','Core biopsy','DNA','Entire body organ','Faeces','Immortalized cell lines','Isolated pathogen','MicroRNA','Peripheral blood 
cells','Plasma','PM Tissue','Primary cells','RNA','Saliva','Serum','Swabs','Tissue','Urine','Whole blood','Availability to be confirmed','Other']", + "followUp": "FollowupV2['0 - 6 Months','6 - 12 Months','1 - 10 Years','> 10 Years','Unknown','Continuous','Other',null] | null", + "pathway": "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null" + }, + "provenance": { + "origin": { + "purpose": "PurposeV2['Research cohort','Study','Disease registry','Trial','Care','Audit','Administrative','Financial','Statutory','Other',null]", + "datasetType": "DatasetTypeV2['Health and disease','Treatments/Interventions','Measurements/Tests','Imaging types','Imaging area of the body','Omics','Socioeconomic','Lifestyle','Registry','Environment and energy','Information and communication','Politics']", + "datasetSubType": "DatasetSubType['Mental health','Cardiovascular','Cancer','Rare diseases','Metabolic and endocrine','Neurological','Reproductive','Maternity and neonatology','Respiratory','Immunity','Musculoskeletal','Vision','Renal and urogenital','Oral and gastrointestinal','Cognitive function','Hearing','Others','Vaccines','Preventive','Therapeutic','Laboratory','Other diagnostics','CT','MRI','PET','X-ray','Ultrasound','Pathology','Head','Chest','Arm','Abdomen','Leg','Proteomics','Transcriptomics','Epigenomics','Metabolomics','Metagenomics','Genomics','Lipidomics','Education','Crime and justice','Ethnicity','Housing','Labour','Ageing ','Economics','Marital status','Social support','Deprivation','Religion','Occupation','Finances','Family circumstance','Smoking','Physical activity','Dietary habits','Alcohol','Disease registry (research)','National disease registries and audits','Births and deaths','Not applicable']", + "source": "SourceV2['EPR','Electronic survey','LIMS','Paper-based','Free text NLP','Machine generated','Other']", + "collectionSource": "SettingV2['Cohort, study, trial','Clinic','Primary care - 
Referrals','Primary care - Clinic','Primary care - Out of hours','Secondary care - Accident and Emergency','Secondary care - Outpatients','Secondary care - In-patients','Secondary care - Ambulance','Secondary care - ICU','Prescribing - Community pharmacy','Prescribing - Hospital','Patient report outcome','Wearables','Local authority','National government','Community','Services','Home','Private','Social care - Health care at home','Social care - Other social data','Census','Other',null]", + "imageContrast": "Ternary['Yes','No','Not stated'] | null" + }, + "temporal": { + "publishingFrequency": "PeriodicityV2['Static','Irregular','Continuous','Biennial','Annual','Biannual','Quarterly','Bimonthly','Monthly','Biweekly','Weekly','Twice a week','Daily','Other',null]", + "distributionReleaseDate": "date | datetime | null", + "startDate": "date | datetime", + "endDate": "date | datetime | EndDateEnum['CONTINUOUS',null] | null", + "timeLag": "TimeLagV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other']" + } + }, + "accessibility": { + "usage": { + "dataUseLimitation": "DataUseLimitationV2['General research use','Commercial research use','Genetic studies only','No general methods research','No restriction','Geographical restrictions','Institution-specific restrictions','Not for profit use','Project-specific restrictions','Research-specific restrictions','User-specific restrictions','Research use only','No linkage']", + "dataUseRequirements": "DataUseRequirementsV2['Collaboration required','Project-specific restrictions','Ethics approval required','Institution-specific restrictions','Geographical restrictions','Publication moratorium','Publication required','Return to database or resource','Time limit on use','Disclosure control','Not for profit use','User-specific restriction']", + "resourceCreator": "ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 
'null'}]}] | List | null" + }, + "access": { + "accessRights": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "accessServiceCategory": "AccessService['TRE/SDE','Direct access','Open access','Varies based on project'] | null", + "accessService": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null", + "accessRequestCost": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null", + "deliveryLeadTime": "DeliveryLeadTimeV2['Less than 1 week','1-2 weeks','2-4 weeks','1-2 months','2-6 months','More than 6 months','Variable','Not applicable','Other'] | null", + "jurisdiction": "Isocountrycode[{'pattern': '^[A-Z]{2}(-[A-Z]{2,3})?$', 'type': 'string'}]", + "dataController": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null", + "dataProcessor": "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}] | null" + }, + "formatAndStandards": { + "vocabularyEncodingScheme": "ControlledVocabularyEnum['LOCAL','OPCS4','READ','SNOMED CT','SNOMED RT','DM PLUS D','DM+D','NHS NATIONAL CODES','NHS SCOTLAND NATIONAL CODES','NHS WALES NATIONAL CODES','ODS','LOINC','ICD10','ICD10CM','ICD10PCS','ICD9CM','ICD9','ICDO3','AMT','APC','ATC','CIEL','HPO','CPT4','DPD','DRG','HEMONC','JMDC','KCD7','MULTUM','NAACCR','NDC','NDFRT','OXMIS','RXNORM','RXNORM EXTENSION','SPL','OTHER']", + "conformsTo": "StandardisedDataModelsEnum['HL7 FHIR','HL7 V2','HL7 CDA','HL7 CCOW','LOINC','DICOM','I2B2','IHE','OMOP','OPENEHR','SENTINEL','PCORNET','CDISC','NHS DATA DICTIONARY','NHS SCOTLAND DATA DICTIONARY','NHS WALES DATA DICTIONARY','LOCAL','OTHER']", + "language": 
"LanguageEnum['aa','ab','ae','af','ak','am','an','ar','as','av','ay','az','ba','be','bg','bh','bi','bm','bn','bo','br','bs','ca','ce','ch','co','cr','cs','cu','cv','cy','da','de','dv','dz','ee','el','en','eo','es','et','eu','fa','ff','fi','fj','fo','fr','fy','ga','gd','gl','gn','gu','gv','ha','he','hi','ho','hr','ht','hu','hy','hz','ia','id','ie','ig','ii','ik','io','is','it','iu','ja','jv','ka','kg','ki','kj','kk','kl','km','kn','ko','kr','ks','ku','kv','kw','ky','la','lb','lg','li','ln','lo','lt','lu','lv','mg','mh','mi','mk','ml','mn','mr','ms','mt','my','na','nb','nd','ne','ng','nl','nn','no','nr','nv','ny','oc','oj','om','or','os','pa','pi','pl','ps','pt','qu','rm','rn','ro','ru','rw','sa','sc','sd','se','sg','si','sk','sl','sm','sn','so','sq','sr','ss','st','su','sv','sw','ta','te','tg','th','ti','tk','tl','tn','to','tr','ts','tt','tw','ty','ug','uk','ur','uz','ve','vi','vo','wa','wo','xh','yi','yo','za','zh','zu']", + "format": "Format[{'minLength': 1, 'type': 'string'}]" + } + }, + "enrichmentAndLinkage": { + "derivedFrom": { + "pid": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", + "title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", + "url": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null" + }, + "isPartOf": { + "pid": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", + "title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", + "url": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null" + }, + "linkableDatasets": { + "pid": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", + "title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", + "url": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 
'null'}]}] | null" + }, + "similarToDatasets": { + "pid": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", + "title": "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}] | null", + "url": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}] | null" + }, + "investigations": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "tools": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "publicationAboutDataset": "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "publicationUsingDataset": "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]" + }, + "observations": { + "observedNode": "StatisticalPopulationConstrainedV2['Persons','Events','Findings','Number of scans per modality']", + "measuredValue": "int", + "disambiguatingDescription": "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}] | null", + "observationDate": "date | datetime", + "measuredProperty": "MeasuredProperty[{}]" + }, + "structuralMetadata": { + "tables": { + "name": "str | null", + "description": "str | null", + "columns": { + "name": "Name[{}]", + "dataType": "str", + "description": "str | null", + "sensitive": "bool", + "values": { + "name": "Name[{}]", + "description": "str | null", + "frequency": "int | null" + } + } + }, + "syntheticDataWebLink": "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + }, + "demographicFrequency": { + "age": { + "bin": "AgeEnum['0-6 days','7-27 days','1-11 months','1-4 years','5-9 years','10-14 years','15-19 years','20-24 years','25-29 years','30-34 years','35-39 years','40-44 years','45-49 years','50-54 years','55-59 years','60-64 years','65-69 years','70-74 years','75-79 
years','80-84 years','85-89 years','90-94 years','95-99 years','100+ years']", + "count": "int" + }, + "ethnicity": { + "bin": "EthnicityEnum['White - British','White - Irish','White - Any other White background','Mixed - White and Black Caribbean','Mixed - White and Black African','Mixed - White and Asian','Mixed - Any other mixed background','Asian or Asian British - Indian','Asian or Asian British - Pakistani','Asian or Asian British - Bangladeshi','Asian or Asian British - Any other Asian background','Black or Black British - Caribbean','Black or Black British - African','Black or Black British - Any other Black background','Other Ethnic Groups - Chinese','Other Ethnic Groups - Any other ethnic group','Not stated','Not known']", + "count": "int" + }, + "disease": { + "diseaseCode": "str | int", + "diseaseCodeVocabulary": "DiseaseCodeEnum['ICD10','SNOMED CT','MeSH']", + "count": "int" + } + }, + "omics": { + "assay": "Assay['NMR spectroscopy','Mass-spectrometry','Whole genome sequencing','Exome sequencing','Genotyping by array','Transcriptome profiling by high-throughput sequencing','Transcriptome profiling by array','Amplicon sequencing','Methylation binding domain sequencing','Methylation profiling by high-throughput sequencing','Genomic variant calling','Chromatin accessibility profiling by high-throughput sequencing','Histone modification profiling by high-throughput sequencing','Chromatin immunoprecipitation sequencing','Whole genome shotgun sequencing','Whole transcriptome sequencing','Targeted mutation analysis'] | null", + "platform": "Platform['Other','NMR Nightingale','Metabolon','Biocrates','Illumina','Oxford Nanopore','454','Hi-C','HiFi'] | null" + } +} \ No newline at end of file diff --git a/hdr_schemata/models/HDRUK/2.1.3/schema.json b/hdr_schemata/models/HDRUK/2.1.3/schema.json index d5cc01d..513ce9b 100644 --- a/hdr_schemata/models/HDRUK/2.1.3/schema.json +++ b/hdr_schemata/models/HDRUK/2.1.3/schema.json @@ -1707,6 +1707,9 @@ } ], "description": 
"Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "examples": [ + "Continuous" + ], "guidance": "Please indicate the frequency of publishing.\\n- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.\\n- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.\\n- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.\\n- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.\\n- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.\\n- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/.\\n\\n Options:\\n- **Static**: Dataset published once.\\n- **Irregular**: Dataset published at uneven intervals.\\n- **Continuous**: Dataset published without interruption.\\n- **Biennial**: Dataset published every two years.\\n- **Annual**: Dataset published occurs once a year.\\n- **Biannual**: Dataset 
published twice a year.\\n- **Quarterly**: Dataset published every three months.\\n- **Bimonthly**: Dataset published every two months.\\n- **Monthly**: Dataset published once a month.\\n- **Biweekly**: Dataset published every two weeks.\\n- **Weekly**: Dataset published once a week.\\n- **Twice weekly**: Dataset published twice a week.\\n- **Daily**: Dataset published once a day.\\n- **Other**: Dataset published using other interval.", "title": "Publishing Frequency" } diff --git a/hdr_schemata/models/HDRUK/2.2.0/schema.json b/hdr_schemata/models/HDRUK/2.2.0/schema.json index 08b4e5c..cdc3dcd 100644 --- a/hdr_schemata/models/HDRUK/2.2.0/schema.json +++ b/hdr_schemata/models/HDRUK/2.2.0/schema.json @@ -1865,6 +1865,9 @@ } ], "description": "Summary population size of the cohort", + "examples": [ + 1000 + ], "guidance": "This number informs a filter for Researchers to differentiate dataset search results based on the number of people in the dataset, and does not pull from the **Observations** fields. The filter also allows for Researchers to search datasets which have no population size reported, but will **not** pull any population size captured in the **Observations** section.", "title": "Population size" } diff --git a/hdr_schemata/models/HDRUK/2.2.1/schema.json b/hdr_schemata/models/HDRUK/2.2.1/schema.json index b8e7a76..51da26a 100644 --- a/hdr_schemata/models/HDRUK/2.2.1/schema.json +++ b/hdr_schemata/models/HDRUK/2.2.1/schema.json @@ -945,7 +945,8 @@ ], "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", "examples": [ - "LOCAL,NHS DATA DICTIONARY" + "LOCAL", + "NHS DATA DICTIONARY" ], "guidance": "- List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.\\n- If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.\\n- **HL7 FHIR**: .\\n- **HL7 V2**: .\\n- **HL7 CDA**: .\\n- **HL7 CCOW**: .\\n- **DICOM**: .\\n- **I2B2**: .\\n- **IHE**: .\\n- **OMOP**: .\\n- **openEHR**: .\\n- **Sentinel**: .\\n- **PCORnet**: .\\n- **CDISC**: .\\n- **Local**: In-house developed data model.\\n- **Other**: Other standardised data model.\\n- **NHS Data Dictionary**: .\\n- **NHS Scotland Data Dictionary**: .\\n- **NHS Wales Data Dictionary**: .", "title": "Alignment with standardised data models" @@ -1343,6 +1344,9 @@ } ], "description": "Descriptive term for the observation property measured. For example, people, procedures, x-rays, or diagnosis of type 1 diabetes. This could also be a specific SNOMED CT term.", + "examples": [ + "Count" + ], "guidance": "Descriptive term for the observation property measured.", "title": "Measured property" } diff --git a/hdr_schemata/models/HDRUK/3.0.0/schema.json b/hdr_schemata/models/HDRUK/3.0.0/schema.json index cfe83bb..e60bd98 100644 --- a/hdr_schemata/models/HDRUK/3.0.0/schema.json +++ b/hdr_schemata/models/HDRUK/3.0.0/schema.json @@ -23,6 +23,9 @@ } ], "description": "Please provide details for the data access rights.", + "examples": [ + "In Progress" + ], "guidance": "The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. 
industry vs academic please provide both separated by a comma.- If such a resource or the underlying process doesn\u2019t exist, please provide \u201cIn Progress\u201d, until both the process and the documentation are ready.", "title": "Access rights" }, @@ -211,9 +214,15 @@ "$ref": "#/$defs/AgeEnum" } ], + "examples": [ + "30-34 years" + ], "title": "Age grouping" }, "count": { + "examples": [ + 1000 + ], "title": "Age count", "type": "integer" } @@ -351,6 +360,7 @@ ], "description": "The geographical area covered by the dataset. It is recommended that links are to entries in one of the recommended standards:- For locations in the UK: ONS standards- For locations in other countries: ISO 3166-1 & ISO 3166-2", "examples": [ + "United Kingdom", "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" ], "guidance": "The geographical area covered by the dataset.- Please provide a valid location.- For locations in the UK, this location should conform to ONS standards.- For locations in other countries we use ISO 3166-1 & ISO 3166-2.", @@ -873,6 +883,9 @@ "type": "integer" } ], + "examples": [ + "J45" + ], "title": "Disease code" }, "diseaseCodeVocabulary": { @@ -881,9 +894,15 @@ "$ref": "#/$defs/DiseaseCodeEnum" } ], + "examples": [ + "ICD10" + ], "title": "Disease code vocabulary" }, "count": { + "examples": [ + 1000 + ], "title": "Disease count", "type": "integer" } @@ -915,6 +934,9 @@ } ], "description": "A free-text description of the dataset.A URL can also be provided as the description of the dataset.Gateway Feature: Keywords and text may be extracted out of the description and indexed for search.", + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." 
+ ], "guidance": "An HTML account of the data that provides context and scope of the data, limited to 10000 characters, and/or a resolvable URL that describes the dataset.- Additional information can be recorded and included using the Associated media field." }, "associatedMedia": { @@ -1155,9 +1177,15 @@ "$ref": "#/$defs/EthnicityEnum" } ], + "examples": [ + "Black or Black British - Any other Black background" + ], "title": "Ethnicity grouping" }, "count": { + "examples": [ + 1000 + ], "title": "Ethnicity count", "type": "integer" } @@ -1216,6 +1244,10 @@ "properties": { "vocabularyEncodingScheme": { "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", + "examples": [ + "LOCAL", + "ICD10" + ], "guidance": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.- If the controlled vocabularies are local standards, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.- Notes: More than one vocabulary may be provided.- Local: Local Coding Standard.- OPCS4: https://www.datadictionary.nhs.uk/web_site_content/supporting_information/clinical_coding/opcs_classification_of_interventions_and_procedures.asp.- READ: https://digital.nhs.uk/services/terminology-and-classifications/read-codes.- SNOMED CT: http://www.snomed.org/.- SNOMED RT: https://confluence.ihtsdotools.org/display/DOCGLOSS/SNOMED+RT.- DM+D: https://digital.nhs.uk/data-and-information/information-standards/information-standards-and-data-collections-including-extractions/publications-and-notifications/standards-and-collections/scci0052-dictionary-of-medicines-and-devices-dm-d.- NHS National Codes: https://www.datadictionary.nhs.uk/.- ODS: https://digital.nhs.uk/services/organisation-data-service.- LOINC: https://loinc.org/.- ICD10: https://www.who.int/classifications/icd/icdonlineversions/en/.- ICD10CM: https://www.cdc.gov/nchs/icd/icd10cm.htm.- ICD10PCS: https://ec.europa.eu/eip/ageing/standards/healthcare/e-health/icd-10-pcs_en.- ICD9CM: https://www.cdc.gov/nchs/icd/icd9cm.htm.- ICD9: https://www.cdc.gov/nchs/icd/icd9.htm.- ICDO3: https://www.who.int/classifications/icd/adaptations/oncology/en/.- AMT: https://www.digitalhealth.gov.au/about-the-agency/tenders-and-offers/community-pharmacy-software-industry-partnership-offer/Webinar%20-%20Australian%20Medicines%20Terminology%20(AMT)%20and%20Implementation%20Options%2001032017.pdf.- APC: https://www.acep.org/administration/reimbursement/reimbursement-faqs/apc-ambulatory-payment-classifications-faq/.- ATC: https://www.whocc.no/atc_ddd_index/.- CIEL: https://github.com/OpenConceptLab/ocl_web/wiki/CIEL.- HPO: https://hpo.jax.org/app/.- CPT4: https://www.cms.gov/Regulations-and-Guidance/Legislation/CLIA/Downloads/SubjecttoCLIA.pdf.- DPD: 
https://health-products.canada.ca/dpd-bdpp/index-eng.jsp.- DRG: http://www.euro.who.int/__data/assets/pdf_file/0004/162265/e96538.pdf.- HEMONC: https://hemonc.org/wiki/Main_Page.- JMDC: https://www.jmdc.co.jp/en/.- KCD7: https://forums.ohdsi.org/t/adding-kcd7-code-korean-icd-10-to-the-omop-vocabulary/7576.- MULTUM: https://www.cerner.com/solutions/drug-database.- NAACCR: https://www.naaccr.org/.- NDC: https://www.fda.gov/drugs/drug-approvals-and-databases/national-drug-code-directory.- NDFRT <:https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.- OXMIS: https://oxrisk.com/oxmis/.- RXNORM: https://www.nlm.nih.gov/research/umls/rxnorm/index.html.- RXNORM EXTENSION: https://www.nlm.nih.gov/research/umls/rxnorm/index.html.- SPL: https://www.fda.gov/industry/fda-resources-data-standards/structured-product-labeling-resources.- Other: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.- NHS Scotland National Codes: https://www.ndc.scot.nhs.uk/Data-Dictionary/.- NHS Wales National Codes: http://www.datadictionary.wales.nhs.uk/", "items": { "$ref": "#/$defs/ControlledVocabularyEnum" @@ -1226,7 +1258,8 @@ "conformsTo": { "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", "examples": [ - "LOCAL,NHS DATA DICTIONARY" + "LOCAL", + "NHS DATA DICTIONARY" ], "guidance": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.- If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.- HL7 FHIR: https://www.hl7.org/fhir/.- HL7 V2: https://www.hl7.org/implement/standards/product_section.cfm?section=13.- HL7 CDA: https://www.hl7.org/implement/standards/product_section.cfm?section=10.- HL7 CCOW: https://www.hl7.org/implement/standards/product_section.cfm?section=16.- DICOM: https://www.dicomstandard.org/.- I2B2: https://www.i2b2.org/.- IHE: https://www.ihe.net/resources/profiles/.- OMOP: https://www.ohdsi.org/data-standardization/the-common-data-model/.- openEHR: https://www.openehr.org/.- Sentinel: https://www.sentinelinitiative.org/sentinel/data/distributed-database-common-data-model.- PCORnet: https://pcornet.org/data-driven-common-model/.- CDISC: https://www.cdisc.org/standards/data-exchange/odm.- Local: In-house developed data model.- Other: Other standardised data model.- NHS Data Dictionary: https://www.datadictionary.nhs.uk/.- NHS Scotland Data Dictionary: https://www.ndc.scot.nhs.uk/Data-Dictionary/.- NHS Wales Data Dictionary: https://www.datadictionary.wales.nhs.uk/.", "items": { @@ -1237,6 +1270,9 @@ }, "language": { "description": "This should list all the languages in which the dataset metadata and underlying data is made available complaint with ISO 639.", + "examples": [ + "en" + ], "guidance": "https://www.iso.org/iso-639-language-code- aa: Afar- ab: Abkhazian- af: Afrikaans- ak: Akan- sq: Albanian- am: Amharic- ar: Arabic- an: Aragonese- hy: Armenian- as: Assamese- av: Avaric- ae: Avestan- ay: Aymara- az: Azerbaijani- ba: Bashkir- bm: Bambara- eu: Basque- be: Belarusian- bn: Bengali- bh: Bihari languages- bi: Bislama- bo: Tibetan- bs: Bosnian- br: Breton- bg: Bulgarian- my: Burmese- ca: Catalan; Valencian- cs: Czech- ch: Chamorro- ce: Chechen- zh: Chinese- cu: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic- cv: Chuvash- kw: Cornish- co: Corsican- cr: 
Cree- cy: Welsh- cs: Czech- da: Danish- de: German- dv: Divehi; Dhivehi; Maldivian- nl: Dutch; Flemish- dz: Dzongkha- el: Greek, Modern (1453-)- en: English- eo: Esperanto- et: Estonian- eu: Basque- ee: Ewe- fo: Faroese- fa: Persian- fj: Fijian- fi: Finnish- fr: French- fy: Western Frisian- ff: Fulah- ka: Georgian- de: German- gd: Gaelic; Scottish Gaelic- ga: Irish- gl: Galician- gv: Manx- el: Greek, Modern (1453-)- gn: Guarani- gu: Gujarati- ht: Haitian; Haitian Creole- ha: Hausa- ho: Hiri Motu- hr: Croatian- hu: Hungarian- hy: Armenian- ig: Igbo- is: Icelandic- io: Ido- ii: Sichuan Yi; Nuosu- iu: Inuktitut- ie: Interlingue; Occidental- ia: Interlingua (International Auxiliary Language Association)- id: Indonesian- ik: Inupiaq- is: Icelandic- it: Italian- jv: Javanese- ja: Japanese- kl: Kalaallisut; Greenlandic- kn: Kannada- ks: Kashmiri- ka: Georgian- kr: Kanuri- kk: Kazakh- km: Central Khmer- ki: Kikuyu; Gikuyu- rw: Kinyarwanda- ky: Kirghiz; Kyrgyz- kv: Komi- kg: Kongo- ko: Korean- kj: Kuanyama; Kwanyama- ku: Kurdish- lo: Lao- la: Latin- lv: Latvian- li: Limburgan; Limburger; limburgish- ln: Lingala- lt: Lithuanian- lb: Luxembourgish; Letzeburgesch- lu: Luba-Katanga- lg: Ganda- mk: Macedonian- mh: Marshallese- ml: Malayalam- mi: Maori- mr: Marathi- ms: Malay- mk: Macedonian- mg: Malagasy- mt: Maltese- mn: Mongolian- mi: Maori- ms: Malay- my: Burmese- na: Nauru- nv: Navajo; Navaho- nr: Ndebele, South; South Ndebele- nd: Ndebele, North; North Ndebele- ng: Ndonga- ne: Nepali- nl: Dutch; Flemish- nn: Norwegian Nynorsk; Nynorsk, Norwegian- nb: Bokm\u00e5l, Norwegian; Norwegian Bokm\u00e5l- no: Norwegian- ny: Chichewa; Chewa; Nyanja- oc: Occitan (post 1500)- oj: Ojibwa- or: Oriya- om: Oromo- os: Ossetian; Ossetic- pa: Panjabi; Punjabi- fa: Persian- pi: Pali- pl: Polish- pt: Portuguese- ps: Pushto; Pashto- qu: Quechua- rm: Romansh- ro: Romanian; Moldavian; Moldovan- rn: Rundi- ru: Russian- sg: Sango- sa: Sanskrit- si: Sinhala; Sinhalese- sk: Slovak- sl: Slovenian- se: 
Northern Sami- sm: Samoan- sn: Shona- sd: Sindhi- so: Somali- st: Sotho, Southern- es: Spanish; Castilian- sq: Albanian- sc: Sardinian- sr: Serbian- ss: Swati- su: Sundanese- sw: Swahili- sv: Swedish- ty: Tahitian- ta: Tamil- tt: Tatar- te: Telugu- tg: Tajik- tl: Tagalog- th: Thai- bo: Tibetan- ti: Tigrinya- to: Tonga (Tonga Islands)- tn: Tswana- ts: Tsonga- tk: Turkmen- tr: Turkish- tw: Twi- ug: Uighur; Uyghur- uk: Ukrainian- ur: Urdu- uz: Uzbek- ve: Venda- vi: Vietnamese- vo: Volap\u00fck- cy: Welsh- wa: Walloon- wo: Wolof- xh: Xhosa- yi: Yiddish- yo: Yoruba- za: Zhuang; Chuang- zh: Chinese- zu: Zulu", "items": { "$ref": "#/$defs/LanguageEnum" @@ -1541,6 +1577,9 @@ }, "measuredValue": { "description": "An integer value size of the measured property, such as \u20181000\u2019 for 1000 people in the study or \u201887\u2019 for 87 MRI scans in the dataset.", + "examples": [ + 1000 + ], "guidance": "An integer value size of the measured property, such as \u20181000\u2019 for 1000 people in the study or \u201887\u2019 for 87 MRI scans in the dataset.", "title": "Measured value", "type": "integer" @@ -1571,6 +1610,9 @@ } ], "description": "Provide the date, or datetime that the observation was made. Multiple observations of the same property can be provided, for example an observation of cumulative COVID positive cases by specimen on the 1/1/2021 with a measuredValue of 2000000, and a second observation entry on 8/2/2021 recording a measuredValue of as 3100000.", + "examples": [ + "2024-10-24" + ], "guidance": "Provide the date, or datetime that the observation was made. Multiple observations of the same property can be provided, for example an observation of cumulative COVID positive cases by specimen on the 1/1/2021 with a measuredValue of 2000000, and a second observation entry on 8/2/2021 recording a measuredValue of as 3100000.", "title": "Observation date" }, @@ -1581,6 +1623,9 @@ } ], "description": "Descriptive term for the observation property measured. 
For example, people, procedures, x-rays, or diagnosis of type 1 diabetes. This could also be a specific SNOMED CT term.", + "examples": [ + "Count" + ], "guidance": "Descriptive term for the observation property measured.", "title": "Measured property" } @@ -1650,6 +1695,9 @@ } ], "description": "Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation.", + "examples": [ + "30f16703-28bc-4f45-9ce5-625d2d3db27d" + ], "guidance": "Example: https://ror.org/053fq8t95If your organisation does not have a ROR identifier please use the \u201csuggest and institute\u201d function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform", "title": "identifier" }, @@ -1660,6 +1708,9 @@ } ], "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata.", + "examples": [ + "Health Data Research UK" + ], "guidance": "In most this will be the same as the Team you have on the Gateway. However, in some cases this will be different. For example, Tissue Directory are a Team on the Gateway but coordinate activities across a number of Data Providers such as Cambridge Blood and Stem Cell Biobank.", "title": "Name of data provider" }, @@ -1702,6 +1753,9 @@ } ], "description": "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. If a contact point is not provided this will default to the contact point for the team submitting the metadata.", + "examples": [ + "test@test.co.uk" + ], "title": "contact point" }, "memberOf": { @@ -1748,6 +1802,9 @@ }, "datasetType": { "description": "The topic areas to which the dataset content relates.", + "examples": [ + "Health and disease" + ], "guidance": "Types include those listed below. 
Datasets can have more than one type associated.- Health and disease: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.- Treatments/Interventions: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.- Measurements/Tests: Includes any data related to laboratory or other diagnostics.- Imaging types: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.- Imaging area of the body: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.- Omics: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics.- Socioeconomic: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.- Lifestyle: Includes any data related to smoking, physical activity, dietary habits or alcohol.- Registry: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.- Environment and energy: Includes any data related to the monitoring or study of environmental or energy factors or events.- Information and communication: Includes any data related to the study or application of information and communication.- Politics: Includes any data related to political views, activities, voting, etc.", "items": { "$ref": "#/$defs/DatasetTypeV2" @@ -2131,6 +2188,9 @@ }, "populationSize": { "description": "Input the number of people captured within the dataset.", + "examples": [ + 1000 + ], "guidance": "This number informs a filter for Researchers to differentiate dataset 
search results based on the number of people in the dataset, and does not pull from the Observations fields. The filter also allows for Researchers to search datasets which have no population size reported, but will not pull any population size captured in the Observations section.", "title": "Dataset population size", "type": "integer" @@ -2185,7 +2245,7 @@ ], "description": "Please provide a valid email address that can be used to coordinate data access requests.", "examples": [ - "SAILDatabank@swansea.ac.uk" + "gateway@hdruk.ac.uk" ], "guidance": "Organisations are expected to provide a dedicated email address associated with the data access request process. If no contact point is provided in this field, this field will be defaulted to the teams support email provided in the teams setting.Note: An employee's email address can only be provided on a temporary basis and if one is provided, you must obtain explicit consent for this purpose.", "title": "Contact point" @@ -2238,6 +2298,9 @@ } ], "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "examples": [ + "Continuous" + ], "guidance": "Please indicate the frequency of publishing.- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/. Options:- Static: Dataset published once.- Irregular: Dataset published at uneven intervals.- Continuous: Dataset published without interruption.- Biennial: Dataset published every two years.- Annual: Dataset published occurs once a year.- Biannual: Dataset published twice a year.- Quarterly: Dataset published every three months.- Bimonthly: Dataset published every two months.- Monthly: Dataset published once a month.- Biweekly: Dataset published every two weeks.- Weekly: Dataset published once a week.- Twice weekly: Dataset published twice a week.- Daily: Dataset published once a day.- Other: Dataset published using other interval.", "title": "Publishing frequency" }, @@ -2257,6 +2320,9 @@ ], "default": null, "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. 
Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "guidance": "Please indicate the frequency the dataset is published.- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.- If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null.- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", "title": "Distribution release date" }, @@ -2272,6 +2338,9 @@ } ], "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "guidance": "The start of the time period that the dataset provides coverage for.- If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", "title": "Start date" }, @@ -2294,6 +2363,9 @@ ], "default": null, "description": "The end of the time period that the dataset provides coverage for. 
If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "guidance": "The end of the time period that the dataset provides coverage for.- If the dataset is \u201cContinuous\u201d and has no known end date, please leave blank.- If there are multiple cohorts in the dataset with varying end dates, please provide the latest date.", "title": "End date" }, @@ -2304,6 +2376,9 @@ } ], "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.", + "examples": [ + "Not applicable" + ], "guidance": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.- Less than 1 week: Typical time lag of less than a week.- 1-2 weeks: Typical time-lag of one to two weeks.- 2-4 weeks: Typical time-lag of two to four weeks.- 1-2 months: Typical time-lag of one to two months.- 2-6 months: Typical time-lag of two to six months.- 6 months plus: Typical time-lag of more than six months.- Variable: Variable time-lag.- Not applicable: Not Applicable i.e. 
static dataset.- Other: Other time-lag.", "title": "Time lag" } @@ -2489,12 +2564,18 @@ }, "issued": { "description": "Datetime stamp of when this metadata version was initially issued", + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "format": "date-time", "title": "Metadata Issued Datetime", "type": "string" }, "modified": { "description": "Datetime stamp of when this metadata was last modified", + "examples": [ + "2024-10-24T00:00:00.000Z" + ], "format": "date-time", "title": "Last Modified Datetime", "type": "string" diff --git a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml index 3b17b56..df8a19f 100644 --- a/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml +++ b/hdr_schemata/models/HDRUK/v3_0_0/annotations/config.yaml @@ -1,3 +1,9 @@ +issued: + examples: + - "2024-10-24T00:00:00.000Z" +modified: + examples: + - "2024-10-24T00:00:00.000Z" summary: dataCustodian: title: "Dataset Custodian" @@ -5,7 +11,7 @@ summary: contactPoint: guidance: "Organisations are expected to provide a dedicated email address associated with the data access request process. If no contact point is provided in this field, this field will be defaulted to the teams support email provided in the teams setting.\\n**Note:** An employee's email address can only be provided on a temporary basis and if one is provided, **you must obtain explicit consent for this purpose**." examples: - - "SAILDatabank@swansea.ac.uk" + - "gateway@hdruk.ac.uk" title: "Contact point" description: "Please provide a valid email address that can be used to coordinate data access requests." doiName: @@ -18,6 +24,8 @@ summary: guidance: "This number informs a filter for Researchers to differentiate dataset search results based on the number of people in the dataset, and does not pull from the **Observations** fields. 
The filter also allows for Researchers to search datasets which have no population size reported, but will **not** pull any population size captured in the **Observations** section." title: "Dataset population size" description: "Input the number of people captured within the dataset." + examples: + - 1000 keywords: guidance: "- Please provide **relevant** and **specific keywords** that can **improve the search engine optimization** of your dataset.\\n- Please **enter one keyword at a time** and click **Add New Field** to add further keywords.\\n- Text from the title is automatically included in the search, there is no need to include this in the keywords.\\n- Include words that researcher may include in their searches." title: "Keywords" @@ -47,10 +55,14 @@ summary: guidance: "**Example**: https://ror.org/053fq8t95\\nIf your organisation does not have a ROR identifier please use the “suggest and institute” function here: https://docs.google.com/forms/d/e/1FAIpQLSdJYaMTCwS7muuTa-B_CnAtCSkKzt19lkirAKG4u7umH9Nosg/viewform" title: "identifier" description: "Please provide a Research Organization Registry (ROR) identifier (see https://ror.org/) for your organisation." + examples: + - "30f16703-28bc-4f45-9ce5-625d2d3db27d" name: guidance: In most this will be the same as the Team you have on the Gateway. However, in some cases this will be different. For example, Tissue Directory are a Team on the Gateway but coordinate activities across a number of Data Providers such as Cambridge Blood and Stem Cell Biobank. title: "Name of data provider" description: "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata." + examples: + - "Health Data Research UK" logo: title: "Organisation Logo" description: "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. 
If a logo is not submitted this will default to the logo for the team submitting the metadata." @@ -60,6 +72,8 @@ summary: contactPoint: title: "contact point" description: "Organisation contact point(s) which will be used for receiving queries from HDR, and enquiries and data access requests from Researchers. If a contact point is not provided this will default to the contact point for the team submitting the metadata." + examples: + - "test@test.co.uk" memberOf: title: "Organisation Membership" description: "Please indicate if the organisation is an Alliance Member or a Hub. If this field is not submitted this will default to the membership for the team submitting the metadata." @@ -77,22 +91,32 @@ provenance: guidance: "- Please indicate the frequency the dataset is published.\\n- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.\\n- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.\\n- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.\\n- If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null.\\n- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.\\n- Notes: see [https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/](https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/)" title: "Distribution release date" description: "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. 
Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020." + examples: + - "2024-10-24T00:00:00.000Z" startDate: guidance: "- The start of the time period that the dataset provides coverage for.\\n- If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information." title: "Start date" description: "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information." + examples: + - "2024-10-24T00:00:00.000Z" timeLag: guidance: "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset.\\n- **Less than 1 week**: Typical time lag of less than a week.\\n- **1-2 weeks**: Typical time-lag of one to two weeks.\\n- **2-4 weeks**: Typical time-lag of two to four weeks.\\n- **1-2 months**: Typical time-lag of one to two months.\\n- **2-6 months**: Typical time-lag of two to six months.\\n- **6 months plus**: Typical time-lag of more than six months.\\n- **Variable**: Variable time-lag.\\n- **Not applicable**: Not Applicable i.e. static dataset.\\n- **Other**: Other time-lag." title: "Time lag" description: "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset." 
+ examples: + - "Not applicable" endDate: guidance: "- The end of the time period that the dataset provides coverage for.\\n- If the dataset is **“Continuous”** and has no known end date, **please leave blank**.\\n- If there are **multiple cohorts** in the dataset with varying end dates, please provide the **latest date**." title: "End date" description: "The end of the time period that the dataset provides coverage for. If the dataset is “Continuous” and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information." + examples: + - "2024-10-24T00:00:00.000Z" publishingFrequency: guidance: "Please indicate the frequency of publishing.\\n- If a dataset is published regularly please choose a publishing periodicity from the constrained list and indicate the next release date.\\n- When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.\\n- If a dataset has been published and will remain static please indicate that it is static and indicate when it was released.\\n- If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null.\\n- If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null.\\n- Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/.\\n\\n Options:\\n- **Static**: Dataset published once.\\n- **Irregular**: Dataset published at uneven intervals.\\n- **Continuous**: Dataset published without interruption.\\n- **Biennial**: Dataset published every two years.\\n- **Annual**: Dataset published occurs once a year.\\n- **Biannual**: Dataset published twice a year.\\n- **Quarterly**: Dataset published every three months.\\n- **Bimonthly**: Dataset published every 
two months.\\n- **Monthly**: Dataset published once a month.\\n- **Biweekly**: Dataset published every two weeks.\\n- **Weekly**: Dataset published once a week.\\n- **Twice weekly**: Dataset published twice a week.\\n- **Daily**: Dataset published once a day.\\n- **Other**: Dataset published using other interval." title: "Publishing frequency" description: "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/" + examples: + - "Continuous" origin: title: "Origin Coverage" description: "Coverage by origin (geographical and situations)." @@ -108,6 +132,8 @@ provenance: guidance: "Types include those listed below. 
Datasets can have more than one type associated.\\n- **Health and disease**: Includes any data related to mental health, cardiovascular, cancer, rare diseases, metabolic and endocrine, neurological, reproductive, maternity and neonatology, respiratory, immunity, musculoskeletal, vision, renal and urogenital, oral and gastrointestinal, cognitive function or hearing.\\n- **Treatments/Interventions**: Includes any data related to treatment or interventions related to vaccines or which are preventative or therapeutic in nature.\\n- **Measurements/Tests**: Includes any data related to laboratory or other diagnostics.\\n- **Imaging types**: Includes any data related to CT, MRI, PET, x-ray, ultrasound or pathology imaging.\\n- **Imaging area of the body**: Indicates whether the dataset relates to head, chest, arm abdomen or leg imaging.\\n- **Omics**: Includes any data related to proteomics, transcriptomics, epigenomics, metabolomics, multiomics, metagenomics or genomics.\\n- **Socioeconomic**: Includes any data related to education, crime and justice, ethnicity, housing, labour, ageing, economics, marital status, social support, deprivation, religion, occupation, finances or family circumstances.\\n- **Lifestyle**: Includes any data related to smoking, physical activity, dietary habits or alcohol.\\n- **Registry**: Includes any data related to disease registries for research, national disease registries, audits, or birth and deaths records.\\n- **Environment and energy**: Includes any data related to the monitoring or study of environmental or energy factors or events.\\n- **Information and communication**: Includes any data related to the study or application of information and communication.\\n- **Politics**: Includes any data related to political views, activities, voting, etc." title: "Dataset type" description: "The topic areas to which the dataset content relates." 
+ examples: + - "Health and disease" source: guidance: " - **EPR**: Data Extracted from Electronic Patient Record.\\n- **Electronic survey**: Data has been extracted from electronic surveys.\\n- **LIMS**: Data has been extracted from a laboratory information management system.\\n- **Paper-based**: Data has been extracted from paper forms.\\n- **Free text NLP**: Data has been extracted from unstructured freetext using natural language processing.\\n- **Machine generated**: Data has been machine generated i.e. imaging.\\n- **Other**: Data has been extracted by other means." title: "Source of data extraction" @@ -138,14 +164,20 @@ observations: guidance: "Descriptive term for the observation property measured." title: "Measured property" description: "Descriptive term for the observation property measured. For example, people, procedures, x-rays, or diagnosis of type 1 diabetes. This could also be a specific SNOMED CT term." + examples: + - "Count" measuredValue: guidance: "An integer value size of the measured property, such as ‘1000’ for 1000 people in the study or ‘87’ for 87 MRI scans in the dataset." title: "Measured value" description: "An integer value size of the measured property, such as ‘1000’ for 1000 people in the study or ‘87’ for 87 MRI scans in the dataset." + examples: + - 1000 observationDate: guidance: "Provide the date, or datetime that the observation was made. Multiple observations of the same property can be provided, for example an observation of cumulative COVID positive cases by specimen on the 1/1/2021 with a measuredValue of 2000000, and a second observation entry on 8/2/2021 recording a measuredValue of as 3100000." title: "Observation date" description: "Provide the date, or datetime that the observation was made. 
Multiple observations of the same property can be provided, for example an observation of cumulative COVID positive cases by specimen on the 1/1/2021 with a measuredValue of 2000000, and a second observation entry on 8/2/2021 recording a measuredValue of as 3100000." + examples: + - "2024-10-24" structuralMetadata: tables: _description: "Tables in the dataset" @@ -243,6 +275,8 @@ accessibility: guidance: "- The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both separated by a comma.\\n- If such a resource or the underlying process doesn’t exist, please provide “In Progress”, until both the process and the documentation are ready." title: "Access rights" description: "Please provide details for the data access rights." + examples: + - "In Progress" accessServiceCategory: guidance: "Select the category which best matches how a Researcher will access the dataset, if approved for access. If the access method changes based on the data required for the project (e.g. the dataset can be shared via secure email if the extract is fully anonymised, but must be accessed via a TRE/SDE if the extract is only pseudonymised) then select 'varies based on project'." 
examples: @@ -273,14 +307,22 @@ accessibility: guidance: "https://www.iso.org/iso-639-language-code\\n- **aa**: Afar\\n- **ab**: Abkhazian\\n- **af**: Afrikaans\\n- **ak**: Akan\\n- **sq**: Albanian\\n- **am**: Amharic\\n- **ar**: Arabic\\n- **an**: Aragonese\\n- **hy**: Armenian\\n- **as**: Assamese\\n- **av**: Avaric\\n- **ae**: Avestan\\n- **ay**: Aymara\\n- **az**: Azerbaijani\\n- **ba**: Bashkir\\n- **bm**: Bambara\\n- **eu**: Basque\\n- **be**: Belarusian\\n- **bn**: Bengali\\n- **bh**: Bihari languages\\n- **bi**: Bislama\\n- **bo**: Tibetan\\n- **bs**: Bosnian\\n- **br**: Breton\\n- **bg**: Bulgarian\\n- **my**: Burmese\\n- **ca**: Catalan; Valencian\\n- **cs**: Czech\\n- **ch**: Chamorro\\n- **ce**: Chechen\\n- **zh**: Chinese\\n- **cu**: Church Slavic; Old Slavonic; Church Slavonic; Old Bulgarian; Old Church Slavonic\\n- **cv**: Chuvash\\n- **kw**: Cornish\\n- **co**: Corsican\\n- **cr**: Cree\\n- **cy**: Welsh\\n- **cs**: Czech\\n- **da**: Danish\\n- **de**: German\\n- **dv**: Divehi; Dhivehi; Maldivian\\n- **nl**: Dutch; Flemish\\n- **dz**: Dzongkha\\n- **el**: Greek, Modern (1453-)\\n- **en**: English\\n- **eo**: Esperanto\\n- **et**: Estonian\\n- **eu**: Basque\\n- **ee**: Ewe\\n- **fo**: Faroese\\n- **fa**: Persian\\n- **fj**: Fijian\\n- **fi**: Finnish\\n- **fr**: French\\n- **fy**: Western Frisian\\n- **ff**: Fulah\\n- **ka**: Georgian\\n- **de**: German\\n- **gd**: Gaelic; Scottish Gaelic\\n- **ga**: Irish\\n- **gl**: Galician\\n- **gv**: Manx\\n- **el**: Greek, Modern (1453-)\\n- **gn**: Guarani\\n- **gu**: Gujarati\\n- **ht**: Haitian; Haitian Creole\\n- **ha**: Hausa\\n- **ho**: Hiri Motu\\n- **hr**: Croatian\\n- **hu**: Hungarian\\n- **hy**: Armenian\\n- **ig**: Igbo\\n- **is**: Icelandic\\n- **io**: Ido\\n- **ii**: Sichuan Yi; Nuosu\\n- **iu**: Inuktitut\\n- **ie**: Interlingue; Occidental\\n- **ia**: Interlingua (International Auxiliary Language Association)\\n- **id**: Indonesian\\n- **ik**: Inupiaq\\n- **is**: Icelandic\\n- **it**: 
Italian\\n- **jv**: Javanese\\n- **ja**: Japanese\\n- **kl**: Kalaallisut; Greenlandic\\n- **kn**: Kannada\\n- **ks**: Kashmiri\\n- **ka**: Georgian\\n- **kr**: Kanuri\\n- **kk**: Kazakh\\n- **km**: Central Khmer\\n- **ki**: Kikuyu; Gikuyu\\n- **rw**: Kinyarwanda\\n- **ky**: Kirghiz; Kyrgyz\\n- **kv**: Komi\\n- **kg**: Kongo\\n- **ko**: Korean\\n- **kj**: Kuanyama; Kwanyama\\n- **ku**: Kurdish\\n- **lo**: Lao\\n- **la**: Latin\\n- **lv**: Latvian\\n- **li**: Limburgan; Limburger; limburgish\\n- **ln**: Lingala\\n- **lt**: Lithuanian\\n- **lb**: Luxembourgish; Letzeburgesch\\n- **lu**: Luba-Katanga\\n- **lg**: Ganda\\n- **mk**: Macedonian\\n- **mh**: Marshallese\\n- **ml**: Malayalam\\n- **mi**: Maori\\n- **mr**: Marathi\\n- **ms**: Malay\\n- **mk**: Macedonian\\n- **mg**: Malagasy\\n- **mt**: Maltese\\n- **mn**: Mongolian\\n- **mi**: Maori\\n- **ms**: Malay\\n- **my**: Burmese\\n- **na**: Nauru\\n- **nv**: Navajo; Navaho\\n- **nr**: Ndebele, South; South Ndebele\\n- **nd**: Ndebele, North; North Ndebele\\n- **ng**: Ndonga\\n- **ne**: Nepali\\n- **nl**: Dutch; Flemish\\n- **nn**: Norwegian Nynorsk; Nynorsk, Norwegian\\n- **nb**: Bokmål, Norwegian; Norwegian Bokmål\\n- **no**: Norwegian\\n- **ny**: Chichewa; Chewa; Nyanja\\n- **oc**: Occitan (post 1500)\\n- **oj**: Ojibwa\\n- **or**: Oriya\\n- **om**: Oromo\\n- **os**: Ossetian; Ossetic\\n- **pa**: Panjabi; Punjabi\\n- **fa**: Persian\\n- **pi**: Pali\\n- **pl**: Polish\\n- **pt**: Portuguese\\n- **ps**: Pushto; Pashto\\n- **qu**: Quechua\\n- **rm**: Romansh\\n- **ro**: Romanian; Moldavian; Moldovan\\n- **rn**: Rundi\\n- **ru**: Russian\\n- **sg**: Sango\\n- **sa**: Sanskrit\\n- **si**: Sinhala; Sinhalese\\n- **sk**: Slovak\\n- **sl**: Slovenian\\n- **se**: Northern Sami\\n- **sm**: Samoan\\n- **sn**: Shona\\n- **sd**: Sindhi\\n- **so**: Somali\\n- **st**: Sotho, Southern\\n- **es**: Spanish; Castilian\\n- **sq**: Albanian\\n- **sc**: Sardinian\\n- **sr**: Serbian\\n- **ss**: Swati\\n- **su**: Sundanese\\n- **sw**: 
Swahili\\n- **sv**: Swedish\\n- **ty**: Tahitian\\n- **ta**: Tamil\\n- **tt**: Tatar\\n- **te**: Telugu\\n- **tg**: Tajik\\n- **tl**: Tagalog\\n- **th**: Thai\\n- **bo**: Tibetan\\n- **ti**: Tigrinya\\n- **to**: Tonga (Tonga Islands)\\n- **tn**: Tswana\\n- **ts**: Tsonga\\n- **tk**: Turkmen\\n- **tr**: Turkish\\n- **tw**: Twi\\n- **ug**: Uighur; Uyghur\\n- **uk**: Ukrainian\\n- **ur**: Urdu\\n- **uz**: Uzbek\\n- **ve**: Venda\\n- **vi**: Vietnamese\\n- **vo**: Volapük\\n- **cy**: Welsh\\n- **wa**: Walloon\\n- **wo**: Wolof\\n- **xh**: Xhosa\\n- **yi**: Yiddish\\n- **yo**: Yoruba\\n- **za**: Zhuang; Chuang\\n- **zh**: Chinese\\n- **zu**: Zulu" title: "Language" description: "This should list all the languages in which the dataset metadata and underlying data is made available complaint with ISO 639." + examples: + - "en" vocabularyEncodingScheme: guidance: "- List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset.\\n- If the controlled vocabularies are local standards, please make that explicit. 
If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition.\\n- Notes: More than one vocabulary may be provided.\\n- **Local**: Local Coding Standard.\\n- **OPCS4**: .\\n- **READ**: .\\n- **SNOMED CT**: .\\n- **SNOMED RT**: .\\n- **DM+D**: .\\n- **NHS National Codes**: .\\n- **ODS**: .\\n- **LOINC**: .\\n- **ICD10**: .\\n- **ICD10CM**: .\\n- **ICD10PCS**: .\\n- **ICD9CM**: .\\n- **ICD9**: .\\n- **ICDO3**: .\\n- **AMT**: .\\n- **APC**: .\\n- **ATC**: .\\n- **CIEL**: .\\n- **HPO**: .\\n- **CPT4**: .\\n- **DPD**: .\\n- **DRG**: .\\n- **HEMONC**: .\\n- **JMDC**: .\\n- **KCD7**: .\\n- **MULTUM**: .\\n- **NAACCR**: .\\n- **NDC**: .\\n- **NDFRT** <:https://www.nlm.nih.gov/research/umls/sourcereleasedocs/current/NDFRT/index.html>.\\n- **OXMIS**: .\\n- **RXNORM**: .\\n- **RXNORM EXTENSION**: .\\n- **SPL**: .\\n- **Other**: Please indicate if there is another standard that you are using. This will trigger a support ticket where you can request the addition of the terminology to the HOP.\\n- **NHS Scotland National Codes**: .\\n- **NHS Wales National Codes**: " title: "Controlled vocabulary" description: "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided." + examples: + - "LOCAL" + - "ICD10" conformsTo: guidance: "- List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR.\\n- If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition.\\n- **HL7 FHIR**: .\\n- **HL7 V2**: .\\n- **HL7 CDA**: .\\n- **HL7 CCOW**: .\\n- **DICOM**: .\\n- **I2B2**: .\\n- **IHE**: .\\n- **OMOP**: .\\n- **openEHR**: .\\n- **Sentinel**: .\\n- **PCORnet**: .\\n- **CDISC**: .\\n- **Local**: In-house developed data model.\\n- **Other**: Other standardised data model.\\n- **NHS Data Dictionary**: .\\n- **NHS Scotland Data Dictionary**: .\\n- **NHS Wales Data Dictionary**: ." title: "Alignment with standardised data models" description: "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition." + examples: + - "LOCAL" + - "NHS DATA DICTIONARY" enrichmentAndLinkage: title: "Enrichment and Linkage" @@ -343,6 +385,8 @@ documentation: guidance: "- An HTML account of the data that **provides context and scope** of the data, **limited to 10000 characters, and/or a resolvable URL** that describes the dataset.\\n- Additional information can be recorded and included using the Associated media field." title: "Description" description: "A free-text description of the dataset.\\nA URL can also be provided as the description of the dataset.\\nGateway Feature: Keywords and text may be extracted out of the description and indexed for search." + examples: + - "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web® including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." 
coverage: datasetCompleteness: guidance: "If your organisation has a publicly available site which contains information on the completeness of a dataset, add that URL here.\\n**Example**: https://bhfdatasciencecentre.org/dashboard/" @@ -377,6 +421,7 @@ coverage: spatial: guidance: "- The geographical area covered by the dataset.\\n- Please provide a valid location.\\n- For locations in the UK, this location should conform to [ONS standards](https://geoportal.statistics.gov.uk/datasets/208d9884575647c29f0dd5a1184e711a/about).\\n- For locations in other countries we use [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes)." examples: + - "United Kingdom" - "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" title: "Geographic coverage" description: "The geographical area covered by the dataset. It is recommended that links are to entries in one of the recommended standards:\\n- For locations in the UK: [ONS standards](https://geoportal.statistics.gov.uk/datasets/208d9884575647c29f0dd5a1184e711a/about)\\n- For locations in other countries: [ISO 3166-1 & ISO 3166-2](https://github.com/HDRUK/reference-codes)" @@ -408,24 +453,38 @@ demographicFrequency: description: "Array of age bins and their corresponding counts." bin: title: "Age grouping" + examples: + - "30-34 years" count: title: "Age count" + examples: + - 1000 ethnicity: title: "Ethnicity" description: "Array of ethnicity bins and their corresponding counts." bin: title: "Ethnicity grouping" + examples: + - "Black or Black British - Any other Black background" count: title: "Ethnicity count" + examples: + - 1000 disease: title: "Disease" description: "Array of diseases and their corresponding counts." 
diseaseCode: title: "Disease code" + examples: + - "J45" diseaseCodeVocabulary: title: "Disease code vocabulary" + examples: + - "ICD10" count: title: "Disease count" + examples: + - 1000 omics: title: "Omics" description: "Omics" diff --git a/hdr_schemata/utils/create_example.py b/hdr_schemata/utils/create_example.py index 5384b4c..2dcae6b 100644 --- a/hdr_schemata/utils/create_example.py +++ b/hdr_schemata/utils/create_example.py @@ -1,6 +1,7 @@ import json - - +import datetime +import re +import ast def get_subItems(item): return ( @@ -9,11 +10,33 @@ def get_subItems(item): else " | ".join(item["type"]) ) -def create_example(path): +def create_template(path): data = json.load(open(f"{path}.structure.json")) example = {item["name"]: get_subItems(item) for item in data} - print(json.dumps(example, indent=6)) + # print(json.dumps(example, indent=6)) + with open(f"{path}.template.json", "w") as f: + json.dump(example, f, indent=6) + +def make_example(item): + if item["name"] == "structuralMetadata": + return None + if item.get("subItems"): + if item["is_list"]: + return [{subItem["name"]: make_example(subItem) for subItem in item["subItems"]}] + else: + return {subItem["name"]: make_example(subItem) for subItem in item["subItems"]} + elif "null" in item["type"]: + return None + elif item["is_list"]: + return item["examples"] if item["examples"] else None + else: + return item["examples"][0] if item["examples"] else None + +def create_example(path): + data = json.load(open(f"{path}.structure.json")) + example = {item["name"]: make_example(item) for item in data} with open(f"{path}.example.json", "w") as f: json.dump(example, f, indent=6) +create_template("./docs/HDRUK/3.0.0") create_example("./docs/HDRUK/3.0.0")