From 771f840569d1ef1ed4b212182bd1d4f0b210ad31 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 14:31:17 +0000 Subject: [PATCH 01/21] initial adding of 2.2.0 schema --- hdr_schemata/models/HDRUK/2.2.0/schema.json | 2015 +++++++++++++++++ .../models/HDRUK/create_json_schema.py | 4 +- hdr_schemata/models/HDRUK/v2_2_0/__init__.py | 11 + 3 files changed, 2029 insertions(+), 1 deletion(-) create mode 100644 hdr_schemata/models/HDRUK/2.2.0/schema.json create mode 100644 hdr_schemata/models/HDRUK/v2_2_0/__init__.py diff --git a/hdr_schemata/models/HDRUK/2.2.0/schema.json b/hdr_schemata/models/HDRUK/2.2.0/schema.json new file mode 100644 index 0000000..277f51c --- /dev/null +++ b/hdr_schemata/models/HDRUK/2.2.0/schema.json @@ -0,0 +1,2015 @@ +{ + "$defs": { + "AbstractText": { + "anyOf": [ + { + "maxLength": 500, + "minLength": 5, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "AbstractText" + }, + "Access": { + "additionalProperties": false, + "properties": { + "accessRights": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "title": "Access Rights" + }, + "accessService": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. 
However, there will be the opportunity to overwrite this value for each dataset.", + "examples": [ + "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + ], + "title": "Access Service" + }, + "accessRequestCost": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", + "title": "Organisation Access Request Cost" + }, + "deliveryLeadTime": { + "anyOf": [ + { + "$ref": "#/$defs/DeliveryLeadTime" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", + "title": "Access Request Duration" + }, + "jurisdiction": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Isocountrycode" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", + "title": "Jurisdiction" + }, + "dataController": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", + "title": "Data Controller" + }, + "dataProcessor": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", + "title": "Data Processor" + } + }, + "required": [ + "accessRights", + "jurisdiction", + "dataController" + ], + "title": "Access", + "type": "object" + }, + "Accessibility": { + "additionalProperties": false, + "properties": { + "usage": { + "anyOf": [ + { + "$ref": "#/$defs/Usage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "This section includes information about how the data can be used and how it is currently being used", + "title": "Usage" + }, + "access": { + "allOf": [ + { + "$ref": "#/$defs/Access" + } + ], + "description": "This section includes information about data access" + }, + "formatAndStandards": { + "anyOf": [ + { + "$ref": "#/$defs/FormatAndStandards" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Section includes technical attributes for language vocabularies, sizes etc. 
and gives researchers facts about and processing the underlying data in the dataset.", + "title": "Format and Standards" + } + }, + "required": [ + "access" + ], + "title": "Accessibility", + "type": "object" + }, + "AgeRange": { + "anyOf": [ + { + "pattern": "Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "AgeRange" + }, + "CommaSeparatedValues": { + "anyOf": [ + { + "pattern": "([^,]+)", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "CommaSeparatedValues" + }, + "ControlledVocabulary": { + "anyOf": [ + { + "$ref": "#/$defs/ControlledVocabularyEnum" + }, + { + "type": "null" + } + ], + "default": null, + "title": "ControlledVocabulary" + }, + "ControlledVocabularyEnum": { + "enum": [ + "LOCAL", + "OPCS4", + "READ", + "SNOMED CT", + "SNOMED RT", + "DM PLUS D", + "DM+D", + "NHS NATIONAL CODES", + "NHS SCOTLAND NATIONAL CODES", + "NHS WALES NATIONAL CODES", + "ODS", + "LOINC", + "ICD10", + "ICD10CM", + "ICD10PCS", + "ICD9CM", + "ICD9", + "ICDO3", + "AMT", + "APC", + "ATC", + "CIEL", + "HPO", + "CPT4", + "DPD", + "DRG", + "HEMONC", + "JMDC", + "KCD7", + "MULTUM", + "NAACCR", + "NDC", + "NDFRT", + "OXMIS", + "RXNORM", + "RXNORM EXTENSION", + "SPL", + "OTHER" + ], + "title": "ControlledVocabularyEnum", + "type": "string" + }, + "Coverage": { + "additionalProperties": false, + "properties": { + "spatial": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", + "examples": [ + "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" + ], + "title": "Geographic Coverage" + }, + "typicalAgeRange": { + "anyOf": [ + { + "$ref": "#/$defs/AgeRange" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "title": "Age Range" + }, + "physicalSampleAvailability": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": {}, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Availability of physical samples associated with the dataset. If samples are available, please indicate the types of samples that are available. More than one type may be provided. If sample are not yet available, please provide \u201cAVAILABILITY TO BE CONFIRMED\u201d. If samples are not available, then please provide \u201cNOT AVAILABLE\u201d.", + "examples": [ + "BONE MARROW" + ], + "title": "Physical Sample Availability" + }, + "followup": { + "anyOf": [ + { + "$ref": "#/$defs/Followup" + }, + { + "type": "null" + } + ], + "default": "UNKNOWN", + "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", + "title": "Followup" + }, + "pathway": { + "anyOf": [ + { + "$ref": "#/$defs/Description" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. 
This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", + "title": "Pathway" + } + }, + "title": "Coverage", + "type": "object" + }, + "DataClass": { + "additionalProperties": false, + "properties": { + "name": { + "anyOf": [ + { + "maxLength": 500, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The name of a table in a dataset.", + "title": "Table Name" + }, + "description": { + "anyOf": [ + { + "maxLength": 20000, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of a table in a dataset.", + "title": "Table Description" + }, + "elements": { + "description": "A list of data elements contained within a table in a dataset.", + "items": { + "$ref": "#/$defs/DataElement" + }, + "title": "Data Elements", + "type": "array" + } + }, + "required": [ + "name", + "elements" + ], + "title": "DataClass", + "type": "object" + }, + "DataElement": { + "additionalProperties": true, + "properties": { + "name": { + "allOf": [ + { + "$ref": "#/$defs/Name" + } + ], + "description": "The name of a column in a table.", + "title": "Column Name" + }, + "dataType": { + "description": "The data type of values in the column", + "title": "Data Type", + "type": "string" + }, + "description": { + "anyOf": [ + { + "maxLength": 20000, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of a column in a table.", + "title": "Column Description" + }, + "sensitive": { + "description": "A True or False value, indicating if the field is sensitive or not", + "title": "Sensitive", + "type": "boolean" + } + }, + "required": [ + "name", + "dataType", + "sensitive" + ], + "title": "DataElement", + "type": "object" + }, + "DataUseLimitation": { + "enum": [ + 
"GENERAL RESEARCH USE", + "COMMERCIAL RESEARCH USE", + "GENETIC STUDIES ONLY", + "NO GENERAL METHODS RESEARCH", + "NO RESTRICTION", + "GEOGRAPHICAL RESTRICTIONS", + "INSTITUTION SPECIFIC RESTRICTIONS", + "NOT FOR PROFIT USE", + "PROJECT SPECIFIC RESTRICTIONS", + "RESEARCH SPECIFIC RESTRICTIONS", + "USER SPECIFIC RESTRICTION", + "RESEARCH USE ONLY", + "NO LINKAGE" + ], + "title": "DataUseLimitation", + "type": "string" + }, + "DataUseRequirements": { + "enum": [ + "COLLABORATION REQUIRED", + "PROJECT SPECIFIC RESTRICTIONS", + "ETHICS APPROVAL REQUIRED", + "INSTITUTION SPECIFIC RESTRICTIONS", + "GEOGRAPHICAL RESTRICTIONS", + "PUBLICATION MORATORIUM", + "PUBLICATION REQUIRED", + "RETURN TO DATABASE OR RESOURCE", + "TIME LIMIT ON USE", + "DISCLOSURE CONTROL", + "NOT FOR PROFIT USE", + "USER SPECIFIC RESTRICTION", + null + ], + "title": "DataUseRequirements" + }, + "DeliveryLeadTime": { + "enum": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NOT APPLICABLE", + "OTHER", + null + ], + "title": "DeliveryLeadTime" + }, + "Description": { + "anyOf": [ + { + "maxLength": 10000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "Documentation": { + "additionalProperties": false, + "properties": { + "description": { + "anyOf": [ + { + "$ref": "#/$defs/Description" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A free-text description of the record.", + "title": "Description" + }, + "associatedMedia": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "examples": [ + "PDF Document that describes study protocol" + ], + "title": "Associated Media" + }, + "isPartOf": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + { + "$ref": "#/$defs/IsPartOfEnum" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": "NOT APPLICABLE", + "description": "Please complete only if the dataset is part of a group or family", + "examples": [ + "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)." + ], + "title": "Group" + } + }, + "title": "Documentation", + "type": "object" + }, + "Doi": { + "anyOf": [ + { + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Doi" + }, + "EmailAddress": { + "anyOf": [ + { + "format": "email", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "EmailAddress" + }, + "EndDateEnum": { + "enum": [ + "CONTINUOUS", + null + ], + "title": "EndDateEnum" + }, + "EnrichmentAndLinkage": { + "additionalProperties": false, + "properties": { + "qualifiedRelation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. 
If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. Note: If all the datasets from Gateway organisation can be linked please indicate \u201cALL\u201d and the onboarding portal will automate linkage across the datasets submitted.", + "title": "Linked Datasets" + }, + "derivation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset.", + "title": "Derivations" + }, + "tools": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. 
Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", + "title": "Tools" + } + }, + "title": "EnrichmentAndLinkage", + "type": "object" + }, + "Followup": { + "enum": [ + "0 - 6 MONTHS", + "6 - 12 MONTHS", + "1 - 10 YEARS", + "> 10 YEARS", + "UNKNOWN", + "CONTINUOUS", + "OTHER", + null + ], + "title": "Followup" + }, + "Format": { + "minLength": 1, + "title": "Format", + "type": "string" + }, + "FormatAndStandards": { + "additionalProperties": false, + "properties": { + "vocabularyEncodingScheme": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/ControlledVocabulary" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", + "title": "Controlled Vocabulary" + }, + "conformsTo": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/StandardisedDataModels" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", + "title": "Conforms To" + }, + "language": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Language" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "This should list all the languages in which the dataset metadata and underlying data is made available.", + "title": "Language" + }, + "format": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Format" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. 
If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", + "title": "Format" + } + }, + "required": [ + "vocabularyEncodingScheme", + "conformsTo", + "language", + "format" + ], + "title": "FormatAndStandards", + "type": "object" + }, + "IsPartOfEnum": { + "const": "NOT APPLICABLE", + "title": "IsPartOfEnum" + }, + "Isocountrycode": { + "pattern": "^[A-Z]{2}(-[A-Z]{2,3})?$", + "title": "Isocountrycode", + "type": "string" + }, + "Language": { + "anyOf": [ + { + "$ref": "#/$defs/LanguageEnum" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Language" + }, + "LanguageEnum": { + "enum": [ + "aa", + "ab", + "ae", + "af", + "ak", + "am", + "an", + "ar", + "as", + "av", + "ay", + "az", + "ba", + "be", + "bg", + "bh", + "bi", + "bm", + "bn", + "bo", + "br", + "bs", + "ca", + "ce", + "ch", + "co", + "cr", + "cs", + "cu", + "cv", + "cy", + "da", + "de", + "dv", + "dz", + "ee", + "el", + "en", + "eo", + "es", + "et", + "eu", + "fa", + "ff", + "fi", + "fj", + "fo", + "fr", + "fy", + "ga", + "gd", + "gl", + "gn", + "gu", + "gv", + "ha", + "he", + "hi", + "ho", + "hr", + "ht", + "hu", + "hy", + "hz", + "ia", + "id", + "ie", + "ig", + "ii", + "ik", + "io", + "is", + "it", + "iu", + "ja", + "jv", + "ka", + "kg", + "ki", + "kj", + "kk", + "kl", + "km", + "kn", + "ko", + "kr", + "ks", + "ku", + "kv", + "kw", + "ky", + "la", + "lb", + "lg", + "li", + "ln", + "lo", + "lt", + "lu", + "lv", + "mg", + "mh", + "mi", + "mk", + "ml", + "mn", + "mr", + "ms", + "mt", + "my", + "na", + "nb", + "nd", + "ne", + "ng", + "nl", + "nn", + "no", + "nr", + "nv", + "ny", + "oc", + "oj", + "om", + "or", + "os", + "pa", + "pi", + "pl", + "ps", + "pt", + "qu", + "rm", + "rn", + "ro", + "ru", + "rw", + "sa", + "sc", + "sd", + "se", + "sg", + "si", + "sk", + "sl", + "sm", + "sn", + "so", + "sq", + "sr", + "ss", + "st", + "su", + "sv", + "sw", + "ta", + "te", + "tg", + "th", + "ti", + "tk", + "tl", + "tn", + "to", + "tr", + "ts", + "tt", + 
"tw", + "ty", + "ug", + "uk", + "ur", + "uz", + "ve", + "vi", + "vo", + "wa", + "wo", + "xh", + "yi", + "yo", + "za", + "zh", + "zu" + ], + "title": "LanguageEnum", + "type": "string" + }, + "LongDescription": { + "anyOf": [ + { + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "LongDescription" + }, + "MeasuredProperty": { + "title": "MeasuredProperty" + }, + "MemberOf": { + "enum": [ + "HUB", + "ALLIANCE", + "OTHER", + "NCS" + ], + "title": "MemberOf", + "type": "string" + }, + "Name": { + "title": "Name" + }, + "Observation": { + "additionalProperties": false, + "properties": { + "observedNode": { + "allOf": [ + { + "$ref": "#/$defs/StatisticalPopulationConstrained" + } + ], + "description": "Please select one of the following statistical populations for you observation", + "examples": [ + "PERSONS" + ], + "title": "Statistical Population" + }, + "measuredValue": { + "description": "Please provide the population size associated with the population type the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "title": "Measured Value", + "type": "integer" + }, + "disambiguatingDescription": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ], + "default": null, + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "title": "Disambiguating Description" + }, + "observationDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + } + ], + "description": "Please provide the date that the observation was made. 
Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations.", + "title": "Observation Date" + }, + "measuredProperty": { + "allOf": [ + { + "$ref": "#/$defs/MeasuredProperty" + } + ], + "description": "Initially this will be defaulted to \"COUNT\"", + "title": "Measured Property" + } + }, + "required": [ + "observedNode", + "measuredValue", + "observationDate", + "measuredProperty" + ], + "title": "Observation", + "type": "object" + }, + "OneHundredFiftyCharacters": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "Organisation": { + "additionalProperties": false, + "properties": { + "identifier": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. If your organisation does not have a Grid.ac identifier please use the \u201csuggest and institute\u201d function here: https://www.grid.ac/institutes#", + "title": "Organisation Identifier" + }, + "name": { + "allOf": [ + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + } + ], + "description": "Name of the organisation", + "title": "Organisation Name" + }, + "logo": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. 
The following formats will be accepted .jpg, .png or .svg.", + "title": "Organisation Logo" + }, + "description": { + "anyOf": [ + { + "$ref": "#/$defs/Description" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a URL that describes the organisation.", + "title": "Organisation Description" + }, + "contactPoint": { + "anyOf": [ + { + "$ref": "#/$defs/EmailAddress" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/EmailAddress" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Organisation contact point(s)", + "title": "Organisation Contact Point" + }, + "memberOf": { + "anyOf": [ + { + "$ref": "#/$defs/MemberOf" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate if the organisation is an Alliance Member or a Hub.", + "title": "Organisation Membership" + } + }, + "required": [ + "name", + "contactPoint" + ], + "title": "Organisation", + "type": "object" + }, + "Origin": { + "additionalProperties": false, + "properties": { + "purpose": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Purpose" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Pleases indicate the purpose(s) that the dataset was collected.", + "title": "Purpose" + }, + "source": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Source" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Pleases indicate the source of the data extraction", + "title": "Source" + }, + "collectionSituation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Setting" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Pleases indicate the setting(s) where data was collected. 
Multiple settings may be provided", + "title": "Setting" + } + }, + "title": "Origin", + "type": "object" + }, + "Periodicity": { + "enum": [ + "STATIC", + "IRREGULAR", + "CONTINUOUS", + "BIENNIAL", + "ANNUAL", + "BIANNUAL", + "QUARTERLY", + "BIMONTHLY", + "MONTHLY", + "BIWEEKLY", + "WEEKLY", + "SEMIWEEKLY", + "DAILY", + "OTHER", + null + ], + "title": "Periodicity" + }, + "Provenance": { + "additionalProperties": false, + "properties": { + "origin": { + "anyOf": [ + { + "$ref": "#/$defs/Origin" + }, + { + "type": "null" + } + ], + "default": null + }, + "temporal": { + "$ref": "#/$defs/Temporal" + } + }, + "required": [ + "temporal" + ], + "title": "Provenance", + "type": "object" + }, + "Purpose": { + "enum": [ + "STUDY", + "DISEASE REGISTRY", + "TRIAL", + "CARE", + "AUDIT", + "ADMINISTRATIVE", + "FINANCIAL", + "STATUTORY", + "OTHER", + null + ], + "title": "Purpose" + }, + "Revision": { + "additionalProperties": false, + "properties": { + "version": { + "allOf": [ + { + "$ref": "#/$defs/Semver" + } + ], + "description": "Semantic Version" + }, + "url": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "description": "URL endpoint to obtain the version" + } + }, + "required": [ + "version", + "url" + ], + "title": "Revision", + "type": "object" + }, + "Semver": { + "pattern": "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$", + "title": "Semver", + "type": "string" + }, + "Setting": { + "enum": [ + "CLINIC", + "PRIMARY CARE", + "ACCIDENT AND EMERGENCY", + "OUTPATIENTS", + "IN-PATIENTS", + "SERVICES", + "COMMUNITY", + "HOME", + "PRIVATE", + "PHARMACY", + "SOCIAL CARE", + "LOCAL AUTHORITY", + "NATIONAL GOVERNMENT", + "OTHER" + ], + "title": "Setting", + "type": "string" + }, + "ShortDescription": { + "anyOf": [ + { + "maxLength": 1000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "ShortDescription" + }, + "Source": { + "enum": [ + "EPR", + "ELECTRONIC SURVEY", + "LIMS", + "OTHER INFORMATION SYSTEM", + "PAPER 
BASED", + "FREETEXT NLP", + "MACHINE GENERATED", + "OTHER" + ], + "title": "Source", + "type": "string" + }, + "StandardisedDataModels": { + "anyOf": [ + { + "$ref": "#/$defs/StandardisedDataModelsEnum" + }, + { + "type": "null" + } + ], + "default": null, + "title": "StandardisedDataModels" + }, + "StandardisedDataModelsEnum": { + "enum": [ + "HL7 FHIR", + "HL7 V2", + "HL7 CDA", + "HL7 CCOW", + "LOINC", + "DICOM", + "I2B2", + "IHE", + "OMOP", + "OPENEHR", + "SENTINEL", + "PCORNET", + "CDISC", + "NHS DATA DICTIONARY", + "NHS SCOTLAND DATA DICTIONARY", + "NHS WALES DATA DICTIONARY", + "LOCAL", + "OTHER" + ], + "title": "StandardisedDataModelsEnum", + "type": "string" + }, + "StatisticalPopulationConstrained": { + "enum": [ + "PERSONS", + "EVENTS", + "FINDINGS" + ], + "title": "StatisticalPopulationConstrained", + "type": "string" + }, + "Summary": { + "additionalProperties": false, + "properties": { + "title": { + "allOf": [ + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + } + ], + "description": "Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers.", + "examples": [ + [ + "North West London COVID-19 Patient Level Situation Report" + ] + ], + "title": "Title" + }, + "abstract": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ], + "description": "Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. 
The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible", + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." + ], + "title": "Dataset Abstract" + }, + "publisher": { + "allOf": [ + { + "$ref": "#/$defs/Organisation" + } + ], + "description": "This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank.", + "title": "Dataset publisher" + }, + "contactPoint": { + "anyOf": [ + { + "$ref": "#/$defs/EmailAddress" + }, + { + "type": "null" + } + ], + "description": "Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose.", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "title": "Contact Point" + }, + "keywords": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. 
Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", + "title": "Keywords" + }, + "alternateIdentifiers": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/ShortDescription" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Alternate dataset identifiers or local identifiers", + "title": "Alternate dataset identifiers" + }, + "doiName": { + "anyOf": [ + { + "$ref": "#/$defs/Doi" + }, + { + "type": "null" + } + ], + "default": null, + "description": "All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI.", + "examples": [ + "10.3399/bjgp17X692645" + ], + "title": "Digital Object Identifier" + } + }, + "required": [ + "title", + "abstract", + "publisher", + "contactPoint", + "keywords" + ], + "title": "Summary", + "type": "object" + }, + "Temporal": { + "additionalProperties": false, + "properties": { + "distributionReleaseDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. 
if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "title": "Release Date" + }, + "startDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "title": "Start Date" + }, + "endDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "$ref": "#/$defs/EndDateEnum" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "title": "End Date" + }, + "timeLag": { + "allOf": [ + { + "$ref": "#/$defs/TimeLag" + } + ], + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "title": "Time Lag" + }, + "publishingFrequency": { + "allOf": [ + { + "$ref": "#/$defs/Periodicity" + } + ], + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. 
If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "title": "Publishing Frequency" + } + }, + "required": [ + "startDate", + "timeLag", + "publishingFrequency" + ], + "title": "Temporal", + "type": "object" + }, + "TimeLag": { + "enum": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NO TIMELAG", + "NOT APPLICABLE", + "OTHER", + null + ], + "title": "TimeLag" + }, + "Url": { + "anyOf": [ + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Url" + }, + "Usage": { + "additionalProperties": false, + "properties": { + "dataUseLimitation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/DataUseLimitation" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE", + "title": "Data Use Limitation" + }, + "dataUseRequirements": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/DataUseRequirements" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information.", + "title": "Data Use Requirements" + }, + "resourceCreator": { + "anyOf": [ + { + "$ref": "#/$defs/ShortDescription" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/ShortDescription" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided.", + "title": "Citation Requirements" + }, + "investigations": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Investigations" + }, + "isReferencedBy": { + "anyOf": [ + { + "$ref": "#/$defs/Doi" + }, + { + "type": "string" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Doi" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide the keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. 
Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list.", + "title": "Citations" + } + }, + "title": "Usage", + "type": "object" + }, + "Uuidv4": { + "maxLength": 36, + "minLength": 36, + "pattern": "^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$", + "title": "Uuidv4", + "type": "string" + } + }, + "additionalProperties": false, + "properties": { + "identifier": { + "anyOf": [ + { + "$ref": "#/$defs/Uuidv4" + }, + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "description": "System dataset identifier", + "examples": [ + [ + "226fb3f1-4471-400a-8c39-2b66d46a39b6", + "https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" + ] + ], + "title": "Dataset identifier" + }, + "version": { + "allOf": [ + { + "$ref": "#/$defs/Semver" + } + ], + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "title": "Dataset Version" + }, + "revisions": { + "description": "Revisions of Dataset metadata", + "items": { + "$ref": "#/$defs/Revision" + }, + "title": "Dataset Revisions", + "type": "array" + }, + "issued": { + "description": "Dataset Metadata Creation Date", + "format": "date-time", + "title": "Creation Date", + "type": "string" + }, + "modified": { + "description": "Dataset Metadata Creation Date", + "format": "date-time", + "title": "Modification Date", + "type": "string" + }, + "summary": { + "allOf": [ + { + "$ref": "#/$defs/Summary" + } + ], + "description": "Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP." + }, + "documentation": { + "anyOf": [ + { + "$ref": "#/$defs/Documentation" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. 
Organisations are required to confirm that they have permission to distribute any additional media.", + "title": "Documentation" + }, + "coverage": { + "anyOf": [ + { + "$ref": "#/$defs/Coverage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data.", + "title": "Coverage" + }, + "provenance": { + "anyOf": [ + { + "$ref": "#/$defs/Provenance" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", + "title": "Provenance" + }, + "accessibility": { + "allOf": [ + { + "$ref": "#/$defs/Accessibility" + } + ], + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets." + }, + "enrichmentAndLinkage": { + "anyOf": [ + { + "$ref": "#/$defs/EnrichmentAndLinkage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", + "title": "Enrichment and Linkage" + }, + "observations": { + "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. 
Users will be encouraged to provide their own statistical populations as the project progresses. Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d", + "items": { + "$ref": "#/$defs/Observation" + }, + "title": "Observations", + "type": "array" + }, + "structuralMetadata": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/DataClass" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Descriptions of all tables and data elements that can be included in the dataset", + "title": "Structural Metadata" + } + }, + "required": [ + "identifier", + "version", + "revisions", + "issued", + "modified", + "summary", + "accessibility", + "observations" + ], + "title": "Hdruk220", + "type": "object" +} \ No newline at end of file diff --git a/hdr_schemata/models/HDRUK/create_json_schema.py b/hdr_schemata/models/HDRUK/create_json_schema.py index 67d69b0..b944120 100644 --- a/hdr_schemata/models/HDRUK/create_json_schema.py +++ b/hdr_schemata/models/HDRUK/create_json_schema.py @@ -1,8 +1,10 @@ from pydantic import ValidationError import v2_1_2 import v2_1_3 +import v2_2_0 import json v2_1_2.Hdruk212.save_schema() v2_1_3.Hdruk213.save_schema() -v2_1_3.Hdruk213.save_schema("latest/dev/schema.json") +v2_2_0.Hdruk220.save_schema() + diff --git a/hdr_schemata/models/HDRUK/v2_2_0/__init__.py b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py new file mode 100644 index 0000000..b6cc7c8 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py @@ -0,0 +1,11 @@ +from hdr_schemata.models.HDRUK.v2_1_3 import Hdruk213 +import json +from 
typing import Optional +from pydantic import Field + + +class Hdruk220(Hdruk213): + @classmethod + def save_schema(cls, location="./2.2.0/schema.json"): + with open(location, "w") as f: + json.dump(cls.model_json_schema(), f, indent=6) From 4e7a96326845d5655c4267dfbf790200e8874937 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 14:33:54 +0000 Subject: [PATCH 02/21] adding some files --- .../HDRUK/v2_2_0/TissueSampleMetadata.py | 61 +++++++++++++++++++ .../HDRUK/v2_2_0/TissuesSampleCollection.py | 58 ++++++++++++++++++ hdr_schemata/models/HDRUK/v2_2_0/__init__.py | 8 +++ 3 files changed, 127 insertions(+) create mode 100644 hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py create mode 100644 hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py diff --git a/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py b/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py new file mode 100644 index 0000000..269c49b --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py @@ -0,0 +1,61 @@ +from typing import Optional, Union, List +from pydantic import BaseModel, Field, constr +from datetime import date, datetime +from .SampleDonor import SampleDonor + +from hdr_schemata.definitions.HDRUK import CommaSeparatedValues + + +class TissueSampleMetadata(BaseModel): + id: Optional[constr(min_length=2, max_length=50)] = Field( + None, title="Metadata ID", description="ID of the tissue sample metadata" + ) + + sampleDonor: Optional[SampleDonor] = Field( + None, title="Sample Donor", description="Information about the sample donor" + ) + + sampleType: Optional[CommaSeparatedValues] = Field( + None, title="Sample Type", description="Type of the tissue sample" + ) + + storageTemperature: Optional[str] = Field( + None, + title="Storage Temperature", + description="Storage temperature of the tissue sample", + ) + + creationDate: Optional[Union[date, datetime]] = Field( + None, + title="Creation Date", + description="Date when 
the tissue sample metadata was created", + ) + + AnatomicalSiteOntologyCode: Optional[CommaSeparatedValues] = Field( + None, + title="Anatomical Site Ontology Code", + description="Ontology code for the anatomical site", + ) + AnatomicalSiteOntologyDescription: Optional[CommaSeparatedValues] = Field( + None, + title="Anatomical Site Ontology Description", + description="Ontology description for the anatomical site", + ) + + AnatomicalSiteFreeText: Optional[CommaSeparatedValues] = Field( + None, + title="Anatomical Site Free Text", + description="Free text describing the anatomical site", + ) + + sampleContentDiagnosis: Optional[CommaSeparatedValues] = Field( + None, + title="Sample Content Diagnosis", + description="Diagnosis related to the sample content", + ) + + useReCommaSeparatedValuesictions: Optional[CommaSeparatedValues] = Field( + None, + title="Use Restrictions", + description="Restrictions on the use of the tissue sample", + ) diff --git a/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py b/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py new file mode 100644 index 0000000..87145a2 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py @@ -0,0 +1,58 @@ +from typing import Optional +from pydantic import BaseModel, Field +from .TissueSampleMetadata import TissueSampleMetadata +from hdr_schemata.definitions.HDRUK import CommaSeparatedValues + + +class TissuesSampleCollection(BaseModel): + id: Optional[CommaSeparatedValues] = Field( + None, title="ID", description="ID of the tissue sample collection" + ) + + dataCategories: Optional[CommaSeparatedValues] = Field( + None, + title="Data Categories", + description="Data categories related to the tissue sample collection", + ) + + materialType: Optional[CommaSeparatedValues] = Field( + None, + title="Material Type", + description="Material type of the tissue sample collection", + ) + + accessConditions: Optional[CommaSeparatedValues] = Field( + None, + title="Access 
Conditions", + description="Access conditions for the tissue sample collection", + ) + + collectionType: Optional[CommaSeparatedValues] = Field( + None, + title="Collection Type", + description="Type of the tissue sample collection", + ) + + disease: Optional[CommaSeparatedValues] = Field( + None, + title="Disease", + description="Disease associated with the tissue sample collection", + ) + + storageTemperature: Optional[CommaSeparatedValues] = Field( + None, + title="Storage Temperature", + description="Storage temperature of the tissue sample collection", + ) + + sampleAgeRange: Optional[CommaSeparatedValues] = Field( + None, + title="Sample Age Range", + description="Age range of the tissue sample collection", + ) + + tissueSampleMetadata: Optional[TissueSampleMetadata] = Field( + None, + title="Tissue Sample Metadata", + description="Metadata related to the tissue sample", + ) diff --git a/hdr_schemata/models/HDRUK/v2_2_0/__init__.py b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py index b6cc7c8..87271ef 100644 --- a/hdr_schemata/models/HDRUK/v2_2_0/__init__.py +++ b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py @@ -3,8 +3,16 @@ from typing import Optional from pydantic import Field +from .TissuesSampleCollection import TissuesSampleCollection + class Hdruk220(Hdruk213): + tissuesSampleCollection: Optional[TissuesSampleCollection] = Field( + None, + description="Metadata collection for Tissue Samples datasets", + title="Tissues Sample Collection", + ) + @classmethod def save_schema(cls, location="./2.2.0/schema.json"): with open(location, "w") as f: From 595946fde15500546db7a4dbdbe53639c2b5ed76 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 14:58:52 +0000 Subject: [PATCH 03/21] adding some new tighter definitions for the schema --- hdr_schemata/definitions/HDRUK/ICD_0_3.py | 6 +++ .../HDRUK/MaterialTypeCategories.py | 17 +++++++ .../HDRUK/TissueDataCategoriesEnum.py | 12 +++++ hdr_schemata/definitions/HDRUK/__init__.py | 3 ++ 
.../HDRUK/v2_2_0/TissueSampleMetadata.py | 50 ++----------------- .../HDRUK/v2_2_0/TissuesSampleCollection.py | 41 +++------------ 6 files changed, 49 insertions(+), 80 deletions(-) create mode 100644 hdr_schemata/definitions/HDRUK/ICD_0_3.py create mode 100644 hdr_schemata/definitions/HDRUK/MaterialTypeCategories.py create mode 100644 hdr_schemata/definitions/HDRUK/TissueDataCategoriesEnum.py diff --git a/hdr_schemata/definitions/HDRUK/ICD_0_3.py b/hdr_schemata/definitions/HDRUK/ICD_0_3.py new file mode 100644 index 0000000..b2bae2a --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/ICD_0_3.py @@ -0,0 +1,6 @@ +from typing import Optional +from pydantic import RootModel, constr + + +class ICD_0_3(RootModel): + root: Optional[constr(pattern=r"^[C\d]{3}\.\d{4}\/\d{1,4}$")] diff --git a/hdr_schemata/definitions/HDRUK/MaterialTypeCategories.py b/hdr_schemata/definitions/HDRUK/MaterialTypeCategories.py new file mode 100644 index 0000000..e7743db --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/MaterialTypeCategories.py @@ -0,0 +1,17 @@ +from enum import Enum + + +class MaterialTypeCategories(Enum): + BLOOD = "Blood" + DNA = "DNA" + FAECES = "Faeces" + IMMORTALIZED_CELL_LINES = "Immortalized Cell Lines" + ISOLATED_PATHOGEN = "Isolated Pathogen" + OTHER = "Other" + PLASMA = "Plasma" + RNA = "RNA" + SALIVA = "Saliva" + SERUM = "Serum" + TISSUE_FROZEN = "Tissue (Frozen)" + TISSUE_FFPE = "Tissue (FFPE)" + URINE = "Urine" diff --git a/hdr_schemata/definitions/HDRUK/TissueDataCategoriesEnum.py b/hdr_schemata/definitions/HDRUK/TissueDataCategoriesEnum.py new file mode 100644 index 0000000..c2cbad5 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/TissueDataCategoriesEnum.py @@ -0,0 +1,12 @@ +from enum import Enum + + +class TissueDataCategoriesEnum(Enum): + BIOLOGICAL_SAMPLES = "Biological samples" + SURVEY_DATA = "Survey data" + IMAGING_DATA = "Imaging data" + MEDICAL_RECORDS = "Medical records" + NATIONAL_REGISTRIES = "National registries" + GENEALOGICAL_RECORDS = 
"Genealogical records" + PHYSIOLOGICAL_BIOCHEMICAL_MEASUREMENTS = "Physiological/Biochemical measurements" + OTHER = "Other" diff --git a/hdr_schemata/definitions/HDRUK/__init__.py b/hdr_schemata/definitions/HDRUK/__init__.py index eef093b..25cba41 100644 --- a/hdr_schemata/definitions/HDRUK/__init__.py +++ b/hdr_schemata/definitions/HDRUK/__init__.py @@ -14,12 +14,14 @@ from .EndDateEnum import EndDateEnum from .Followup import Followup from .Format import Format +from .ICD_0_3 import ICD_0_3 from .IsPartOfEnum import IsPartOfEnum from .Isocountrycode import Isocountrycode from .Language import Language from .LanguageEnum import LanguageEnum from .LongAbstractText import LongAbstractText from .LongDescription import LongDescription +from .MaterialTypeCategories import MaterialTypeCategories from .MeasuredProperty import MeasuredProperty from .MemberOf import MemberOf from .Name import Name @@ -36,6 +38,7 @@ from .StandardisedDataModelsEnum import StandardisedDataModelsEnum from .StatisticalPopulationConstrained import StatisticalPopulationConstrained from .TimeLag import TimeLag +from .TissueDataCategoriesEnum import TissueDataCategoriesEnum from .TwoHundredFiftyFiveCharacters import TwoHundredFiftyFiveCharacters from .Url import Url from .Uuidv4 import Uuidv4 diff --git a/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py b/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py index 269c49b..91c2961 100644 --- a/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py +++ b/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py @@ -1,61 +1,19 @@ -from typing import Optional, Union, List -from pydantic import BaseModel, Field, constr +from typing import Optional, Union +from pydantic import BaseModel, Field from datetime import date, datetime -from .SampleDonor import SampleDonor -from hdr_schemata.definitions.HDRUK import CommaSeparatedValues +from hdr_schemata.definitions.HDRUK import ICD_0_3 class TissueSampleMetadata(BaseModel): - id: 
Optional[constr(min_length=2, max_length=50)] = Field( - None, title="Metadata ID", description="ID of the tissue sample metadata" - ) - - sampleDonor: Optional[SampleDonor] = Field( - None, title="Sample Donor", description="Information about the sample donor" - ) - - sampleType: Optional[CommaSeparatedValues] = Field( - None, title="Sample Type", description="Type of the tissue sample" - ) - - storageTemperature: Optional[str] = Field( - None, - title="Storage Temperature", - description="Storage temperature of the tissue sample", - ) - creationDate: Optional[Union[date, datetime]] = Field( None, title="Creation Date", description="Date when the tissue sample metadata was created", ) - AnatomicalSiteOntologyCode: Optional[CommaSeparatedValues] = Field( + AnatomicalSiteOntologyCode: Optional[ICD_0_3] = Field( None, title="Anatomical Site Ontology Code", description="Ontology code for the anatomical site", ) - AnatomicalSiteOntologyDescription: Optional[CommaSeparatedValues] = Field( - None, - title="Anatomical Site Ontology Description", - description="Ontology description for the anatomical site", - ) - - AnatomicalSiteFreeText: Optional[CommaSeparatedValues] = Field( - None, - title="Anatomical Site Free Text", - description="Free text describing the anatomical site", - ) - - sampleContentDiagnosis: Optional[CommaSeparatedValues] = Field( - None, - title="Sample Content Diagnosis", - description="Diagnosis related to the sample content", - ) - - useReCommaSeparatedValuesictions: Optional[CommaSeparatedValues] = Field( - None, - title="Use Restrictions", - description="Restrictions on the use of the tissue sample", - ) diff --git a/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py b/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py index 87145a2..0b2e6c8 100644 --- a/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py +++ b/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py @@ -1,15 +1,18 @@ -from typing import Optional +from 
typing import Optional, List from pydantic import BaseModel, Field from .TissueSampleMetadata import TissueSampleMetadata -from hdr_schemata.definitions.HDRUK import CommaSeparatedValues +from hdr_schemata.definitions.HDRUK import ( + TissueDataCategoriesEnum, + TissueDataCategoriesEnum, +) class TissuesSampleCollection(BaseModel): - id: Optional[CommaSeparatedValues] = Field( + id: Optional[str] = Field( None, title="ID", description="ID of the tissue sample collection" ) - dataCategories: Optional[CommaSeparatedValues] = Field( + dataCategories: Optional[List[TissueDataCategoriesEnum]] = Field( None, title="Data Categories", description="Data categories related to the tissue sample collection", @@ -21,36 +24,6 @@ class TissuesSampleCollection(BaseModel): description="Material type of the tissue sample collection", ) - accessConditions: Optional[CommaSeparatedValues] = Field( - None, - title="Access Conditions", - description="Access conditions for the tissue sample collection", - ) - - collectionType: Optional[CommaSeparatedValues] = Field( - None, - title="Collection Type", - description="Type of the tissue sample collection", - ) - - disease: Optional[CommaSeparatedValues] = Field( - None, - title="Disease", - description="Disease associated with the tissue sample collection", - ) - - storageTemperature: Optional[CommaSeparatedValues] = Field( - None, - title="Storage Temperature", - description="Storage temperature of the tissue sample collection", - ) - - sampleAgeRange: Optional[CommaSeparatedValues] = Field( - None, - title="Sample Age Range", - description="Age range of the tissue sample collection", - ) - tissueSampleMetadata: Optional[TissueSampleMetadata] = Field( None, title="Tissue Sample Metadata", From ce8dc6cc13ca5265e5023ef25df6f7fe9456e0fd Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 14:59:31 +0000 Subject: [PATCH 04/21] updates --- hdr_schemata/models/HDRUK/2.2.0/schema.json | 146 ++++++++++++++++++ 
.../HDRUK/v2_2_0/TissuesSampleCollection.py | 8 +- 2 files changed, 148 insertions(+), 6 deletions(-) diff --git a/hdr_schemata/models/HDRUK/2.2.0/schema.json b/hdr_schemata/models/HDRUK/2.2.0/schema.json index 277f51c..6c2322e 100644 --- a/hdr_schemata/models/HDRUK/2.2.0/schema.json +++ b/hdr_schemata/models/HDRUK/2.2.0/schema.json @@ -818,6 +818,18 @@ "title": "FormatAndStandards", "type": "object" }, + "ICD_0_3": { + "anyOf": [ + { + "pattern": "^[C\\d]{3}\\.\\d{4}\\/\\d{1,4}$", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "ICD_0_3" + }, "IsPartOfEnum": { "const": "NOT APPLICABLE", "title": "IsPartOfEnum" @@ -1042,6 +1054,25 @@ ], "title": "LongDescription" }, + "MaterialTypeCategories": { + "enum": [ + "Blood", + "DNA", + "Faeces", + "Immortalized Cell Lines", + "Isolated Pathogen", + "Other", + "Plasma", + "RNA", + "Saliva", + "Serum", + "Tissue (Frozen)", + "Tissue (FFPE)", + "Urine" + ], + "title": "MaterialTypeCategories", + "type": "string" + }, "MeasuredProperty": { "title": "MeasuredProperty" }, @@ -1706,6 +1737,108 @@ ], "title": "TimeLag" }, + "TissueDataCategoriesEnum": { + "enum": [ + "Biological samples", + "Survey data", + "Imaging data", + "Medical records", + "National registries", + "Genealogical records", + "Physiological/Biochemical measurements", + "Other" + ], + "title": "TissueDataCategoriesEnum", + "type": "string" + }, + "TissueSampleMetadata": { + "properties": { + "creationDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Date when the tissue sample metadata was created", + "title": "Creation Date" + }, + "AnatomicalSiteOntologyCode": { + "anyOf": [ + { + "$ref": "#/$defs/ICD_0_3" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Ontology code for the anatomical site", + "title": "Anatomical Site Ontology Code" + } + }, + "title": "TissueSampleMetadata", + 
"type": "object" + }, + "TissuesSampleCollection": { + "properties": { + "dataCategories": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/TissueDataCategoriesEnum" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Data categories related to the tissue sample collection", + "title": "Data Categories" + }, + "materialType": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/MaterialTypeCategories" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Material type of the tissue sample collection", + "title": "Material Type" + }, + "tissueSampleMetadata": { + "anyOf": [ + { + "$ref": "#/$defs/TissueSampleMetadata" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Metadata related to the tissue sample", + "title": "Tissue Sample Metadata" + } + }, + "title": "TissuesSampleCollection", + "type": "object" + }, "Url": { "anyOf": [ { @@ -1998,6 +2131,19 @@ "default": null, "description": "Descriptions of all tables and data elements that can be included in the dataset", "title": "Structural Metadata" + }, + "tissuesSampleCollection": { + "anyOf": [ + { + "$ref": "#/$defs/TissuesSampleCollection" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Metadata collection for Tissue Samples datasets", + "title": "Tissues Sample Collection" } }, "required": [ diff --git a/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py b/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py index 0b2e6c8..95b6e93 100644 --- a/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py +++ b/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py @@ -3,22 +3,18 @@ from .TissueSampleMetadata import TissueSampleMetadata from hdr_schemata.definitions.HDRUK import ( TissueDataCategoriesEnum, - TissueDataCategoriesEnum, + MaterialTypeCategories, ) class TissuesSampleCollection(BaseModel): - id: Optional[str] = Field( - None, title="ID", 
description="ID of the tissue sample collection" - ) - dataCategories: Optional[List[TissueDataCategoriesEnum]] = Field( None, title="Data Categories", description="Data categories related to the tissue sample collection", ) - materialType: Optional[CommaSeparatedValues] = Field( + materialType: Optional[List[MaterialTypeCategories]] = Field( None, title="Material Type", description="Material type of the tissue sample collection", From ce8c07d746d60969872154114a71c5e3d085f5fa Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 15:04:36 +0000 Subject: [PATCH 05/21] update a description --- hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py b/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py index 91c2961..17cd23d 100644 --- a/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py +++ b/hdr_schemata/models/HDRUK/v2_2_0/TissueSampleMetadata.py @@ -15,5 +15,5 @@ class TissueSampleMetadata(BaseModel): AnatomicalSiteOntologyCode: Optional[ICD_0_3] = Field( None, title="Anatomical Site Ontology Code", - description="Ontology code for the anatomical site", + description="Ontology code for the anatomical site, this code must match an ICD-0-3 format", ) From b46aca4a29fd2265de32eb8d3f08affa5f22e81d Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 16:14:28 +0000 Subject: [PATCH 06/21] trigger a page build --- .github/workflows/ci.yml | 2 +- available.json | 3 +- docs/GWDM/1.1.md | 932 +++++++++++++++++++------- docs/GWDM/1.1.structure.json | 221 +++++- docs/HDRUK/2.2.0.md | 849 +++++++++++++++++++++++ docs/HDRUK/2.2.0.structure.json | 741 ++++++++++++++++++++ hdr_schemata/models/HDRUK/__init__.py | 1 + hdr_schemata/utils/create_markdown.py | 3 +- 8 files changed, 2482 insertions(+), 270 deletions(-) create mode 100644 docs/HDRUK/2.2.0.md create mode 100644 docs/HDRUK/2.2.0.structure.json diff --git 
a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b2624c..8d28e48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,7 @@ jobs: deploy: runs-on: ubuntu-latest needs: test - if: success() && github.ref == 'refs/heads/master' + #if: success() && github.ref == 'refs/heads/master' steps: - uses: actions/setup-python@v2 with: diff --git a/available.json b/available.json index 574f710..8e1a78f 100644 --- a/available.json +++ b/available.json @@ -3,7 +3,8 @@ "2.1.2", "2.1.3", "2.1.0", - "2.0.2" + "2.0.2", + "2.2.0" ], "GWDM": [ "1.0", diff --git a/docs/GWDM/1.1.md b/docs/GWDM/1.1.md index 22d48f0..f8bfb5b 100644 --- a/docs/GWDM/1.1.md +++ b/docs/GWDM/1.1.md @@ -5,37 +5,52 @@ required metadata needed for the GWDM + + + ### gatewayId Need a field in Mauro that captures the datasetID to link to gateway database - or can we just use the one created in Mauro? -| title | examples | required | type | -|:----------|:-----------|:-----------|:-------| -| Gatewayid | | True | str | +| title | required | type | +|:----------|:-----------|:-------| +| Gatewayid | True | str | + + + ### gatewayPid Need a field in Mauro that captures the dataset pid to link to gateway database -| title | examples | required | type | -|:-----------|:-----------|:-----------|:-------| -| Gatewaypid | | True | str | +| title | required | type | +|:-----------|:-----------|:-------| +| Gatewaypid | True | str | + + + ### issued Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different? -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------| -| Issued | | True | datetime | +| title | required | type | +|:--------|:-----------|:---------| +| Issued | True | datetime | + + + ### modified Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different? 
-| title | examples | required | type | -|:---------|:-----------|:-----------|:---------| -| Modified | | True | datetime | +| title | required | type | +|:---------|:-----------|:---------| +| Modified | True | datetime | + + + ### revisions @@ -43,29 +58,42 @@ None + + + #### version Version number used for previous version of this dataset -| title | examples | required | type | -|:-----------------|:-----------|:-----------|:-------| -| revision version | | True | str | +| title | required | type | +|:-----------------|:-----------|:-------| +| revision version | True | str | + + + #### url Some url with a reference to the record of a previous version of this dataset -| title | examples | required | type | -|:-------------|:-----------|:-----------|:-------| -| revision url | | True | Url | +| title | required | type | +|:-------------|:-----------|:-------| +| revision url | True | Url | + + + ### version Dataset metadata version -| title | examples | required | type | -|:----------------|:-----------|:-----------|:-------| -| Dataset Version | ['1.1.0'] | True | str | +| title | required | type | +|:----------------|:-----------|:-------| +| Dataset Version | True | str | + +Examples: + * 1.1.0 + ## summary @@ -73,77 +101,107 @@ Summary of metadata describing key pieces of information. 
+ + + ### title The main title of the dataset -| title | examples | required | type | -|:--------|:-----------|:-----------|:------------------------------| -| Title | | True | TwoHundredFiftyFiveCharacters | +| title | required | type | +|:--------|:-----------|:------------------------------| +| Title | True | TwoHundredFiftyFiveCharacters | + + + ### shortTitle A shorter descriptive title of the dataset -| title | examples | required | type | -|:-----------|:-----------|:-----------|:-----------| -| Shorttitle | | True | ShortTitle | +| title | required | type | +|:-----------|:-----------|:-----------| +| Shorttitle | True | ShortTitle | + + + ### doiName DOI associated to this dataset -| title | examples | required | type | -|:--------|:-----------|:-----------|:-------| -| Doiname | | True | Doi | +| title | required | type | +|:--------|:-----------|:-------| +| Doiname | True | Doi | + + + ### abstract Longer abstract detailing the dataset. -| title | examples | required | type | -|:---------|:-----------|:-----------|:-----------------| -| Abstract | | True | LongAbstractText | +| title | required | type | +|:---------|:-----------|:-----------------| +| Abstract | True | LongAbstractText | + + + ### keywords Comma separated key words associated to this dataset. 
-| title | examples | required | type | -|:---------|:-----------|:-----------|:---------------------| -| Keywords | | True | CommaSeparatedValues | +| title | required | type | +|:---------|:-----------|:---------------------| +| Keywords | True | CommaSeparatedValues | + + + ### controlledKeywords Keywords that have been filtered and limited -| title | examples | required | type | -|:--------------------|:-----------|:-----------|:---------------------| -| Controlled Keywords | | True | CommaSeparatedValues | +| title | required | type | +|:--------------------|:-----------|:---------------------| +| Controlled Keywords | True | CommaSeparatedValues | + + + ### contactPoint email of a person who can be the main contact point of this dataset -| title | examples | required | type | -|:--------------|:-----------|:-----------|:---------| -| Contact Point | | True | EmailStr | +| title | required | type | +|:--------------|:-----------|:---------| +| Contact Point | True | EmailStr | + + + ### datasetType What type of dataset is this? -| title | examples | required | type | -|:-------------|:-----------|:-----------|:------------| -| Dataset type | | True | DatasetType | +| title | required | type | +|:-------------|:-----------|:------------| +| Dataset type | True | DatasetType | + + + ### description Longer description of the dataset in detail -| title | examples | required | type | -|:------------|:-----------|:-----------|:----------------| -| Description | | True | LongDescription | +| title | required | type | +|:------------|:-----------|:----------------| +| Description | True | LongDescription | + + + ### publisher @@ -151,29 +209,63 @@ Link to details about the publisher of this dataset + + + #### name The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. 
In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/ -| title | examples | required | type | -|:--------|:-----------|:-----------|:-------| -| Name | | True | Name | +| title | required | type | +|:--------|:-----------|:-------| +| Name | True | Name | + + + #### gatewayId The link to an ID somewhere in the gateway where more information on the publisher can be retrieved. -| title | examples | required | type | -|:---------------------|:-----------|:-----------|:-------| -| Publisher gateway id | | False | str | +| title | required | type | +|:---------------------|:-----------|:-------| +| Publisher gateway id | False | str | + + + #### rorId The Research Organization Registry (ROR) for the organisation, if applicable -| title | examples | required | type | -|:------------------------------------------|:-----------|:-----------|:-------| -| Research Organization Registry Identifier | | False | str | +| title | required | type | +|:------------------------------------------|:-----------|:-------| +| Research Organization Registry Identifier | False | str | + + + + +### populationSize + +Summary population size of the cohort + +| title | required | type | +|:----------------|:-----------|:-------| +| Population size | False | int | + + + + +### datasetSubType + +What us the subtype for this dataset? + +| title | required | type | +|:-----------------|:-----------|:------------| +| Dataset sub type | False | DatasetType | + + + ## coverage @@ -181,93 +273,129 @@ Observational, Spatial and Temporal coverage + + + ### spatial List of countries where the data was taken from -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Spatial | | False | CommaSeparatedValues | +| title | required | type | +|:--------|:-----------|:---------------------| +| Spatial | False | CommaSeparatedValues | + + + ### pathway Long description of the clinical/diagnostic/treatment pathway if applicable. 
This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. -| title | examples | required | type | -|:--------|:-----------|:-----------|:----------------| -| Pathway | | False | LongDescription | +| title | required | type | +|:--------|:-----------|:----------------| +| Pathway | False | LongDescription | + + + ### followup What is the typical time span that a patient appears in the dataset (follow up period) -| title | examples | required | type | -|:---------|:-----------|:-----------|:---------| -| Followup | | False | Followup | +| title | required | type | +|:---------|:-----------|:---------| +| Followup | False | Followup | + + + ### typicalAgeRange Age range in whole years of participants in the dataset. Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). 
-| title | examples | required | type | -|:------------------|:-----------|:-----------|:---------| -| Typical Age Range | | False | AgeRange | +| title | required | type | +|:------------------|:-----------|:---------| +| Typical Age Range | False | AgeRange | + + + ### gender Male, Female, Other -| title | examples | required | type | -|:--------|:-----------|:-----------|:-------| -| Gender | | False | Gender | +| title | required | type | +|:--------|:-----------|:-------| +| Gender | False | Gender | + + + ### biologicalsamples Blood, Saliva, Urine, Other -| title | examples | required | type | -|:-------------------|:-----------|:-----------|:------------------| -| Biological Samples | | False | BiologicalSamples | +| title | required | type | +|:-------------------|:-----------|:------------------| +| Biological Samples | False | BiologicalSamples | + + + ### psychological Mental health, Cognitive function -| title | examples | required | type | -|:--------------|:-----------|:-----------|:--------------| -| Psychological | | False | Psychological | +| title | required | type | +|:--------------|:-----------|:--------------| +| Psychological | False | Psychological | + + + ### physical Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive -| title | examples | required | type | -|:---------|:-----------|:-----------|:---------| -| Physical | | False | Physical | +| title | required | type | +|:---------|:-----------|:---------| +| Physical | False | Physical | + + + ### anthropometric Height, Weight, Waist circumference, Hip circumference, Blood pressure -| title | examples | required | type | -|:---------------|:-----------|:-----------|:---------------| -| Anthropometric | | False | Anthropometric | +| title | required | type | +|:---------------|:-----------|:---------------| +| Anthropometric | False | Anthropometric | + + + ### lifestyle Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol -| title | 
examples | required | type | -|:----------|:-----------|:-----------|:-----------| -| Lifestyle | | False | Lifestyles | +| title | required | type | +|:----------|:-----------|:-----------| +| Lifestyle | False | Lifestyles | + + + ### socioeconomic Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support -| title | examples | required | type | -|:---------------|:-----------|:-----------|:--------------| -| Socio-economic | | False | SocioEconomic | +| title | required | type | +|:---------------|:-----------|:--------------| +| Socio-economic | False | SocioEconomic | + + + ## provenance @@ -275,35 +403,50 @@ Provenance information + + + ### origin None + + + #### purpose Indicates the purpose(s) that the dataset was collected. -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Purpose | | False | CommaSeparatedValues | +| title | required | type | +|:--------|:-----------|:---------------------| +| Purpose | False | CommaSeparatedValues | + + + #### source Indicates the source of the data extraction -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Source | | False | CommaSeparatedValues | +| title | required | type | +|:--------|:-----------|:---------------------| +| Source | False | CommaSeparatedValues | + + + #### collectionSituation Indicate the setting(s) where data was collected. 
Multiple settings may be provided -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Setting | | False | CommaSeparatedValues | +| title | required | type | +|:--------|:-----------|:---------------------| +| Setting | False | CommaSeparatedValues | + + + ### temporal @@ -311,45 +454,63 @@ None + + + #### startDate The start of the time period that the dataset provides coverage for -| title | examples | required | type | -|:-----------|:-----------|:-----------|:-------| -| Start Date | | True | date | +| title | required | type | +|:-----------|:-----------|:-------| +| Start Date | True | date | + + + #### endDate The end of the time period that the dataset provides coverage for -| title | examples | required | type | -|:---------|:-----------|:-----------|:-------| -| End Date | | False | date | +| title | required | type | +|:---------|:-----------|:-------| +| End Date | False | date | + + + #### timeLag Rypical time-lag between an event and the data for that event appearing in the dataset -| title | examples | required | type | -|:---------|:-----------|:-----------|:--------| -| Time Lag | | True | TimeLag | +| title | required | type | +|:---------|:-----------|:--------| +| Time Lag | True | TimeLag | + + + #### accrualPeriodicity frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. -| title | examples | required | type | -|:------------|:-----------|:-----------|:------------| -| Periodicity | | True | Periodicity | +| title | required | type | +|:------------|:-----------|:------------| +| Periodicity | True | Periodicity | + + + #### distributionReleaseDate Date of the latest release of the dataset. If this is a regular release i.e. 
quarterly, or this is a static dataset please complete this alongside Periodicity. -| title | examples | required | type | -|:-------------|:-----------|:-----------|:-------| -| Release Date | | False | date | +| title | required | type | +|:-------------|:-----------|:-------| +| Release Date | False | date | + + + ## accessibility @@ -357,27 +518,39 @@ Accessibility information. + + + ### usage This section includes information about how the data can be used and how it is currently being used + + + #### dataUseLimitation Any restrictions to its usage -| title | examples | required | type | -|:--------------------|:-----------|:-----------|:---------------------| -| Data Use Limitation | | True | CommaSeparatedValues | +| title | required | type | +|:--------------------|:-----------|:---------------------| +| Data Use Limitation | True | CommaSeparatedValues | + + + #### dataUseRequirement Any requirements needed for data usage -| title | examples | required | type | -|:----------------------|:-----------|:-----------|:---------------------| -| Data Use Requirements | | True | CommaSeparatedValues | +| title | required | type | +|:----------------------|:-----------|:---------------------| +| Data Use Requirements | True | CommaSeparatedValues | + + + #### resourceCreator @@ -385,29 +558,41 @@ Who has created this resource + + + ##### name The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/ -| title | examples | required | type | -|:--------|:-----------|:-----------|:-------| -| Name | | True | Name | +| title | required | type | +|:--------|:-----------|:-------| +| Name | True | Name | + + + ##### gatewayId The link to an ID somewhere in the gateway where more information on the publisher can be retrieved. 
-| title | examples | required | type | -|:---------------------|:-----------|:-----------|:-------| -| Publisher gateway id | | False | str | +| title | required | type | +|:---------------------|:-----------|:-------| +| Publisher gateway id | False | str | + + + ##### rorId The Research Organization Registry (ROR) for the organisation, if applicable -| title | examples | required | type | -|:------------------------------------------|:-----------|:-----------|:-------| -| Research Organization Registry Identifier | | False | str | +| title | required | type | +|:------------------------------------------|:-----------|:-------| +| Research Organization Registry Identifier | False | str | + + + ### access @@ -415,61 +600,85 @@ This section includes information about data access + + + #### accessRights Optional link(s) or a description of where the license associated to accessing this dataset -| title | examples | required | type | -|:--------------|:-----------|:-----------|:---------------------| -| Access Rights | | True | CommaSeparatedValues | +| title | required | type | +|:--------------|:-----------|:---------------------| +| Access Rights | True | CommaSeparatedValues | + + + #### accessService -| title | examples | required | type | -|:---------------|:-----------|:-----------|:----------------| -| Access Service | | False | LongDescription | +| title | required | type | +|:---------------|:-----------|:----------------| +| Access Service | False | LongDescription | + + + #### accessRequestCost -| title | examples | required | type | -|:---------------------------------|:-----------|:-----------|:----------------| -| Organisation Access Request Cost | | False | LongDescription | +| title | required | type | +|:---------------------------------|:-----------|:----------------| +| Organisation Access Request Cost | False | LongDescription | + + + #### deliveryLeadTime An arbitrary guess at the time to gain access to the dataset... 
-| title | examples | required | type | -|:------------------------|:-----------|:-----------|:-----------------| -| Access Request Duration | | False | DeliveryLeadTime | +| title | required | type | +|:------------------------|:-----------|:-----------------| +| Access Request Duration | False | DeliveryLeadTime | + + + #### jurisdiction Comma separated country codes of where the data jurisdiction is. -| title | examples | required | type | -|:-------------|:-----------|:-----------|:---------------------| -| Jurisdiction | | True | CommaSeparatedValues | +| title | required | type | +|:-------------|:-----------|:---------------------| +| Jurisdiction | True | CommaSeparatedValues | + + + #### dataController Name of the data controller -| title | examples | required | type | -|:----------------|:-----------|:-----------|:----------------| -| Data Controller | | True | LongDescription | +| title | required | type | +|:----------------|:-----------|:----------------| +| Data Controller | True | LongDescription | + + + #### dataProcessor Name of the data processors -| title | examples | required | type | -|:---------------|:-----------|:-----------|:----------------| -| Data Processor | | False | LongDescription | +| title | required | type | +|:---------------|:-----------|:----------------| +| Data Processor | False | LongDescription | + + + ### formatAndStandards @@ -477,37 +686,52 @@ Section includes technical attributes for language vocabularies, sizes etc. and + + + #### vocabularyEncodingSchemes Code value of the ontology vocabulary encoding -| title | examples | required | type | -|:----------------------|:-----------|:-----------|:---------------------| -| Controlled Vocabulary | | True | CommaSeparatedValues | +| title | required | type | +|:----------------------|:-----------|:---------------------| +| Controlled Vocabulary | True | CommaSeparatedValues | + + + #### conformsTo What the vocabulary conforms to. 
-| title | examples | required | type | -|:------------|:-----------|:-----------|:---------------------| -| Conforms To | | True | CommaSeparatedValues | +| title | required | type | +|:------------|:-----------|:---------------------| +| Conforms To | True | CommaSeparatedValues | + + + #### languages Language code(s) of the language of the dataset metadata and underlying data is made available. -| title | examples | required | type | -|:-----------------|:-----------|:-----------|:---------------------| -| Language Code(s) | | True | CommaSeparatedValues | +| title | required | type | +|:-----------------|:-----------|:---------------------| +| Language Code(s) | True | CommaSeparatedValues | + + + #### formats Format(s) the dataset can be made available in -| title | examples | required | type | -|:---------------|:-----------|:-----------|:---------------------| -| Dataset Format | | True | CommaSeparatedValues | +| title | required | type | +|:---------------|:-----------|:---------------------| +| Dataset Format | True | CommaSeparatedValues | + + + ## linkage @@ -515,45 +739,63 @@ Linkage and enrichment. + + + ### isGeneratedUsing ?? -| title | examples | required | type | -|:-------------------|:-----------|:-----------|:---------------------| -| Is Generated Using | | False | CommaSeparatedValues | +| title | required | type | +|:-------------------|:-----------|:---------------------| +| Is Generated Using | False | CommaSeparatedValues | + + + ### associatedMedia Any media associated with the Gateway Organisation using a valid URI for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question -| title | examples | required | type | -|:-----------------|:-----------|:-----------|:---------------------| -| Associated Media | | False | CommaSeparatedValues | +| title | required | type | +|:-----------------|:-----------|:---------------------| +| Associated Media | False | CommaSeparatedValues | + + + ### dataUses ?? -| title | examples | required | type | -|:----------|:-----------|:-----------|:---------------------| -| Data Uses | | False | CommaSeparatedValues | +| title | required | type | +|:----------|:-----------|:---------------------| +| Data Uses | False | CommaSeparatedValues | + + + ### isReferenceIn Rhe keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. -| title | examples | required | type | -|:----------------|:-----------|:-----------|:---------------------| -| Is Reference in | | False | CommaSeparatedValues | +| title | required | type | +|:----------------|:-----------|:---------------------| +| Is Reference in | False | CommaSeparatedValues | + + + ### tools URL of any analysis tools or models that have been created for this dataset and are available for further use -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Tools | | False | CommaSeparatedValues | +| title | required | type | +|:--------|:-----------|:---------------------| +| Tools | False | CommaSeparatedValues | + + + ### datasetLinkage @@ -561,120 +803,340 @@ Dataset Linkage copied over from + + + #### isDerivedFrom Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset -| title | examples | required | type | -|:------------|:-----------|:-----------|:---------------------| -| Derivations | | False | CommaSeparatedValues | +| title | required | type | +|:------------|:-----------|:---------------------| +| Derivations | False | CommaSeparatedValues | + + + #### isPartOf If the dataset is part of a group or family -| title | examples | required | type | -|:----------|:-----------|:-----------|:---------------------| -| Is PartOf | | False | CommaSeparatedValues | +| title | required | type | +|:----------|:-----------|:---------------------| +| Is PartOf | False | CommaSeparatedValues | + + + #### isMemberOf Dataset is a member of XXX(?) -| title | examples | required | type | -|:------------|:-----------|:-----------|:---------------------| -| Is MemberOf | | False | CommaSeparatedValues | +| title | required | type | +|:------------|:-----------|:---------------------| +| Is MemberOf | False | CommaSeparatedValues | + + + #### linkedDatasets Links to other datasets. -| title | examples | required | type | -|:----------------|:-----------|:-----------|:---------------------| -| Linked Datasets | | False | CommaSeparatedValues | +| title | required | type | +|:----------------|:-----------|:---------------------| +| Linked Datasets | False | CommaSeparatedValues | + + + ### investigations Please provide the keystone paper associated with the dataset. 
-| title | examples | required | type | -|:---------------|:-----------|:-----------|:---------------------| -| Investigations | | False | CommaSeparatedValues | +| title | required | type | +|:---------------|:-----------|:---------------------| +| Investigations | False | CommaSeparatedValues | + + + ## observations Obsservations -| title | examples | required | type | -|:-------------|:-----------|:-----------|:------------| -| Observations | | False | Observation | +| title | required | type | +|:-------------|:-----------|:------------| +| Observations | False | Observation | + + + ## structuralMetadata Descriptions of all tables and data elements that can be included in the dataset -| title | examples | required | type | -|:--------------------|:-----------|:-----------|:----------| -| Structural Metadata | | False | DataTable | +| title | required | type | +|:--------------------|:-----------|:----------| +| Structural Metadata | False | DataTable | + + + + +## tissuesSampleCollection + +Metadata collection for Tissue Samples datasets + + + + + + +### id + +ID of the tissue sample collection + +| title | required | type | +|:--------|:-----------|:---------------------| +| ID | False | CommaSeparatedValues | + + + + +### dataCategories + +Data categories related to the tissue sample collection + +| title | required | type | +|:----------------|:-----------|:---------------------| +| Data Categories | False | CommaSeparatedValues | + + + + +### materialType + +Material type of the tissue sample collection + +| title | required | type | +|:--------------|:-----------|:---------------------| +| Material Type | False | CommaSeparatedValues | + + + + +### accessConditions + +Access conditions for the tissue sample collection + +| title | required | type | +|:------------------|:-----------|:---------------------| +| Access Conditions | False | CommaSeparatedValues | -## omopIDs -Collection of OMOP IDs that are contained within the dataset. 
+ + +### collectionType + +Type of the tissue sample collection + +| title | required | type | +|:----------------|:-----------|:---------------------| +| Collection Type | False | CommaSeparatedValues | + + + + +### disease + +Disease associated with the tissue sample collection + +| title | required | type | +|:--------|:-----------|:---------------------| +| Disease | False | CommaSeparatedValues | + + + + +### storageTemperature + +Storage temperature of the tissue sample collection + +| title | required | type | +|:--------------------|:-----------|:---------------------| +| Storage Temperature | False | CommaSeparatedValues | + + + + +### sampleAgeRange + +Age range of the tissue sample collection + +| title | required | type | +|:-----------------|:-----------|:---------------------| +| Sample Age Range | False | CommaSeparatedValues | + + + + +### tissueSampleMetadata + +Metadata related to the tissue sample + + + + + + +#### id + +ID of the tissue sample metadata + +| title | required | type | +|:------------|:-----------|:-------| +| Metadata ID | False | str | + + + + +#### sampleDonor + +Information about the sample donor + + + + + + +##### id + +ID of the sample donor +| title | required | type | +|:---------|:-----------|:-------| +| Donor ID | False | str | -### measurements -OMOP Concept IDs for measured quantities. E.g. 44810410 - https://athena.ohdsi.org/search-terms/terms/44810410 + +##### sex + +Sex of the sample donor -| title | examples | required | type | -|:------------------|:-----------|:-----------|:-----------------------| -| OMOP Measurements | | False | CommaSeparatedIntegers | +| title | required | type | +|:----------|:-----------|:-------| +| Donor Sex | False | str | + -### drug -OMOP Concept IDs for drug exposures. E.g. 
602396 - https://athena.ohdsi.org/search-terms/terms/602396 + +##### birthDate + +Date of birth of the sample donor -| title | examples | required | type | -|:-----------|:-----------|:-----------|:-----------------------| -| OMOP Drugs | | False | CommaSeparatedIntegers | +| title | required | type | +|:-----------------|:-----------|:-------| +| Donor birth date | False | date | + -### observations -OMOP Concept IDs for observations. + +##### dataCategories + +Data categories related to the sample donor -| title | examples | required | type | -|:------------------|:-----------|:-----------|:-----------------------| -| OMOP Observations | | False | CommaSeparatedIntegers | +| title | required | type | +|:----------------------|:-----------|:---------------------| +| Donor Data Categories | False | CommaSeparatedValues | + -### specimens -OMOP Concept IDs for specimens. + +#### sampleType + +Type of the tissue sample -| title | examples | required | type | -|:---------------|:-----------|:-----------|:-----------------------| -| OMOP Specimens | | False | CommaSeparatedIntegers | +| title | required | type | +|:------------|:-----------|:---------------------| +| Sample Type | False | CommaSeparatedValues | + -### conditions -OMOP Concept IDs for condition occurrences. + +#### storageTemperature + +Storage temperature of the tissue sample -| title | examples | required | type | -|:---------------------------|:-----------|:-----------|:-----------------------| -| OMOP Condition Occurrences | | False | CommaSeparatedIntegers | +| title | required | type | +|:--------------------|:-----------|:-------| +| Storage Temperature | False | str | + -### procedures -OMOP Concept IDs for procedure occurrences. 
+ +#### creationDate + +Date when the tissue sample metadata was created -| title | examples | required | type | -|:---------------------------|:-----------|:-----------|:-----------------------| -| OMOP Procedure Occurrences | | False | CommaSeparatedIntegers | +| title | required | type | +|:--------------|:-----------|:-------| +| Creation Date | False | date | + -### device_exposures -OMOP Concept IDs for device exposures. + +#### anatomicalSiteOntologyCode + +Ontology code for the anatomical site -| title | examples | required | type | -|:--------|:-----------|:-----------|:-----------------------| -| OMOP | | False | CommaSeparatedIntegers | +| title | required | type | +|:------------------------------|:-----------|:---------------------| +| Anatomical Site Ontology Code | False | CommaSeparatedValues | + + + + +#### anatomicalSiteOntologyDescription + +Ontology description for the anatomical site + +| title | required | type | +|:-------------------------------------|:-----------|:---------------------| +| Anatomical Site Ontology Description | False | CommaSeparatedValues | + + + + +#### anatomicalSiteFreeText + +Free text describing the anatomical site + +| title | required | type | +|:--------------------------|:-----------|:---------------------| +| Anatomical Site Free Text | False | CommaSeparatedValues | + + + + +#### sampleContentDiagnosis + +Diagnosis related to the sample content + +| title | required | type | +|:-------------------------|:-----------|:---------------------| +| Sample Content Diagnosis | False | CommaSeparatedValues | + + + + +#### useRestrictions + +Restrictions on the use of the tissue sample + +| title | required | type | +|:-----------------|:-----------|:---------------------| +| Use Restrictions | False | CommaSeparatedValues | + + + diff --git a/docs/GWDM/1.1.structure.json b/docs/GWDM/1.1.structure.json index 961e4de..0758d7c 100644 --- a/docs/GWDM/1.1.structure.json +++ b/docs/GWDM/1.1.structure.json @@ -200,6 +200,23 @@ 
"type": "str" } ] + }, + { + "name": "populationSize", + "required": false, + "title": "Population size", + "description": "Summary population size of the cohort", + "examples": null, + "type": "int" + }, + { + "name": "datasetSubType", + "required": false, + "title": "Dataset sub type", + "description": "What is the subtype for this dataset?", + "examples": null, + "type": "DatasetType", + "subItems": [] } ] }, @@ -726,75 +743,215 @@ "type": "DataTable" }, { - "name": "omopIDs", + "name": "tissuesSampleCollection", "required": false, - "title": "OMOP IDs", - "description": "Collection of OMOP IDs that are contained within the dataset.", + "title": "Tissues Sample Collection", + "description": "Metadata collection for Tissue Samples datasets", "examples": null, - "type": "OmopIDs", + "type": "TissuesSampleCollection", "subItems": [ { - "name": "measurements", + "name": "id", "required": false, - "title": "OMOP Measurements", - "description": "OMOP Concept IDs for measured quantities. E.g. 44810410 - https://athena.ohdsi.org/search-terms/terms/44810410", + "title": "ID", + "description": "ID of the tissue sample collection", "examples": null, - "type": "CommaSeparatedIntegers", + "type": "CommaSeparatedValues", "subItems": [] }, { - "name": "drug", + "name": "dataCategories", "required": false, - "title": "OMOP Drugs", - "description": "OMOP Concept IDs for drug exposures. E.g. 
602396 - https://athena.ohdsi.org/search-terms/terms/602396", + "title": "Data Categories", + "description": "Data categories related to the tissue sample collection", "examples": null, - "type": "CommaSeparatedIntegers", + "type": "CommaSeparatedValues", "subItems": [] }, { - "name": "observations", + "name": "materialType", "required": false, - "title": "OMOP Observations", - "description": "OMOP Concept IDs for observations.", + "title": "Material Type", + "description": "Material type of the tissue sample collection", "examples": null, - "type": "CommaSeparatedIntegers", + "type": "CommaSeparatedValues", "subItems": [] }, { - "name": "specimens", + "name": "accessConditions", "required": false, - "title": "OMOP Specimens", - "description": "OMOP Concept IDs for specimens.", + "title": "Access Conditions", + "description": "Access conditions for the tissue sample collection", "examples": null, - "type": "CommaSeparatedIntegers", + "type": "CommaSeparatedValues", "subItems": [] }, { - "name": "conditions", + "name": "collectionType", "required": false, - "title": "OMOP Condition Occurrences", - "description": "OMOP Concept IDs for condition occurrences.", + "title": "Collection Type", + "description": "Type of the tissue sample collection", "examples": null, - "type": "CommaSeparatedIntegers", + "type": "CommaSeparatedValues", "subItems": [] }, { - "name": "procedures", + "name": "disease", "required": false, - "title": "OMOP Procedure Occurrences", - "description": "OMOP Concept IDs for procedure occurrences.", + "title": "Disease", + "description": "Disease associated with the tissue sample collection", "examples": null, - "type": "CommaSeparatedIntegers", + "type": "CommaSeparatedValues", "subItems": [] }, { - "name": "device_exposures", + "name": "storageTemperature", "required": false, - "title": "OMOP ", - "description": "OMOP Concept IDs for device exposures.", + "title": "Storage Temperature", + "description": "Storage temperature of the tissue sample 
collection", "examples": null, - "type": "CommaSeparatedIntegers", + "type": "CommaSeparatedValues", "subItems": [] + }, + { + "name": "sampleAgeRange", + "required": false, + "title": "Sample Age Range", + "description": "Age range of the tissue sample collection", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "tissueSampleMetadata", + "required": false, + "title": "Tissue Sample Metadata", + "description": "Metadata related to the tissue sample", + "examples": null, + "type": "TissueSampleMetadata", + "subItems": [ + { + "name": "id", + "required": false, + "title": "Metadata ID", + "description": "ID of the tissue sample metadata", + "examples": null, + "type": "str" + }, + { + "name": "sampleDonor", + "required": false, + "title": "Sample Donor", + "description": "Information about the sample donor", + "examples": null, + "type": "SampleDonor", + "subItems": [ + { + "name": "id", + "required": false, + "title": "Donor ID", + "description": "ID of the sample donor", + "examples": null, + "type": "str" + }, + { + "name": "sex", + "required": false, + "title": "Donor Sex", + "description": "Sex of the sample donor", + "examples": null, + "type": "str" + }, + { + "name": "birthDate", + "required": false, + "title": "Donor birth date", + "description": "Date of birth of the sample donor", + "examples": null, + "type": "date" + }, + { + "name": "dataCategories", + "required": false, + "title": "Donor Data Categories", + "description": "Data categories related to the sample donor", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + } + ] + }, + { + "name": "sampleType", + "required": false, + "title": "Sample Type", + "description": "Type of the tissue sample", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "storageTemperature", + "required": false, + "title": "Storage Temperature", + "description": "Storage temperature of the tissue sample", + "examples": 
null, + "type": "str" + }, + { + "name": "creationDate", + "required": false, + "title": "Creation Date", + "description": "Date when the tissue sample metadata was created", + "examples": null, + "type": "date" + }, + { + "name": "anatomicalSiteOntologyCode", + "required": false, + "title": "Anatomical Site Ontology Code", + "description": "Ontology code for the anatomical site", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "anatomicalSiteOntologyDescription", + "required": false, + "title": "Anatomical Site Ontology Description", + "description": "Ontology description for the anatomical site", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "anatomicalSiteFreeText", + "required": false, + "title": "Anatomical Site Free Text", + "description": "Free text describing the anatomical site", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "sampleContentDiagnosis", + "required": false, + "title": "Sample Content Diagnosis", + "description": "Diagnosis related to the sample content", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "useRestrictions", + "required": false, + "title": "Use Restrictions", + "description": "Restrictions on the use of the tissue sample", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + } + ] } ] } diff --git a/docs/HDRUK/2.2.0.md b/docs/HDRUK/2.2.0.md new file mode 100644 index 0000000..538182d --- /dev/null +++ b/docs/HDRUK/2.2.0.md @@ -0,0 +1,849 @@ + +## identifier + +System dataset identifier + +| title | required | type | +|:-------------------|:-----------|:-------| +| Dataset identifier | True | Uuidv4 | + +Examples: + * ['226fb3f1-4471-400a-8c39-2b66d46a39b6', 'https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6'] + + +## version + +Dataset metadata version + +| title | required | type | 
+|:----------------|:-----------|:-------| +| Dataset Version | True | Semver | + +Examples: + * 1.1.0 + + +## revisions + +Revisions of Dataset metadata + + + + + + +### version + +Semantic Version + +| title | required | type | +|:--------|:-----------|:-------| +| | True | Semver | + + + + +### url + +URL endpoint to obtain the version + +| title | required | type | +|:--------|:-----------|:-------| +| | True | Url | + + + + +## issued + +Dataset Metadata Creation Date + +| title | required | type | +|:--------------|:-----------|:---------| +| Creation Date | True | datetime | + + + + +## modified + +Dataset Metadata Creation Date + +| title | required | type | +|:------------------|:-----------|:---------| +| Modification Date | True | datetime | + + + + +## summary + +Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP. + + + + + + +### title + +Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers. + +| title | required | type | +|:--------|:-----------|:--------------------------| +| Title | True | OneHundredFiftyCharacters | + +Examples: + * ['North West London COVID-19 Patient Level Situation Report'] + + +### abstract + +Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. 
The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible + +| title | required | type | +|:-----------------|:-----------|:-------------| +| Dataset Abstract | True | AbstractText | + +Examples: + * CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web® including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice. + + +### publisher + +This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most cases this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank. + + + + + + +#### identifier + +Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. If your organisation does not have a Grid.ac identifier please use the “suggest an institute” function here: https://www.grid.ac/institutes# + +| title | required | type | +|:------------------------|:-----------|:-------| +| Organisation Identifier | False | Url | + + + + +#### name + +Name of the organisation + +| title | required | type | +|:------------------|:-----------|:--------------------------| +| Organisation Name | True | OneHundredFiftyCharacters | + + + + +#### logo + +Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. + +| title | required | type | +|:------------------|:-----------|:-------| +| Organisation Logo | False | Url | + + + + +#### description + +Please provide a URL that describes the organisation. 
+ +| title | required | type | +|:-------------------------|:-----------|:------------| +| Organisation Description | False | Description | + + + + +#### contactPoint + +Organisation contact point(s) + +| title | required | type | +|:---------------------------|:-----------|:-------------| +| Organisation Contact Point | True | EmailAddress | + + + + +#### memberOf + +Please indicate if the organisation is an Alliance Member or a Hub. + +| title | required | type | +|:------------------------|:-----------|:---------| +| Organisation Membership | False | MemberOf | + + + + +### contactPoint + +Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose. + +| title | required | type | +|:--------------|:-----------|:-------------| +| Contact Point | True | EmailAddress | + +Examples: + * SAILDatabank@swansea.ac.uk + + +### keywords + +Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users. 
+ +| title | required | type | +|:---------|:-----------|:---------------------| +| Keywords | True | CommaSeparatedValues | + + + + +### alternateIdentifiers + +Alternate dataset identifiers or local identifiers + +| title | required | type | +|:------------------------------|:-----------|:---------------------| +| Alternate dataset identifiers | False | CommaSeparatedValues | + + + + +### doiName + +All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI. + +| title | required | type | +|:--------------------------|:-----------|:-------| +| Digital Object Identifier | False | Doi | + +Examples: + * 10.3399/bjgp17X692645 + + +## documentation + +Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. Organisations are required to confirm that they have permission to distribute any additional media. + + + + + + +### description + +A free-text description of the record. + +| title | required | type | +|:------------|:-----------|:------------| +| Description | False | Description | + + + + +### associatedMedia + +Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted: .jpg, .png, .svg, .pdf, .xlsx or .docx. Note: media assets can be hosted by the organisation or uploaded using the onboarding portal. 
+ +| title | required | type | +|:-----------------|:-----------|:---------------------| +| Associated Media | False | CommaSeparatedValues | + +Examples: + * PDF Document that describes study protocol + + +### isPartOf + +Please complete only if the dataset is part of a group or family + +| title | required | type | +|:--------|:-----------|:---------------------| +| Group | False | CommaSeparatedValues | + +Examples: + * Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS). + + +## coverage + +This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data. + + + + + + +### spatial + +The geographical area covered by the dataset. It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race. + +| title | required | type | +|:--------------------|:-----------|:---------------------| +| Geographic Coverage | False | CommaSeparatedValues | + +Examples: + * https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html + + +### typicalAgeRange + +Please indicate the age range in whole years of participants in the dataset. Please provide the range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). + +| title | required | type | +|:----------|:-----------|:---------| +| Age Range | False | AgeRange | + + + + +### physicalSampleAvailability + +Availability of physical samples associated with the dataset. If samples are available, please indicate the types of samples that are available. More than one type may be provided. If samples are not yet available, please provide “AVAILABILITY TO BE CONFIRMED”. If samples are not available, then please provide “NOT AVAILABLE”. 
+ +| title | required | type | +|:-----------------------------|:-----------|:---------------------| +| Physical Sample Availability | False | CommaSeparatedValues | + +Examples: + * BONE MARROW + + +### followup + +If known, what is the typical time span that a patient appears in the dataset (follow up period) + +| title | required | type | +|:---------|:-----------|:---------| +| Followup | False | Followup | + + + + +### pathway + +Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. + +| title | required | type | +|:--------|:-----------|:------------| +| Pathway | False | Description | + + + + +## provenance + +Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness. + + + + + + +### origin + +None + + + + + + +#### purpose + +Please indicate the purpose(s) for which the dataset was collected. + +| title | required | type | +|:--------|:-----------|:---------------------| +| Purpose | False | CommaSeparatedValues | + + + + +#### source + +Please indicate the source of the data extraction + +| title | required | type | +|:--------|:-----------|:---------------------| +| Source | False | CommaSeparatedValues | + + + + +#### collectionSituation + +Please indicate the setting(s) where data was collected. Multiple settings may be provided + +| title | required | type | +|:--------|:-----------|:---------------------| +| Setting | False | CommaSeparatedValues | + + + + +### temporal + +None + + + + + + +#### distributionReleaseDate + +Date of the latest release of the dataset. If this is a regular release i.e. 
quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020. + +| title | required | type | +|:-------------|:-----------|:-------| +| Release Date | False | date | + + + + +#### startDate + +The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. + +| title | required | type | +|:-----------|:-----------|:-------| +| Start Date | True | date | + + + + +#### endDate + +The end of the time period that the dataset provides coverage for. If the dataset is “Continuous” and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information. + +| title | required | type | +|:---------|:-----------|:-------| +| End Date | False | date | + + + + +#### timeLag + +Please indicate the typical time-lag between an event and the data for that event appearing in the dataset + +| title | required | type | +|:---------|:-----------|:--------| +| Time Lag | True | TimeLag | + + + + +#### publishingFrequency + +Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. 
If a dataset has been published and will remain static please indicate that it is static and indicate when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/ + +| title | required | type | +|:---------------------|:-----------|:------------| +| Publishing Frequency | True | Periodicity | + + + + +## accessibility + +Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets. + + + + + + +### usage + +This section includes information about how the data can be used and how it is currently being used + + + + + + +#### dataUseLimitation + +Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE + +| title | required | type | +|:--------------------|:-----------|:---------------------| +| Data Use Limitation | False | CommaSeparatedValues | + + + + +#### dataUseRequirements + +Please indicate if there are any additional conditions set for use; multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information. + +| title | required | type | +|:----------------------|:-----------|:---------------------| +| Data Use Requirements | False | CommaSeparatedValues | + + + + +#### resourceCreator + +Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided. 
+ +| title | required | type | +|:----------------------|:-----------|:-----------------| +| Citation Requirements | False | ShortDescription | + + + + +#### investigations + +None + +| title | required | type | +|:---------------|:-----------|:---------------------| +| Investigations | False | CommaSeparatedValues | + + + + +#### isReferencedBy + +Please provide the keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list. + +| title | required | type | +|:----------|:-----------|:-------| +| Citations | False | Doi | + + + + +### access + +This section includes information about data access + + + + + + +#### accessRights + +None + +| title | required | type | +|:--------------|:-----------|:----------------| +| Access Rights | True | LongDescription | + + + + +#### accessService + +Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset. 
+ +| title | required | type | +|:---------------|:-----------|:----------------| +| Access Service | False | LongDescription | + +Examples: + * https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide + + +#### accessRequestCost + +Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian. + +| title | required | type | +|:---------------------------------|:-----------|:----------------| +| Organisation Access Request Cost | False | LongDescription | + + + + +#### deliveryLeadTime + +Please provide an indication of the typical processing times based on the types of requests typically received. + +| title | required | type | +|:------------------------|:-----------|:-----------------| +| Access Request Duration | False | DeliveryLeadTime | + + + + +#### jurisdiction + +Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored. + +| title | required | type | +|:-------------|:-----------|:---------------------| +| Jurisdiction | True | CommaSeparatedValues | + + + + +#### dataController + +Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed. + +| title | required | type | +|:----------------|:-----------|:----------------| +| Data Controller | True | LongDescription | + + + + +#### dataProcessor + +A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller. 
+ +| title | required | type | +|:---------------|:-----------|:----------------| +| Data Processor | False | LongDescription | + + + + +### formatAndStandards + +Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset. + + + + + + +#### vocabularyEncodingScheme + +List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided. + +| title | required | type | +|:----------------------|:-----------|:---------------------| +| Controlled Vocabulary | True | CommaSeparatedValues | + + + + +#### conformsTo + +List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. + +| title | required | type | +|:------------|:-----------|:---------------------| +| Conforms To | True | CommaSeparatedValues | + + + + +#### language + +This should list all the languages in which the dataset metadata and underlying data is made available. + +| title | required | type | +|:---------|:-----------|:---------------------| +| Language | True | CommaSeparatedValues | + + + + +#### format + +If multiple formats are available please specify. 
See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format. + +| title | required | type | +|:--------|:-----------|:---------------------| +| Format | True | CommaSeparatedValues | + + + + +## enrichmentAndLinkage + +This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers. + + + + + + +### qualifiedRelation + +If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. Note: If all the datasets from Gateway organisation can be linked please indicate “ALL” and the onboarding portal will automate linkage across the datasets submitted. + +| title | required | type | +|:----------------|:-----------|:---------------------| +| Linked Datasets | False | CommaSeparatedValues | + + + + +### derivation + +Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset. 
+ +| title | required | type | +|:------------|:-----------|:---------------------| +| Derivations | False | CommaSeparatedValues | + + + + +### tools + +Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/ + +| title | required | type | +|:--------|:-----------|:---------------------| +| Tools | False | CommaSeparatedValues | + + + + +## observations + +Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: “2017” + + + + + + +### observedNode + +Please select one of the following statistical populations for your observation + +| title | required | type | +|:-----------------------|:-----------|:---------------------------------| +| Statistical Population | True | StatisticalPopulationConstrained | + +Examples: + * PERSONS + + +### measuredValue + +Please provide the population size associated with the population type in the dataset i.e. 
1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset. + +| title | required | type | +|:---------------|:-----------|:-------| +| Measured Value | True | int | + + + + +### disambiguatingDescription + +If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type. + +| title | required | type | +|:---------------------------|:-----------|:-------------| +| Disambiguating Description | False | AbstractText | + + + + +### observationDate + +Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations. 
+ +| title | required | type | +|:-----------------|:-----------|:-------| +| Observation Date | True | date | + + + + +### measuredProperty + +Initially this will be defaulted to "COUNT" + +| title | required | type | +|:------------------|:-----------|:-----------------| +| Measured Property | True | MeasuredProperty | + + + + +## structuralMetadata + +Descriptions of all tables and data elements that can be included in the dataset + +| title | required | type | +|:--------------------|:-----------|:----------| +| Structural Metadata | False | DataClass | + + + + +## tissuesSampleCollection + +Metadata collection for Tissue Samples datasets + + + + + + +### dataCategories + +Data categories related to the tissue sample collection + +| title | required | type | +|:----------------|:-----------|:-------------------------| +| Data Categories | False | TissueDataCategoriesEnum | + + + + +### materialType + +Material type of the tissue sample collection + +| title | required | type | +|:--------------|:-----------|:-----------------------| +| Material Type | False | MaterialTypeCategories | + + + + +### tissueSampleMetadata + +Metadata related to the tissue sample + + + + + + +#### creationDate + +Date when the tissue sample metadata was created + +| title | required | type | +|:--------------|:-----------|:-------| +| Creation Date | False | date | + + + + +#### AnatomicalSiteOntologyCode + +Ontology code for the anatomical site, this code must match an ICD-0-3 format + +| title | required | type | +|:------------------------------|:-----------|:--------| +| Anatomical Site Ontology Code | False | ICD_0_3 | + + + diff --git a/docs/HDRUK/2.2.0.structure.json b/docs/HDRUK/2.2.0.structure.json new file mode 100644 index 0000000..31c55e5 --- /dev/null +++ b/docs/HDRUK/2.2.0.structure.json @@ -0,0 +1,741 @@ +[ + { + "name": "identifier", + "required": true, + "title": "Dataset identifier", + "description": "System dataset identifier", + "examples": [ + [ + 
"226fb3f1-4471-400a-8c39-2b66d46a39b6", + "https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" + ] + ], + "type": "Uuidv4", + "subItems": [] + }, + { + "name": "version", + "required": true, + "title": "Dataset Version", + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "type": "Semver", + "subItems": [] + }, + { + "name": "revisions", + "required": true, + "title": "Dataset Revisions", + "description": "Revisions of Dataset metadata", + "examples": null, + "type": "Revision", + "subItems": [ + { + "name": "version", + "required": true, + "title": null, + "description": "Semantic Version", + "examples": null, + "type": "Semver", + "subItems": [] + }, + { + "name": "url", + "required": true, + "title": null, + "description": "URL endpoint to obtain the version", + "examples": null, + "type": "Url", + "subItems": [] + } + ] + }, + { + "name": "issued", + "required": true, + "title": "Creation Date", + "description": "Dataset Metadata Creation Date", + "examples": null, + "type": "datetime" + }, + { + "name": "modified", + "required": true, + "title": "Modification Date", + "description": "Dataset Metadata Creation Date", + "examples": null, + "type": "datetime" + }, + { + "name": "summary", + "required": true, + "title": "Summary", + "description": "Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP.", + "examples": null, + "type": "Summary", + "subItems": [ + { + "name": "title", + "required": true, + "title": "Title", + "description": "Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. 
Good titles should summarise the content of the dataset and if relevant, the region the dataset covers.", + "examples": [ + [ + "North West London COVID-19 Patient Level Situation Report" + ] + ], + "type": "OneHundredFiftyCharacters", + "subItems": [] + }, + { + "name": "abstract", + "required": true, + "title": "Dataset Abstract", + "description": "Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible", + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." + ], + "type": "AbstractText", + "subItems": [] + }, + { + "name": "publisher", + "required": true, + "title": "Dataset publisher", + "description": "This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank.", + "examples": null, + "type": "Organisation", + "subItems": [ + { + "name": "identifier", + "required": false, + "title": "Organisation Identifier", + "description": "Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. 
If your organisation does not have a Grid.ac identifier please use the \u201csuggest an institute\u201d function here: https://www.grid.ac/institutes#", + "examples": null, + "type": "Url", + "subItems": [] + }, + { + "name": "name", + "required": true, + "title": "Organisation Name", + "description": "Name of the organisation", + "examples": null, + "type": "OneHundredFiftyCharacters", + "subItems": [] + }, + { + "name": "logo", + "required": false, + "title": "Organisation Logo", + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg.", + "examples": null, + "type": "Url", + "subItems": [] + }, + { + "name": "description", + "required": false, + "title": "Organisation Description", + "description": "Please provide a URL that describes the organisation.", + "examples": null, + "type": "Description", + "subItems": [] + }, + { + "name": "contactPoint", + "required": true, + "title": "Organisation Contact Point", + "description": "Organisation contact point(s)", + "examples": null, + "type": "EmailAddress", + "subItems": [] + }, + { + "name": "memberOf", + "required": false, + "title": "Organisation Membership", + "description": "Please indicate if the organisation is an Alliance Member or a Hub.", + "examples": null, + "type": "MemberOf" + } + ] + }, + { + "name": "contactPoint", + "required": true, + "title": "Contact Point", + "description": "Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. 
Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose.", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "type": "EmailAddress", + "subItems": [] + }, + { + "name": "keywords", + "required": true, + "title": "Keywords", + "description": "Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "alternateIdentifiers", + "required": false, + "title": "Alternate dataset identifiers", + "description": "Alternate dataset identifiers or local identifiers", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "doiName", + "required": false, + "title": "Digital Object Identifier", + "description": "All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI.", + "examples": [ + "10.3399/bjgp17X692645" + ], + "type": "Doi", + "subItems": [] + } + ] + }, + { + "name": "documentation", + "required": false, + "title": "Documentation", + "description": "Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. 
Organisations are required to confirm that they have permission to distribute any additional media.", + "examples": null, + "type": "Documentation", + "subItems": [ + { + "name": "description", + "required": false, + "title": "Description", + "description": "A free-text description of the record.", + "examples": null, + "type": "Description", + "subItems": [] + }, + { + "name": "associatedMedia", + "required": false, + "title": "Associated Media", + "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "examples": [ + "PDF Document that describes study protocol" + ], + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "isPartOf", + "required": false, + "title": "Group", + "description": "Please complete only if the dataset is part of a group or family", + "examples": [ + "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)." + ], + "type": "CommaSeparatedValues", + "subItems": [] + } + ] + }, + { + "name": "coverage", + "required": false, + "title": "Coverage", + "description": "This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data.", + "examples": null, + "type": "Coverage", + "subItems": [ + { + "name": "spatial", + "required": false, + "title": "Geographic Coverage", + "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", + "examples": [ + "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" + ], + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "typicalAgeRange", + "required": false, + "title": "Age Range", + "description": "Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "examples": null, + "type": "AgeRange", + "subItems": [] + }, + { + "name": "physicalSampleAvailability", + "required": false, + "title": "Physical Sample Availability", + "description": "Availability of physical samples associated with the dataset. If samples are available, please indicate the types of samples that are available. More than one type may be provided. If sample are not yet available, please provide \u201cAVAILABILITY TO BE CONFIRMED\u201d. If samples are not available, then please provide \u201cNOT AVAILABLE\u201d.", + "examples": [ + "BONE MARROW" + ], + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "followup", + "required": false, + "title": "Followup", + "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", + "examples": null, + "type": "Followup" + }, + { + "name": "pathway", + "required": false, + "title": "Pathway", + "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. 
primary and secondary care), or an integrated care record covering the whole patient pathway.", + "examples": null, + "type": "Description", + "subItems": [] + } + ] + }, + { + "name": "provenance", + "required": false, + "title": "Provenance", + "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", + "examples": null, + "type": "Provenance", + "subItems": [ + { + "name": "origin", + "required": false, + "title": null, + "description": null, + "examples": null, + "type": "Origin", + "subItems": [ + { + "name": "purpose", + "required": false, + "title": "Purpose", + "description": "Please indicate the purpose(s) that the dataset was collected.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "source", + "required": false, + "title": "Source", + "description": "Please indicate the source of the data extraction", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "collectionSituation", + "required": false, + "title": "Setting", + "description": "Please indicate the setting(s) where data was collected. Multiple settings may be provided", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + } + ] + }, + { + "name": "temporal", + "required": true, + "title": null, + "description": null, + "examples": null, + "type": "Temporal", + "subItems": [ + { + "name": "distributionReleaseDate", + "required": false, + "title": "Release Date", + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. 
if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "examples": null, + "type": "date" + }, + { + "name": "startDate", + "required": true, + "title": "Start Date", + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "examples": null, + "type": "date" + }, + { + "name": "endDate", + "required": false, + "title": "End Date", + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "examples": null, + "type": "date" + }, + { + "name": "timeLag", + "required": true, + "title": "Time Lag", + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "examples": null, + "type": "TimeLag" + }, + { + "name": "publishingFrequency", + "required": true, + "title": "Publishing Frequency", + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. 
If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "examples": null, + "type": "Periodicity" + } + ] + } + ] + }, + { + "name": "accessibility", + "required": true, + "title": "Accessibility", + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", + "examples": null, + "type": "Accessibility", + "subItems": [ + { + "name": "usage", + "required": false, + "title": "Usage", + "description": "This section includes information about how the data can be used and how it is currently being used", + "examples": null, + "type": "Usage", + "subItems": [ + { + "name": "dataUseLimitation", + "required": false, + "title": "Data Use Limitation", + "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "dataUseRequirements", + "required": false, + "title": "Data Use Requirements", + "description": "Please indicate if there are any additional conditions set for use if any, multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "resourceCreator", + "required": false, + "title": "Citation Requirements", + "description": "Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. 
No employee details should be provided.", + "examples": null, + "type": "ShortDescription", + "subItems": [] + }, + { + "name": "investigations", + "required": false, + "title": "Investigations", + "description": null, + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "isReferencedBy", + "required": false, + "title": "Citations", + "description": "Please provide the keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list.", + "examples": null, + "type": "Doi", + "subItems": [] + } + ] + }, + { + "name": "access", + "required": true, + "title": "Access", + "description": "This section includes information about data access", + "examples": null, + "type": "Access", + "subItems": [ + { + "name": "accessRights", + "required": true, + "title": "Access Rights", + "description": null, + "examples": null, + "type": "LongDescription", + "subItems": [] + }, + { + "name": "accessService", + "required": false, + "title": "Access Service", + "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. 
However, there will be the opportunity to overwrite this value for each dataset.", + "examples": [ + "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + ], + "type": "LongDescription", + "subItems": [] + }, + { + "name": "accessRequestCost", + "required": false, + "title": "Organisation Access Request Cost", + "description": "Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", + "examples": null, + "type": "LongDescription", + "subItems": [] + }, + { + "name": "deliveryLeadTime", + "required": false, + "title": "Access Request Duration", + "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", + "examples": null, + "type": "DeliveryLeadTime" + }, + { + "name": "jurisdiction", + "required": true, + "title": "Jurisdiction", + "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "dataController", + "required": true, + "title": "Data Controller", + "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", + "examples": null, + "type": "LongDescription", + "subItems": [] + }, + { + "name": "dataProcessor", + "required": false, + "title": "Data Processor", + "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", + "examples": null, + "type": "LongDescription", + "subItems": [] + } + ] + }, + { + "name": "formatAndStandards", + "required": false, + "title": "Format and Standards", + "description": "Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset.", + "examples": null, + "type": "FormatAndStandards", + "subItems": [ + { + "name": "vocabularyEncodingScheme", + "required": true, + "title": "Controlled Vocabulary", + "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. 
Notes: More than one vocabulary may be provided.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "conformsTo", + "required": true, + "title": "Conforms To", + "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "language", + "required": true, + "title": "Language", + "description": "This should list all the languages in which the dataset metadata and underlying data is made available.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "format", + "required": true, + "title": "Format", + "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + } + ] + } + ] + }, + { + "name": "enrichmentAndLinkage", + "required": false, + "title": "Enrichment and Linkage", + "description": "This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. 
If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", + "examples": null, + "type": "EnrichmentAndLinkage", + "subItems": [ + { + "name": "qualifiedRelation", + "required": false, + "title": "Linked Datasets", + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. Note: If all the datasets from Gateway organisation can be linked please indicate \u201cALL\u201d and the onboarding portal will automate linkage across the datasets submitted.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "derivation", + "required": false, + "title": "Derivations", + "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset.", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + }, + { + "name": "tools", + "required": false, + "title": "Tools", + "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", + "examples": null, + "type": "CommaSeparatedValues", + "subItems": [] + } + ] + }, + { + "name": "observations", + "required": true, + "title": "Observations", + "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). 
We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d", + "examples": null, + "type": "Observation", + "subItems": [ + { + "name": "observedNode", + "required": true, + "title": "Statistical Population", + "description": "Please select one of the following statistical populations for your observation", + "examples": [ + "PERSONS" + ], + "type": "StatisticalPopulationConstrained" + }, + { + "name": "measuredValue", + "required": true, + "title": "Measured Value", + "description": "Please provide the population size associated with the population type the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "examples": null, + "type": "int" + }, + { + "name": "disambiguatingDescription", + "required": false, + "title": "Disambiguating Description", + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "examples": null, + "type": "AbstractText", + "subItems": [] + }, + { + "name": "observationDate", + "required": true, + "title": "Observation Date", + "description": "Please provide the date that the observation was made. 
Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations.", + "examples": null, + "type": "date" + }, + { + "name": "measuredProperty", + "required": true, + "title": "Measured Property", + "description": "Initially this will be defaulted to \"COUNT\"", + "examples": null, + "type": "MeasuredProperty", + "subItems": [] + } + ] + }, + { + "name": "structuralMetadata", + "required": false, + "title": "Structural Metadata", + "description": "Descriptions of all tables and data elements that can be included in the dataset", + "examples": null, + "type": "DataClass" + }, + { + "name": "tissuesSampleCollection", + "required": false, + "title": "Tissues Sample Collection", + "description": "Metadata collection for Tissue Samples datasets", + "examples": null, + "type": "TissuesSampleCollection", + "subItems": [ + { + "name": "dataCategories", + "required": false, + "title": "Data Categories", + "description": "Data categories related to the tissue sample collection", + "examples": null, + "type": "TissueDataCategoriesEnum" + }, + { + "name": "materialType", + "required": false, + "title": "Material Type", + "description": "Material type of the tissue sample collection", + "examples": null, + "type": "MaterialTypeCategories" + }, + { + "name": "tissueSampleMetadata", + "required": false, + "title": "Tissue Sample Metadata", + "description": "Metadata related to the tissue sample", + "examples": null, + "type": "TissueSampleMetadata", + "subItems": [ + { + "name": "creationDate", + "required": false, + "title": "Creation Date", + "description": "Date when the tissue sample metadata was created", + 
"examples": null, + "type": "date" + }, + { + "name": "AnatomicalSiteOntologyCode", + "required": false, + "title": "Anatomical Site Ontology Code", + "description": "Ontology code for the anatomical site, this code must match an ICD-0-3 format", + "examples": null, + "type": "ICD_0_3", + "subItems": [] + } + ] + } + ] + } +] \ No newline at end of file diff --git a/hdr_schemata/models/HDRUK/__init__.py b/hdr_schemata/models/HDRUK/__init__.py index aa7f085..1af2d8d 100644 --- a/hdr_schemata/models/HDRUK/__init__.py +++ b/hdr_schemata/models/HDRUK/__init__.py @@ -1,2 +1,3 @@ from .v2_1_2 import Hdruk212 from .v2_1_3 import Hdruk213 +from .v2_2_0 import Hdruk220 diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index 2f43abf..0d70a02 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -1,6 +1,7 @@ #from hdr_schemata.models.GWDM.v1_0 import Gwdm10 as Model -from hdr_schemata.models.HDRUK import Hdruk212 as Model #from hdr_schemata.models.HDRUK.base import Observation as Model +#from hdr_schemata.models.HDRUK import Hdruk220 as Model +from hdr_schemata.models.GWDM.v1_1 import Gwdm11 as Model from pydantic import BaseModel import pandas as pd import json From c6593628bdc7475cb85fde9a7a00ffe6c6b2bf1d Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 16:26:15 +0000 Subject: [PATCH 07/21] making updates for the 2.2.0 schema --- .github/workflows/ci.yml | 2 +- hdr_schemata/models/GWDM/v1_1/__init__.py | 2 +- hdr_schemata/models/HDRUK/2.2.0/schema.json | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d28e48..9b2624c 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,7 @@ jobs: deploy: runs-on: ubuntu-latest needs: test - #if: success() && github.ref == 'refs/heads/master' + if: success() && github.ref == 'refs/heads/master' steps: - uses: actions/setup-python@v2 
with: diff --git a/hdr_schemata/models/GWDM/v1_1/__init__.py b/hdr_schemata/models/GWDM/v1_1/__init__.py index dbca75c..4079031 100644 --- a/hdr_schemata/models/GWDM/v1_1/__init__.py +++ b/hdr_schemata/models/GWDM/v1_1/__init__.py @@ -5,7 +5,7 @@ from .Summary import Summary from .TissuesSampleCollection import TissuesSampleCollection from typing import Optional -from pydantic import Field, BaseModel, constr +from pydantic import Field class Gwdm11(Gwdm10): diff --git a/hdr_schemata/models/HDRUK/2.2.0/schema.json b/hdr_schemata/models/HDRUK/2.2.0/schema.json index 6c2322e..258d1ed 100644 --- a/hdr_schemata/models/HDRUK/2.2.0/schema.json +++ b/hdr_schemata/models/HDRUK/2.2.0/schema.json @@ -1781,7 +1781,7 @@ } ], "default": null, - "description": "Ontology code for the anatomical site", + "description": "Ontology code for the anatomical site, this code must match an ICD-0-3 format", "title": "Anatomical Site Ontology Code" } }, From 7930ff5024ec0ff158770fbaae2d97f532b543a9 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 16:29:46 +0000 Subject: [PATCH 08/21] some more updates --- .github/workflows/ci.yml | 2 +- docs/HDRUK/2.2.0.md | 120 +++++++--- docs/HDRUK/2.2.0.structure.json | 93 +++++--- hdr_schemata/models/GWDM/v1_1/Coverage.py | 168 +++++++------- hdr_schemata/models/HDRUK/2.2.0/schema.json | 222 ++++++++++++++++--- hdr_schemata/models/HDRUK/v2_2_0/__init__.py | 8 + hdr_schemata/utils/create_markdown.py | 4 +- 7 files changed, 445 insertions(+), 172 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 9b2624c..8d28e48 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -49,7 +49,7 @@ jobs: deploy: runs-on: ubuntu-latest needs: test - if: success() && github.ref == 'refs/heads/master' + #if: success() && github.ref == 'refs/heads/master' steps: - uses: actions/setup-python@v2 with: diff --git a/docs/HDRUK/2.2.0.md b/docs/HDRUK/2.2.0.md index 538182d..7efede7 100644 --- 
a/docs/HDRUK/2.2.0.md +++ b/docs/HDRUK/2.2.0.md @@ -276,7 +276,7 @@ Examples: ## coverage -This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data. +Observational, Spatial and Temporal coverage @@ -285,57 +285,121 @@ This information includes attributes for geographical and temporal coverage, coh ### spatial -The geographical area covered by the dataset. It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race. +List of countries where the data was taken from -| title | required | type | -|:--------------------|:-----------|:---------------------| -| Geographic Coverage | False | CommaSeparatedValues | +| title | required | type | +|:--------|:-----------|:---------------------| +| Spatial | False | CommaSeparatedValues | + + + + +### pathway + +Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. + +| title | required | type | +|:--------|:-----------|:----------------| +| Pathway | False | LongDescription | + + + + +### followup + +What is the typical time span that a patient appears in the dataset (follow up period) + +| title | required | type | +|:---------|:-----------|:---------| +| Followup | False | Followup | + -Examples: - * https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html ### typicalAgeRange -Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). 
+Age range in whole years of participants in the dataset. Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). -| title | required | type | -|:----------|:-----------|:---------| -| Age Range | False | AgeRange | +| title | required | type | +|:------------------|:-----------|:---------| +| Typical Age Range | False | AgeRange | -### physicalSampleAvailability +### gender -Availability of physical samples associated with the dataset. If samples are available, please indicate the types of samples that are available. More than one type may be provided. If sample are not yet available, please provide “AVAILABILITY TO BE CONFIRMED”. If samples are not available, then please provide “NOT AVAILABLE”. +Male, Female, Other -| title | required | type | -|:-----------------------------|:-----------|:---------------------| -| Physical Sample Availability | False | CommaSeparatedValues | +| title | required | type | +|:--------|:-----------|:-----------| +| Gender | False | GenderType | -Examples: - * BONE MARROW -### followup -If known, what is the typical time span that a patient appears in the dataset (follow up period) +### biologicalsamples + +Blood, Saliva, Urine, Other -| title | required | type | -|:---------|:-----------|:---------| -| Followup | False | Followup | +| title | required | type | +|:-------------------|:-----------|:---------------------| +| Biological Samples | False | BiologicalSampleType | -### pathway +### psychological + +Mental health, Cognitive function + +| title | required | type | +|:--------------|:-----------|:------------------| +| Psychological | False | PsychologicalType | + + + + +### physical + +Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive + +| title | required | type | +|:---------|:-----------|:-------------| +| Physical | False | PhysicalType | + + + + +### anthropometric + +Height, Weight, Waist circumference, Hip 
circumference, Blood pressure + +| title | required | type | +|:---------------|:-----------|:-------------------| +| Anthropometric | False | AnthropometricType | + + + + +### lifestyle + +Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol + +| title | required | type | +|:----------|:-----------|:---------------| +| Lifestyle | False | LifestylesType | + + + + +### socioeconomic -Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. +Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support -| title | required | type | -|:--------|:-----------|:------------| -| Pathway | False | Description | +| title | required | type | +|:---------------|:-----------|:------------------| +| Socio-economic | False | SocioEconomicType | diff --git a/docs/HDRUK/2.2.0.structure.json b/docs/HDRUK/2.2.0.structure.json index 31c55e5..7753296 100644 --- a/docs/HDRUK/2.2.0.structure.json +++ b/docs/HDRUK/2.2.0.structure.json @@ -250,57 +250,100 @@ "name": "coverage", "required": false, "title": "Coverage", - "description": "This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data.", + "description": "Observational, Spatial and Temporal coverage", "examples": null, "type": "Coverage", "subItems": [ { "name": "spatial", "required": false, - "title": "Geographic Coverage", - "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", - "examples": [ - "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" - ], + "title": "Spatial", + "description": "List of countries where the data was taken from", + "examples": null, "type": "CommaSeparatedValues", "subItems": [] }, + { + "name": "pathway", + "required": false, + "title": "Pathway", + "description": "Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", + "examples": null, + "type": "LongDescription", + "subItems": [] + }, + { + "name": "followup", + "required": false, + "title": "Followup", + "description": "What is the typical time span that a patient appears in the dataset (follow up period)", + "examples": null, + "type": "Followup" + }, { "name": "typicalAgeRange", "required": false, - "title": "Age Range", - "description": "Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "title": "Typical Age Range", + "description": "Age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", "examples": null, "type": "AgeRange", "subItems": [] }, { - "name": "physicalSampleAvailability", + "name": "gender", "required": false, - "title": "Physical Sample Availability", - "description": "Availability of physical samples associated with the dataset. 
If samples are available, please indicate the types of samples that are available. More than one type may be provided. If sample are not yet available, please provide \u201cAVAILABILITY TO BE CONFIRMED\u201d. If samples are not available, then please provide \u201cNOT AVAILABLE\u201d.", - "examples": [ - "BONE MARROW" - ], - "type": "CommaSeparatedValues", - "subItems": [] + "title": "Gender", + "description": "Male, Female, Other", + "examples": null, + "type": "GenderType" }, { - "name": "followup", + "name": "biologicalsamples", "required": false, - "title": "Followup", - "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", + "title": "Biological Samples", + "description": "Blood, Saliva, Urine, Other", "examples": null, - "type": "Followup" + "type": "BiologicalSampleType" }, { - "name": "pathway", + "name": "psychological", "required": false, - "title": "Pathway", - "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. 
primary and secondary care), or an integrated care record covering the whole patient pathway.", + "title": "Psychological", + "description": "Mental health, Cognitive function", "examples": null, - "type": "Description", - "subItems": [] + "type": "PsychologicalType" + }, + { + "name": "physical", + "required": false, + "title": "Physical", + "description": "Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive", + "examples": null, + "type": "PhysicalType" + }, + { + "name": "anthropometric", + "required": false, + "title": "Anthropometric", + "description": "Height, Weight, Waist circumference, Hip circumference, Blood pressure", + "examples": null, + "type": "AnthropometricType" + }, + { + "name": "lifestyle", + "required": false, + "title": "Lifestyle", + "description": "Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", + "examples": null, + "type": "LifestylesType" + }, + { + "name": "socioeconomic", + "required": false, + "title": "Socio-economic", + "description": "Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support", + "examples": null, + "type": "SocioEconomicType" } ] }, diff --git a/hdr_schemata/models/GWDM/v1_1/Coverage.py b/hdr_schemata/models/GWDM/v1_1/Coverage.py index f260ada..12d22b3 100644 --- a/hdr_schemata/models/GWDM/v1_1/Coverage.py +++ b/hdr_schemata/models/GWDM/v1_1/Coverage.py @@ -1,130 +1,134 @@ from hdr_schemata.models import remove_fields_from_cls from hdr_schemata.models.GWDM.v1_0 import Coverage as BaseCoverage import re -from typing import Optional,List +from typing import Optional, List from pydantic import Field, RootModel, constr def get_pattern(allowed_phrases): - return r'\b(?:' + '|'.join(allowed_phrases) + r')(?:,(?:' + '|'.join(allowed_phrases) + r'))*\b' + return ( + r"\b(?:" + + "|".join(allowed_phrases) + + r")(?:,(?:" + + "|".join(allowed_phrases) + + r"))*\b" + ) class Anthropometric(RootModel): - root: 
Optional[constr(pattern=get_pattern( - [ - 'Blood Pressure', - 'Hip Circumference', - 'Height', - 'Waist Circumference', - 'Weight', - ]))] + root: Optional[ + constr( + pattern=get_pattern( + [ + "Blood Pressure", + "Hip Circumference", + "Height", + "Waist Circumference", + "Weight", + ] + ) + ) + ] + class BiologicalSamples(RootModel): - root: Optional[constr(pattern=get_pattern( - [ - 'Blood', - 'Other', - 'Urine', - 'Saliva' - ]))] - + root: Optional[constr(pattern=get_pattern(["Blood", "Other", "Urine", "Saliva"]))] + + class Physical(RootModel): - root: Optional[constr(pattern=get_pattern( - [ - 'Respiratory', - 'Vision', - 'Hearing', - 'Musculoskeletal', - 'Cardiovascular', - 'Reproductive' - ]))] + root: Optional[ + constr( + pattern=get_pattern( + [ + "Respiratory", + "Vision", + "Hearing", + "Musculoskeletal", + "Cardiovascular", + "Reproductive", + ] + ) + ) + ] + class Psychological(RootModel): - root: Optional[constr(pattern=get_pattern( - [ - 'Cognitive Function', - 'Mental Health' - ]))] - + root: Optional[constr(pattern=get_pattern(["Cognitive Function", "Mental Health"]))] + + class Lifestyles(RootModel): - root: Optional[constr(pattern=get_pattern( - [ - 'Smoking', - 'Dietary Habits', - 'Physical Activity', - 'Alcohol' - ]))] + root: Optional[ + constr( + pattern=get_pattern( + ["Smoking", "Dietary Habits", "Physical Activity", "Alcohol"] + ) + ) + ] + class Gender(RootModel): - root: Optional[constr(pattern=get_pattern( - [ - 'Male', - 'Female', - 'Other' - ]))] + root: Optional[constr(pattern=get_pattern(["Male", "Female", "Other"]))] + class SocioEconomic(RootModel): - root: Optional[constr(pattern=get_pattern( - [ - 'Finances', - 'Family Circumstances', - 'Housing', - 'Education', - 'Marital Status', - 'Occupation', - 'Ethnic Group', - 'Social Support' - ]))] - + root: Optional[ + constr( + pattern=get_pattern( + [ + "Finances", + "Family Circumstances", + "Housing", + "Education", + "Marital Status", + "Occupation", + "Ethnic Group", + 
"Social Support", + ] + ) + ) + ] -class Coverage(BaseCoverage): +class Coverage(BaseCoverage): class Config: - extra = 'forbid' + extra = "forbid" - gender: Optional[Gender] = Field( - None, - title='Gender', - description='Male, Female, Other' + None, title="Gender", description="Male, Female, Other" ) biologicalsamples: Optional[BiologicalSamples] = Field( - None, - title='Biological Samples', - description='Blood, Saliva, Urine, Other' + None, title="Biological Samples", description="Blood, Saliva, Urine, Other" ) psychological: Optional[Psychological] = Field( - None, - title='Psychological', - description='Mental health, Cognitive function' + None, title="Psychological", description="Mental health, Cognitive function" ) physical: Optional[Physical] = Field( None, - title='Physical', - description='Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive' + title="Physical", + description="Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive", ) anthropometric: Optional[Anthropometric] = Field( None, - title='Anthropometric', - description='Height, Weight, Waist circumference, Hip circumference, Blood pressure' + title="Anthropometric", + description="Height, Weight, Waist circumference, Hip circumference, Blood pressure", ) - + lifestyle: Optional[Lifestyles] = Field( None, - title='Lifestyle', - description='Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol' + title="Lifestyle", + description="Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", ) - socioeconomic: Optional[SocioEconomic] = Field( None, - title='Socio-economic', - description='Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support' + title="Socio-economic", + description="Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support", ) -#inherited physicalSampleAvailability but this has now been replaced by 
biologicalsamples -remove_fields_from_cls(Coverage,['physicalSampleAvailability']) + +# inherited physicalSampleAvailability but this has now been replaced by biologicalsamples +remove_fields_from_cls(Coverage, ["physicalSampleAvailability"]) diff --git a/hdr_schemata/models/HDRUK/2.2.0/schema.json b/hdr_schemata/models/HDRUK/2.2.0/schema.json index 258d1ed..0a8d0be 100644 --- a/hdr_schemata/models/HDRUK/2.2.0/schema.json +++ b/hdr_schemata/models/HDRUK/2.2.0/schema.json @@ -190,6 +190,27 @@ ], "title": "AgeRange" }, + "AnthropometricType": { + "enum": [ + "Blood Pressure", + "Hip Circumference", + "Height", + "Waist Circumference", + "Weight" + ], + "title": "AnthropometricType", + "type": "string" + }, + "BiologicalSampleType": { + "enum": [ + "Blood", + "Other", + "Urine", + "Saliva" + ], + "title": "BiologicalSampleType", + "type": "string" + }, "CommaSeparatedValues": { "anyOf": [ { @@ -267,28 +288,41 @@ "$ref": "#/$defs/CommaSeparatedValues" }, { - "items": { - "anyOf": [ - { - "$ref": "#/$defs/Url" - }, - { - "type": "null" - } - ] - }, - "type": "array" + "type": "null" + } + ], + "default": null, + "description": "List of countries where the data was taken from", + "example": "United Kingdom,Wales,England", + "title": "Spatial" + }, + "pathway": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" }, { "type": "null" } ], "default": null, - "description": "The geographical area covered by the dataset. It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", - "examples": [ - "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" + "description": "Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. 
primary and secondary care), or an integrated care record covering the whole patient pathway.", + "example": "The lookup contains references to link data held elsewhere on:\n\u2022 individuals appearing as defendants in criminal cases dealt with by the magistrates' or Crown Court in England and Wales (including Youth Courts). \n\u2022 individuals supervised by the probation service in England and Wales\n\u2022 individuals serving custodial sentences in England & Wales who appear within records from the prison data source, p-NOMIS. Young Offenders are included if resident at prisons or Young Offender Institutes (YOIs) that use p-NOMIS, however, this excludes the majority of Secure Schools and Secure Training Centres. \"\n\n\"The linking dataset includes a person ID and link to record in other data first datasets for: \n\u2022 Disposals in the magistrates\u2019 court from 1 January 2011 to 31 December 2020\n\u2022 Disposals in the Crown Court from 1 January 2013 to 31 December 2020\n\u2022 Custodial sentences of offenders in custody from January 2011 to September 2021 (including sentences begun before 2011) \n\u2022 Offender probation records from January 2014 to December 2020.", + "title": "Pathway" + }, + "followup": { + "anyOf": [ + { + "$ref": "#/$defs/Followup" + }, + { + "type": "null" + } ], - "title": "Geographic Coverage" + "default": null, + "description": "What is the typical time span that a patient appears in the dataset (follow up period)", + "example": "CONTINUOUS", + "title": "Followup" }, "typicalAgeRange": { "anyOf": [ @@ -300,16 +334,32 @@ } ], "default": null, - "description": "Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", - "title": "Age Range" + "description": "Age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "example": "1-150", + "title": "Typical Age Range" }, - "physicalSampleAvailability": { + "gender": { "anyOf": [ { - "$ref": "#/$defs/CommaSeparatedValues" + "items": { + "$ref": "#/$defs/GenderType" + }, + "type": "array" }, { - "items": {}, + "type": "null" + } + ], + "default": null, + "description": "Male, Female, Other", + "title": "Gender" + }, + "biologicalsamples": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/BiologicalSampleType" + }, "type": "array" }, { @@ -317,37 +367,88 @@ } ], "default": null, - "description": "Availability of physical samples associated with the dataset. If samples are available, please indicate the types of samples that are available. More than one type may be provided. If sample are not yet available, please provide \u201cAVAILABILITY TO BE CONFIRMED\u201d. If samples are not available, then please provide \u201cNOT AVAILABLE\u201d.", - "examples": [ - "BONE MARROW" + "description": "Blood, Saliva, Urine, Other", + "title": "Biological Samples" + }, + "psychological": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/PsychologicalType" + }, + "type": "array" + }, + { + "type": "null" + } ], - "title": "Physical Sample Availability" + "default": null, + "description": "Mental health, Cognitive function", + "title": "Psychological" }, - "followup": { + "physical": { "anyOf": [ { - "$ref": "#/$defs/Followup" + "items": { + "$ref": "#/$defs/PhysicalType" + }, + "type": "array" }, { "type": "null" } ], - "default": "UNKNOWN", - "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", - "title": "Followup" + "default": null, + "description": "Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive", + "title": "Physical" }, - "pathway": { + "anthropometric": { "anyOf": [ { - "$ref": "#/$defs/Description" + 
"items": { + "$ref": "#/$defs/AnthropometricType" + }, + "type": "array" }, { "type": "null" } ], "default": null, - "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", - "title": "Pathway" + "description": "Height, Weight, Waist circumference, Hip circumference, Blood pressure", + "title": "Anthropometric" + }, + "lifestyle": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/LifestylesType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", + "title": "Lifestyle" + }, + "socioeconomic": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/SocioEconomicType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support", + "title": "Socio-economic" } }, "title": "Coverage", @@ -818,6 +919,15 @@ "title": "FormatAndStandards", "type": "object" }, + "GenderType": { + "enum": [ + "Male", + "Female", + "Other" + ], + "title": "GenderType", + "type": "string" + }, "ICD_0_3": { "anyOf": [ { @@ -1041,6 +1151,16 @@ "title": "LanguageEnum", "type": "string" }, + "LifestylesType": { + "enum": [ + "Smoking", + "Dietary Habits", + "Physical Activity", + "Alcohol" + ], + "title": "LifestylesType", + "type": "string" + }, "LongDescription": { "anyOf": [ { @@ -1342,6 +1462,18 @@ ], "title": "Periodicity" }, + "PhysicalType": { + "enum": [ + "Respiratory", + "Vision", + "Hearing", + "Musculoskeletal", + "Cardiovascular", + "Reproductive" + ], + "title": "PhysicalType", + "type": "string" + }, "Provenance": { 
"additionalProperties": false, "properties": { @@ -1366,6 +1498,14 @@ "title": "Provenance", "type": "object" }, + "PsychologicalType": { + "enum": [ + "Cognitive Function", + "Mental Health" + ], + "title": "PsychologicalType", + "type": "string" + }, "Purpose": { "enum": [ "STUDY", @@ -1449,6 +1589,20 @@ ], "title": "ShortDescription" }, + "SocioEconomicType": { + "enum": [ + "Finances", + "Family Circumstances", + "Housing", + "Education", + "Marital Status", + "Occupation", + "Ethnic Group", + "Social Support" + ], + "title": "SocioEconomicType", + "type": "string" + }, "Source": { "enum": [ "EPR", @@ -2071,7 +2225,7 @@ } ], "default": null, - "description": "This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data.", + "description": "Observational, Spatial and Temporal coverage", "title": "Coverage" }, "provenance": { diff --git a/hdr_schemata/models/HDRUK/v2_2_0/__init__.py b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py index 87271ef..d514b3e 100644 --- a/hdr_schemata/models/HDRUK/v2_2_0/__init__.py +++ b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py @@ -4,6 +4,7 @@ from pydantic import Field from .TissuesSampleCollection import TissuesSampleCollection +from .Coverage import Coverage class Hdruk220(Hdruk213): @@ -13,6 +14,13 @@ class Hdruk220(Hdruk213): title="Tissues Sample Collection", ) + # overload Coverage with an updated version of it.. 
+ coverage: Optional[Coverage] = Field( + None, + description="Observational, Spatial and Temporal coverage", + title="Coverage", + ) + @classmethod def save_schema(cls, location="./2.2.0/schema.json"): with open(location, "w") as f: diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index 0d70a02..e364485 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -1,7 +1,7 @@ #from hdr_schemata.models.GWDM.v1_0 import Gwdm10 as Model #from hdr_schemata.models.HDRUK.base import Observation as Model -#from hdr_schemata.models.HDRUK import Hdruk220 as Model -from hdr_schemata.models.GWDM.v1_1 import Gwdm11 as Model +from hdr_schemata.models.HDRUK import Hdruk220 as Model +#from hdr_schemata.models.GWDM.v1_1 import Gwdm11 as Model from pydantic import BaseModel import pandas as pd import json From ba95f1af749580103b70b4136e444e34049a7412 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 16:36:10 +0000 Subject: [PATCH 09/21] adding in some datasetypes --- hdr_schemata/models/HDRUK/2.2.0/schema.json | 64 +++++++++++++++++++- hdr_schemata/models/HDRUK/v2_2_0/__init__.py | 16 +++-- 2 files changed, 75 insertions(+), 5 deletions(-) diff --git a/hdr_schemata/models/HDRUK/2.2.0/schema.json b/hdr_schemata/models/HDRUK/2.2.0/schema.json index 0a8d0be..39cd77f 100644 --- a/hdr_schemata/models/HDRUK/2.2.0/schema.json +++ b/hdr_schemata/models/HDRUK/2.2.0/schema.json @@ -585,6 +585,19 @@ ], "title": "DataUseRequirements" }, + "DatasetType": { + "anyOf": [ + { + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "DatasetType" + }, "DeliveryLeadTime": { "enum": [ "LESS 1 WEEK", @@ -1777,6 +1790,52 @@ "10.3399/bjgp17X692645" ], "title": "Digital Object Identifier" + }, + "datasetType": { + "anyOf": [ + { + "$ref": "#/$defs/DatasetType" + }, + { + "type": "null" + } + ], + "description": "Placeholder for dataset type", + "examples": [ + 
[ + "" + ] + ], + "title": "Datasetype" + }, + "datasetSubType": { + "anyOf": [ + { + "$ref": "#/$defs/DatasetType" + }, + { + "type": "null" + } + ], + "description": "Placeholder for dataset sub-type", + "examples": [ + [ + "" + ] + ], + "title": "Datasetype" + }, + "populationSize": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Summary population size of the cohort", + "title": "Population size" } }, "required": [ @@ -1784,7 +1843,10 @@ "abstract", "publisher", "contactPoint", - "keywords" + "keywords", + "datasetType", + "datasetSubType", + "populationSize" ], "title": "Summary", "type": "object" diff --git a/hdr_schemata/models/HDRUK/v2_2_0/__init__.py b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py index d514b3e..1f2ad0a 100644 --- a/hdr_schemata/models/HDRUK/v2_2_0/__init__.py +++ b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py @@ -4,14 +4,16 @@ from pydantic import Field from .TissuesSampleCollection import TissuesSampleCollection +from .Summary import Summary from .Coverage import Coverage class Hdruk220(Hdruk213): - tissuesSampleCollection: Optional[TissuesSampleCollection] = Field( - None, - description="Metadata collection for Tissue Samples datasets", - title="Tissues Sample Collection", + # update on summary to include datasetType and population size + summary: Summary = Field( + ..., + description="Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP.", + title="Summary", ) # overload Coverage with an updated version of it.. 
@@ -21,6 +23,12 @@ class Hdruk220(Hdruk213): title="Coverage", ) + tissuesSampleCollection: Optional[TissuesSampleCollection] = Field( + None, + description="Metadata collection for Tissue Samples datasets", + title="Tissues Sample Collection", + ) + @classmethod def save_schema(cls, location="./2.2.0/schema.json"): with open(location, "w") as f: From 9ddd0f51977669e7b941f1eeed3ab0445c2bfb72 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 16:36:35 +0000 Subject: [PATCH 10/21] update the docs again --- docs/HDRUK/2.2.0.md | 35 +++++++++++++++++++++++++++++++++ docs/HDRUK/2.2.0.structure.json | 34 ++++++++++++++++++++++++++++++++ 2 files changed, 69 insertions(+) diff --git a/docs/HDRUK/2.2.0.md b/docs/HDRUK/2.2.0.md index 7efede7..8f1b7b2 100644 --- a/docs/HDRUK/2.2.0.md +++ b/docs/HDRUK/2.2.0.md @@ -230,6 +230,41 @@ Examples: * 10.3399/bjgp17X692645 +### datasetType + +Placeholder for dataset type + +| title | required | type | +|:-----------|:-----------|:------------| +| Datasetype | True | DatasetType | + +Examples: + * [''] + + +### datasetSubType + +Placeholder for dataset sub-type + +| title | required | type | +|:-----------|:-----------|:------------| +| Datasetype | True | DatasetType | + +Examples: + * [''] + + +### populationSize + +Summary population size of the cohort + +| title | required | type | +|:----------------|:-----------|:-------| +| Population size | True | int | + + + + ## documentation Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. Organisations are required to confirm that they have permission to distribute any additional media. 
diff --git a/docs/HDRUK/2.2.0.structure.json b/docs/HDRUK/2.2.0.structure.json index 7753296..2f8a7d1 100644 --- a/docs/HDRUK/2.2.0.structure.json +++ b/docs/HDRUK/2.2.0.structure.json @@ -202,6 +202,40 @@ ], "type": "Doi", "subItems": [] + }, + { + "name": "datasetType", + "required": true, + "title": "Datasetype", + "description": "Placeholder for dataset type", + "examples": [ + [ + "" + ] + ], + "type": "DatasetType", + "subItems": [] + }, + { + "name": "datasetSubType", + "required": true, + "title": "Datasetype", + "description": "Placeholder for dataset sub-type", + "examples": [ + [ + "" + ] + ], + "type": "DatasetType", + "subItems": [] + }, + { + "name": "populationSize", + "required": true, + "title": "Population size", + "description": "Summary population size of the cohort", + "examples": null, + "type": "int" } ] }, From 8310075da904a2d35f3bf797e4ecf4f87d599a96 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 16:37:32 +0000 Subject: [PATCH 11/21] forgot to add files --- .../definitions/HDRUK/BiologicalSamples.py | 54 +++++++++++++++++++ hdr_schemata/models/HDRUK/v2_2_0/Coverage.py | 50 +++++++++++++++++ hdr_schemata/models/HDRUK/v2_2_0/Summary.py | 26 +++++++++ 3 files changed, 130 insertions(+) create mode 100644 hdr_schemata/definitions/HDRUK/BiologicalSamples.py create mode 100644 hdr_schemata/models/HDRUK/v2_2_0/Coverage.py create mode 100644 hdr_schemata/models/HDRUK/v2_2_0/Summary.py diff --git a/hdr_schemata/definitions/HDRUK/BiologicalSamples.py b/hdr_schemata/definitions/HDRUK/BiologicalSamples.py new file mode 100644 index 0000000..d00f244 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/BiologicalSamples.py @@ -0,0 +1,54 @@ +from enum import Enum + + +class AnthropometricType(Enum): + BLOOD_PRESSURE = "Blood Pressure" + HIP_CIRCUMFERENCE = "Hip Circumference" + HEIGHT = "Height" + WAIST_CIRCUMFERENCE = "Waist Circumference" + WEIGHT = "Weight" + + +class BiologicalSampleType(Enum): + BLOOD = "Blood" + OTHER = "Other" 
+ URINE = "Urine" + SALIVA = "Saliva" + + +class PhysicalType(Enum): + RESPIRATORY = "Respiratory" + VISION = "Vision" + HEARING = "Hearing" + MUSCULOSKELETAL = "Musculoskeletal" + CARDIOVASCULAR = "Cardiovascular" + REPRODUCTIVE = "Reproductive" + + +class PsychologicalType(Enum): + COGNITIVE_FUNCTION = "Cognitive Function" + MENTAL_HEALTH = "Mental Health" + + +class LifestylesType(Enum): + SMOKING = "Smoking" + DIETARY_HABITS = "Dietary Habits" + PHYSICAL_ACTIVITY = "Physical Activity" + ALCOHOL = "Alcohol" + + +class GenderType(Enum): + MALE = "Male" + FEMALE = "Female" + OTHER = "Other" + + +class SocioEconomicType(Enum): + FINANCES = "Finances" + FAMILY_CIRCUMSTANCES = "Family Circumstances" + HOUSING = "Housing" + EDUCATION = "Education" + MARITAL_STATUS = "Marital Status" + OCCUPATION = "Occupation" + ETHNIC_GROUP = "Ethnic Group" + SOCIAL_SUPPORT = "Social Support" diff --git a/hdr_schemata/models/HDRUK/v2_2_0/Coverage.py b/hdr_schemata/models/HDRUK/v2_2_0/Coverage.py new file mode 100644 index 0000000..498b197 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_2_0/Coverage.py @@ -0,0 +1,50 @@ +from hdr_schemata.models import remove_fields_from_cls +from hdr_schemata.models.GWDM.v1_0 import Coverage as BaseCoverage +from hdr_schemata.definitions.HDRUK.BiologicalSamples import * +from typing import Optional, List +from pydantic import Field + + +class Coverage(BaseCoverage): + class Config: + extra = "forbid" + + gender: Optional[List[GenderType]] = Field( + None, title="Gender", description="Male, Female, Other" + ) + + biologicalsamples: Optional[List[BiologicalSampleType]] = Field( + None, title="Biological Samples", description="Blood, Saliva, Urine, Other" + ) + + psychological: Optional[List[PsychologicalType]] = Field( + None, title="Psychological", description="Mental health, Cognitive function" + ) + + physical: Optional[List[PhysicalType]] = Field( + None, + title="Physical", + description="Cardiovascular, Respiratory, Musculoskeletal, Hearing and 
Vision, Reproductive", + ) + + anthropometric: Optional[List[AnthropometricType]] = Field( + None, + title="Anthropometric", + description="Height, Weight, Waist circumference, Hip circumference, Blood pressure", + ) + + lifestyle: Optional[List[LifestylesType]] = Field( + None, + title="Lifestyle", + description="Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", + ) + + socioeconomic: Optional[List[SocioEconomicType]] = Field( + None, + title="Socio-economic", + description="Occupation, Family circumstances, Housing, Education, Ethnic group, Marital status, Social support", + ) + + +# inherited physicalSampleAvailability but this has now been replaced by biologicalsamples +remove_fields_from_cls(Coverage, ["physicalSampleAvailability"]) diff --git a/hdr_schemata/models/HDRUK/v2_2_0/Summary.py b/hdr_schemata/models/HDRUK/v2_2_0/Summary.py new file mode 100644 index 0000000..6ede1f1 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_2_0/Summary.py @@ -0,0 +1,26 @@ +from typing import Optional +from pydantic import Field +from hdr_schemata.models.HDRUK.v2_1_2.Summary import Summary as BaseSummary +from hdr_schemata.definitions.HDRUK import DatasetType + + +class Summary(BaseSummary): + datasetType: Optional[DatasetType] = Field( + ..., + description="Placeholder for dataset type", + examples=[[""]], + title="Datasetype", + ) + + datasetSubType: Optional[DatasetType] = Field( + ..., + description="Placeholder for dataset sub-type", + examples=[[""]], + title="Datasetype", + ) + + populationSize: Optional[int] = Field( + ..., + description="Summary population size of the cohort", + title="Population size", + ) From 3ca25a04c23c232800593046dbf6e4aabb253778 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 16:38:28 +0000 Subject: [PATCH 12/21] add schema to the mkdocs --- mkdocs.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/mkdocs.yml b/mkdocs.yml index 09c3f26..5d65a4b 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@
-18,6 +18,7 @@ nav: - Version 1.1: GWDM/1.1.md - HDRUK Schema: - Version 2.1.2: HDRUK/2.1.2.md + - Version 2.2.0: HDRUK/2.2.0.md plugins: - search From 2121c7d7e9b48af0fdc7efd965b97351bafcbd62 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Wed, 31 Jan 2024 16:46:18 +0000 Subject: [PATCH 13/21] update the 2.2.0 docs --- docs/HDRUK/2.2.0.md | 12 ++++++++++++ hdr_schemata/utils/create_markdown.py | 5 +++-- 2 files changed, 15 insertions(+), 2 deletions(-) diff --git a/docs/HDRUK/2.2.0.md b/docs/HDRUK/2.2.0.md index 8f1b7b2..9e797ab 100644 --- a/docs/HDRUK/2.2.0.md +++ b/docs/HDRUK/2.2.0.md @@ -8,6 +8,7 @@ System dataset identifier | Dataset identifier | True | Uuidv4 | Examples: + * ['226fb3f1-4471-400a-8c39-2b66d46a39b6', 'https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6'] @@ -20,6 +21,7 @@ Dataset metadata version | Dataset Version | True | Semver | Examples: + * 1.1.0 @@ -94,6 +96,7 @@ Title of the dataset limited to 150 characters. It should provide a short descri | Title | True | OneHundredFiftyCharacters | Examples: + * ['North West London COVID-19 Patient Level Situation Report'] @@ -106,6 +109,7 @@ Provide a clear and brief descriptive signpost for researchers who are searching | Dataset Abstract | True | AbstractText | Examples: + * CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web® including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice. 
@@ -193,6 +197,7 @@ Please provide a valid email address that can be used to coordinate data access | Contact Point | True | EmailAddress | Examples: + * SAILDatabank@swansea.ac.uk @@ -227,6 +232,7 @@ All HDR UK registered datasets should either have a Digital Object Identifier (D | Digital Object Identifier | False | Doi | Examples: + * 10.3399/bjgp17X692645 @@ -239,6 +245,7 @@ Placeholder for dataset type | Datasetype | True | DatasetType | Examples: + * [''] @@ -251,6 +258,7 @@ Placeholder for dataset sub-type | Datasetype | True | DatasetType | Examples: + * [''] @@ -294,6 +302,7 @@ Please provide any media associated with the Gateway Organisation using a valid | Associated Media | False | CommaSeparatedValues | Examples: + * PDF Document that describes study protocol @@ -306,6 +315,7 @@ Please complete only if the dataset is part of a group or family | Group | False | CommaSeparatedValues | Examples: + * Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS). @@ -656,6 +666,7 @@ Please provide a brief description of the data access services that are availabl | Access Service | False | LongDescription | Examples: + * https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide @@ -827,6 +838,7 @@ Please select one of the following statistical populations for you observation | Statistical Population | True | StatisticalPopulationConstrained | Examples: + * PERSONS diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index e364485..70387a9 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -54,8 +54,8 @@ def json_to_markdown(structure,level=2): description = field.pop('description') examples = field.pop('examples') if examples: - examples = "\n".join([' * '+str(x) for x in examples]) - examples = "Examples: \n" + examples + examples = "\n".join([' * '+str(x) for x in examples]) + examples = "Examples: \n\n " + examples else: examples = "" @@ -75,6 +75,7 @@ def 
json_to_markdown(structure,level=2): {examples} ''' + if subItems: md += json_to_markdown(subItems,level=level+1) From fe534c2bbcdfead4a52accc40346743fbdf24a79 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Fri, 2 Feb 2024 09:24:18 +0000 Subject: [PATCH 14/21] small fix --- hdr_schemata/models/GWDM/v1_0/Temporal.py | 33 +++-- hdr_schemata/utils/create_markdown.py | 156 +++++++++++++++------- hdr_schemata/utils/print_structure.py | 9 +- 3 files changed, 130 insertions(+), 68 deletions(-) diff --git a/hdr_schemata/models/GWDM/v1_0/Temporal.py b/hdr_schemata/models/GWDM/v1_0/Temporal.py index d5f9b77..d593b4e 100644 --- a/hdr_schemata/models/GWDM/v1_0/Temporal.py +++ b/hdr_schemata/models/GWDM/v1_0/Temporal.py @@ -1,4 +1,4 @@ -from datetime import date,datetime +from datetime import date, datetime from typing import Optional, List, Union from pydantic import BaseModel, Field from hdr_schemata.definitions.HDRUK import * @@ -6,37 +6,36 @@ class Temporal(BaseModel): class Config: - extra = 'forbid' + extra = "forbid" - startDate: Optional[Union[date, datetime]] = Field( ..., - description='The start of the time period that the dataset provides coverage for', - example='12/03/2020', - title='Start Date', + description="The start of the time period that the dataset provides coverage for", + example="12/03/2020", + title="Start Date", ) endDate: Optional[Union[date, datetime]] = Field( None, - description='The end of the time period that the dataset provides coverage for', - example='12/03/2020', - title='End Date', + description="The end of the time period that the dataset provides coverage for", + example="12/03/2020", + title="End Date", ) timeLag: TimeLag = Field( ..., - description='Rypical time-lag between an event and the data for that event appearing in the dataset', + description="Typical time-lag between an event and the data for that event appearing in the dataset", example="LESS 1 WEEK", - title='Time Lag', + title="Time Lag", ) - + accrualPeriodicity: 
Periodicity = Field( ..., - description='frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.', + description="frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.", example="MONTHLY", - title='Periodicity', + title="Periodicity", ) - + distributionReleaseDate: Optional[Union[date, datetime]] = Field( None, - description='Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity.', - title='Release Date', + description="Date of the latest release of the dataset. If this is a regular release i.e. 
quarterly, or this is a static dataset please complete this alongside Periodicity.", + title="Release Date", ) diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index 70387a9..e36b303 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -1,71 +1,135 @@ -#from hdr_schemata.models.GWDM.v1_0 import Gwdm10 as Model -#from hdr_schemata.models.HDRUK.base import Observation as Model -from hdr_schemata.models.HDRUK import Hdruk220 as Model -#from hdr_schemata.models.GWDM.v1_1 import Gwdm11 as Model -from pydantic import BaseModel +# from hdr_schemata.models.GWDM.v1_0 import Gwdm10 as Model +# from hdr_schemata.models.HDRUK.base import Observation as Model +# from hdr_schemata.models.HDRUK import Hdruk220 as Model +from hdr_schemata.models.GWDM.v1_1 import Gwdm11 as Model + +from pydantic._internal._model_construction import ModelMetaclass +from pydantic import BaseModel, RootModel import pandas as pd import json import typing import enum -def get_fields(structure,model: type[BaseModel]): +from hdr_schemata.models.HDRUK.v2_1_2.Observations import Observation + +_type1 = typing.List[Observation] +_type2 = typing.Optional[Observation] +_type3 = typing.Union[Observation, str] + + +def extract_type_info(type_hint): + is_list = False + is_optional = False + inner_types = None + if getattr(type_hint, "__origin__", None) is list: + is_list = True + inner_types = type_hint.__args__ + elif getattr(type_hint, "__origin__", None) is typing.Union: + inner_types = type_hint.__args__ + inner_types_not_none = [ + _type for _type in inner_types if not _type is type(None) + ] + is_optional = len(inner_types_not_none) < len(inner_types) + + inner_types = inner_types_not_none + if is_optional: + inner_types += ["null"] + + inner_type = inner_types[0] + is_list = getattr(inner_type, "__origin__", None) is list + if hasattr(inner_type, "__args__"): + inner_types = inner_type.__args__ + + else: + inner_types = 
[type_hint] + + type_names = [] + for _type in inner_types: + type_name = getattr(_type, "__name__", str(_type)) + + try: + if _type and issubclass(_type, RootModel): + info = _type.model_json_schema() + title = info.pop("title") + type_name += "[" + json.dumps(info).replace('"', "'") + "]" + # type_name = {title: info} + except TypeError: + ... + + if type(_type) == enum.EnumMeta: + type_name += ( + "[" + + ",".join( + [ + "'" + member.value + "'" if member.value else "null" + for member in _type + ] + ) + + "]" + ) + + type_names.append(type_name) + + return is_list, is_optional, type_names + + +def get_fields(structure, model: type[BaseModel]): model_hints = typing.get_type_hints(model) for name, field in model.model_fields.items(): - if name == 'root': + if name == "root": continue + # if name != "structuralMetadata": + # continue t = field.annotation _type = model_hints[name] - if isinstance(model_hints[name],type): - _type = model_hints[name].__name__ - else: - _type = model_hints[name].__args__[0] - if not isinstance(_type,type): - _type = _type.__args__[0] - _type = _type.__name__ + is_list, is_optional, type_names = extract_type_info(_type) value = { - 'name':name, - 'required':field.is_required(), - 'title':field.title, - 'description':field.description, - 'title':field.title, - 'examples':field.examples, - 'type':_type + "name": name, + "required": field.is_required(), + "title": field.title, + "description": field.description, + "title": field.title, + "examples": field.examples, + "type": type_names, + "is_list": is_list, + "is_optional": is_optional, } - if hasattr(t,'__args__'): + if hasattr(t, "__args__"): t = t.__args__[0] - + if isinstance(t, type) and issubclass(t, BaseModel): subItems = [] - get_fields(subItems,t) - value['subItems'] = subItems + get_fields(subItems, t) + value["subItems"] = subItems structure.append(value) -def json_to_markdown(structure,level=2): + +def json_to_markdown(structure, level=2): md = "" for field in structure: - 
name = field.pop('name') - subItems = field.pop('subItems',None) - description = field.pop('description') - examples = field.pop('examples') + name = field.pop("name") + subItems = field.pop("subItems", None) + description = field.pop("description") + examples = field.pop("examples") if examples: - examples = "\n".join([' * '+str(x) for x in examples]) + examples = "\n".join([" * " + str(x) for x in examples]) examples = "Examples: \n\n " + examples else: examples = "" table = "" if not subItems: - table = pd.Series(field).sort_index().to_frame().T.set_index('title') + table = pd.Series(field).sort_index().to_frame().T.set_index("title") table = table.to_markdown() - - heading = "#"*level - md += rf''' + + heading = "#" * level + md += rf""" {heading} {name} {description} @@ -74,25 +138,23 @@ def json_to_markdown(structure,level=2): {examples} -''' +""" if subItems: - md += json_to_markdown(subItems,level=level+1) + md += json_to_markdown(subItems, level=level + 1) - return md + structure = [] -get_fields(structure,Model) -#get_fields(structure,Hdruk212) +get_fields(structure, Model) +# get_fields(structure,Hdruk212) -with open('temp.json','w') as f: - json.dump(structure,f,indent=6) +with open("temp.json", "w") as f: + print(json.dumps(structure, indent=6)) + json.dump(structure, f, indent=6) md = json_to_markdown(structure) - -with open('temp.md','w') as f: - f.write(md) - - +with open("temp.md", "w") as f: + f.write(md) diff --git a/hdr_schemata/utils/print_structure.py b/hdr_schemata/utils/print_structure.py index f6985e4..04cf59c 100644 --- a/hdr_schemata/utils/print_structure.py +++ b/hdr_schemata/utils/print_structure.py @@ -2,13 +2,14 @@ import json structure = {} -for schema in glob.glob('hdr_schemata/models/**/schema.json',recursive=True): - items = schema.split('/') - if len(items) != 5: continue +for schema in glob.glob("hdr_schemata/models/**/schema.json", recursive=True): + items = schema.split("/") + if len(items) != 5: + continue model = items[2] 
version = items[3] if model not in structure: structure[model] = [] structure[model].append(version) -json.dump(structure,open('available.json','w'),indent=6) +json.dump(structure, open("available.json", "w"), indent=6) From 2cda6adf98471261f475bdd3c64ff40c384a85b9 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Fri, 2 Feb 2024 11:18:43 +0000 Subject: [PATCH 15/21] add a script to create an example and add enums for collection type --- .../HDRUK/TissueCollectionTypeEnum.py | 14 ++++++++++++++ .../HDRUK/v2_2_0/TissuesSampleCollection.py | 11 +++++++++-- hdr_schemata/utils/create_example.py | 19 +++++++++++++++++++ 3 files changed, 42 insertions(+), 2 deletions(-) create mode 100644 hdr_schemata/definitions/HDRUK/TissueCollectionTypeEnum.py create mode 100644 hdr_schemata/utils/create_example.py diff --git a/hdr_schemata/definitions/HDRUK/TissueCollectionTypeEnum.py b/hdr_schemata/definitions/HDRUK/TissueCollectionTypeEnum.py new file mode 100644 index 0000000..3af6202 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/TissueCollectionTypeEnum.py @@ -0,0 +1,14 @@ +from enum import Enum + + +class TissueCollectionTypeEnum(Enum): + CASE_CONTROL = "Case-control" + COHORT = "Cohort" + CROSS_SECTIONAL = "Cross-sectional" + LONGITUDINAL = "Longitudinal" + TWIN_STUDY = "Twin-study" + QUALITY_CONTROL = "Quality control" + POPULATION_BASED = "Population-based" + DISEASE_SPECIFIC = "Disease specific" + BIRTH_COHORT = "Birth cohort" + OTHER = "Other" diff --git a/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py b/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py index 95b6e93..ebda98b 100644 --- a/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py +++ b/hdr_schemata/models/HDRUK/v2_2_0/TissuesSampleCollection.py @@ -3,6 +3,7 @@ from .TissueSampleMetadata import TissueSampleMetadata from hdr_schemata.definitions.HDRUK import ( TissueDataCategoriesEnum, + TissueCollectionTypeEnum, MaterialTypeCategories, ) @@ -11,13 +12,13 @@ class 
TissuesSampleCollection(BaseModel): dataCategories: Optional[List[TissueDataCategoriesEnum]] = Field( None, title="Data Categories", - description="Data categories related to the tissue sample collection", + description="The type of data that is associated with the samples in the study. Can be several values MIABIS-2.0-13", ) materialType: Optional[List[MaterialTypeCategories]] = Field( None, title="Material Type", - description="Material type of the tissue sample collection", + description="The biospecimen saved from a biological entity for propagation e.g. testing, diagnostics, treatment or research purposes. Can be several values MIABIS-2.0-14", ) tissueSampleMetadata: Optional[TissueSampleMetadata] = Field( @@ -25,3 +26,9 @@ class TissuesSampleCollection(BaseModel): title="Tissue Sample Metadata", description="Metadata related to the tissue sample", ) + + collectionType: Optional[TissueCollectionTypeEnum] = Field( + None, + title="Collection Type", + description="The type of the sample collection. 
Can be several values [MIABIS-2.0-16](https://github.com/BBMRI-ERIC/miabis/blob/master/Structured-data-and-lists.md#collection-type)", + ) diff --git a/hdr_schemata/utils/create_example.py b/hdr_schemata/utils/create_example.py new file mode 100644 index 0000000..1e4e9ac --- /dev/null +++ b/hdr_schemata/utils/create_example.py @@ -0,0 +1,19 @@ +import json + +data = json.load(open("temp.json")) + + +def get_subItems(item): + return ( + {subItem["name"]: get_subItems(subItem) for subItem in item["subItems"]} + if item.get("subItems") + else " | ".join(item["type"]) + ) + + +example = {item["name"]: get_subItems(item) for item in data} +print(json.dumps(example, indent=6)) +with open("temp2.json", "w") as f: + json.dump(example, f, indent=6) + +# print(json.dumps(data, indent=6)) From 3acab944f1a7db7fd147d1b1b4ffb2af5cb58404 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Fri, 2 Feb 2024 14:13:19 +0000 Subject: [PATCH 16/21] updating --- hdr_schemata/definitions/HDRUK/__init__.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hdr_schemata/definitions/HDRUK/__init__.py b/hdr_schemata/definitions/HDRUK/__init__.py index 25cba41..34fed8d 100644 --- a/hdr_schemata/definitions/HDRUK/__init__.py +++ b/hdr_schemata/definitions/HDRUK/__init__.py @@ -38,6 +38,7 @@ from .StandardisedDataModelsEnum import StandardisedDataModelsEnum from .StatisticalPopulationConstrained import StatisticalPopulationConstrained from .TimeLag import TimeLag +from .TissueCollectionTypeEnum import TissueCollectionTypeEnum from .TissueDataCategoriesEnum import TissueDataCategoriesEnum from .TwoHundredFiftyFiveCharacters import TwoHundredFiftyFiveCharacters from .Url import Url From 1acc347405a1d967e42cc6d14601b8388b15cd65 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 5 Feb 2024 09:12:04 +0000 Subject: [PATCH 17/21] automatically update the docs --- .github/workflows/ci.yml | 117 ++- docs/GWDM/1.0.md | 548 +++++++++---- docs/GWDM/1.0.structure.json | 477 +++++++++-- 
docs/GWDM/1.1.md | 537 ++++++------- docs/GWDM/1.1.structure.json | 737 ++++++++++++++--- docs/HDRUK/2.1.2.md | 366 +++++---- docs/HDRUK/2.1.2.structure.json | 575 ++++++++++--- docs/HDRUK/2.1.3.md | 799 +++++++++++++++++++ docs/HDRUK/2.1.3.structure.json | 1064 +++++++++++++++++++++++++ docs/HDRUK/2.2.0.md | 447 ++++++----- docs/HDRUK/2.2.0.structure.json | 634 ++++++++++++--- hdr_schemata/utils/create_markdown.py | 45 +- 12 files changed, 5037 insertions(+), 1309 deletions(-) create mode 100644 docs/HDRUK/2.1.3.md create mode 100644 docs/HDRUK/2.1.3.structure.json diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 8d28e48..1c8a838 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -3,62 +3,61 @@ name: Pytest on: [push] jobs: - check_json: - runs-on: ubuntu-latest - - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Check JSON - run: | - file_path="available.json" - if [ -f "$file_path" ]; then - if jq . "$file_path" >/dev/null 2>&1; then - echo "JSON file is valid." - else - echo "JSON file is not valid." - exit 1 - fi - else - echo "JSON file does not exist." - exit 1 - fi - - test: - runs-on: ubuntu-latest - steps: - - name: Checkout code - uses: actions/checkout@v2 - - - name: Set up Python - uses: actions/setup-python@v2 - with: - python-version: 3.9 - - - name: Install schema module - run: | - python -m pip install --upgrade pip - python -m pip install setuptools - python -m pip install -e . 
- - - name: Run pytest - run: | - cd hdr_schemata/tests/ - pytest - deploy: - runs-on: ubuntu-latest - needs: test - #if: success() && github.ref == 'refs/heads/master' - steps: - - uses: actions/setup-python@v2 - with: - python-version: 3.8 - - uses: actions/checkout@v2 - with: - submodules: 'recursive' - - run: pip3 install -r requirements.txt - - name: deploy - run: | - mkdocs gh-deploy --force - + check_json: + runs-on: ubuntu-latest + + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Check JSON + run: | + file_path="available.json" + if [ -f "$file_path" ]; then + if jq . "$file_path" >/dev/null 2>&1; then + echo "JSON file is valid." + else + echo "JSON file is not valid." + exit 1 + fi + else + echo "JSON file does not exist." + exit 1 + fi + + test: + runs-on: ubuntu-latest + steps: + - name: Checkout code + uses: actions/checkout@v2 + + - name: Set up Python + uses: actions/setup-python@v2 + with: + python-version: 3.9 + + - name: Install schema module + run: | + python -m pip install --upgrade pip + python -m pip install setuptools + python -m pip install -e . + + - name: Run pytest + run: | + cd hdr_schemata/tests/ + pytest + deploy: + runs-on: ubuntu-latest + needs: test + if: success() && github.ref == 'refs/heads/master' + steps: + - uses: actions/setup-python@v2 + with: + python-version: 3.8 + - uses: actions/checkout@v2 + with: + submodules: "recursive" + - run: pip3 install -r requirements.txt + - name: deploy + run: | + mkdocs gh-deploy --force diff --git a/docs/GWDM/1.0.md b/docs/GWDM/1.0.md index 931d439..c55394d 100644 --- a/docs/GWDM/1.0.md +++ b/docs/GWDM/1.0.md @@ -5,37 +5,52 @@ required metadata needed for the GWDM + + + ### gatewayId Need a field in Mauro that captures the datasetID to link to gateway database - or can we just use the one created in Mauro? 
-| title | examples | required | type | -|:----------|:-----------|:-----------|:-------| -| Gatewayid | | True | str | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:--------| +| Gatewayid | False | False | True | ['str'] | + + + ### gatewayPid Need a field in Mauro that captures the dataset pid to link to gateway database -| title | examples | required | type | -|:-----------|:-----------|:-----------|:-------| -| Gatewaypid | | True | str | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:--------| +| Gatewaypid | False | False | True | ['str'] | + + + ### issued Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different? -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------| -| Issued | | True | datetime | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------| +| Issued | False | False | True | ['datetime'] | + + + ### modified Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different? 
-| title | examples | required | type | -|:---------|:-----------|:-----------|:---------| -| Modified | | True | datetime | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-------------| +| Modified | False | False | True | ['datetime'] | + + + ### revisions @@ -43,21 +58,30 @@ None + + + #### version Version number used for previous version of this dataset -| title | examples | required | type | -|:-----------------|:-----------|:-----------|:-------| -| revision version | | True | str | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------| +| revision version | False | False | True | ['str'] | + + + #### url Some url with a reference to the record of a previous version of this dataset -| title | examples | required | type | -|:-------------|:-----------|:-----------|:-------| -| revision url | | True | Url | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------| +| revision url | False | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | + + + ## summary @@ -65,77 +89,107 @@ Summary of metadata describing key pieces of information. 
+ + + ### title The main title of the dataset -| title | examples | required | type | -|:--------|:-----------|:-----------|:------------------------------| -| Title | | True | TwoHundredFiftyFiveCharacters | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------| +| Title | False | False | True | ["TwoHundredFiftyFiveCharacters[{'maxLength': 255, 'minLength': 2, 'type': 'string'}]"] | + + + ### shortTitle A shorter descriptive title of the dataset -| title | examples | required | type | -|:-----------|:-----------|:-----------|:-----------| -| Shorttitle | | True | ShortTitle | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------| +| Shorttitle | False | True | True | ["ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### doiName DOI associated to this dataset -| title | examples | required | type | -|:--------|:-----------|:-----------|:-------| -| Doiname | | True | Doi | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------| +| Doiname | False | True | True | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### abstract Longer abstract detailing the dataset. 
-| title | examples | required | type | -|:---------|:-----------|:-----------|:-----------------| -| Abstract | | True | LongAbstractText | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------| +| Abstract | False | False | True | ["LongAbstractText[{'anyOf': [{'maxLength': 5000, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]"] | + + + ### keywords Comma separated key words associated to this dataset. -| title | examples | required | type | -|:---------|:-----------|:-----------|:---------------------| -| Keywords | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### controlledKeywords Keywords that have been filtered and limited -| title | examples | required | type | -|:--------------------|:-----------|:-----------|:---------------------| -| Controlled Keywords | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Controlled Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### contactPoint email of a person who can be the main contact point of this dataset -| title | examples | required | type | -|:--------------|:-----------|:-----------|:---------| -| Contact Point | | True | EmailStr | +| title | is_list | is_optional | required | type | 
+|:--------------|:----------|:--------------|:-----------|:---------------------| +| Contact Point | False | True | True | ['EmailStr', 'null'] | + + + ### datasetType What type of dataset is this? -| title | examples | required | type | -|:-------------|:-----------|:-----------|:------------| -| Dataset type | | True | DatasetType | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Dataset type | False | True | True | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### description Longer description of the dataset in detail -| title | examples | required | type | -|:------------|:-----------|:-----------|:----------------| -| Description | | True | LongDescription | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Description | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### publisher @@ -143,21 +197,30 @@ Link to details about the publisher of this dataset + + + #### publisherName The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. 
In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/ -| title | examples | required | type | -|:---------------|:-----------|:-----------|:-------| -| Publisher name | | True | Name | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:---------------------| +| Publisher name | False | True | True | ['Name[{}]', 'null'] | + + + #### publisherGatewayId The link to an ID somewhere in the gateway where more information on the publisher can be retrieved. -| title | examples | required | type | -|:---------------------|:-----------|:-----------|:-------| -| Publisher gateway id | | False | str | +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:----------------| +| Publisher gateway id | False | True | False | ['str', 'null'] | + + + ## coverage @@ -165,45 +228,63 @@ Spatial and Temporal coverage + + + ### spatial List of countries where the data was taken from -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Spatial | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Spatial | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### physicalSampleAvailability A list of what the dataset actually contains in terms of sample measurements -| title | examples | required | type | -|:-----------------------------|:-----------|:-----------|:---------------------| -| Physical Sample Availability | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | 
+|:-----------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Physical Sample Availability | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### pathway Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. -| title | examples | required | type | -|:--------|:-----------|:-----------|:----------------| -| Pathway | | False | LongDescription | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Pathway | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### followup What is the typical time span that a patient appears in the dataset (follow up period) -| title | examples | required | type | -|:---------|:-----------|:-----------|:---------| -| Followup | | False | Followup | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------| +| Followup | False | True | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | + + + ### typicalAgeRange Age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). -| title | examples | required | type | -|:------------------|:-----------|:-----------|:---------| -| Typical Age Range | | False | AgeRange | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Typical Age Range | False | True | False | ["AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ## provenance @@ -211,35 +292,50 @@ Provenance information + + + ### origin None + + + #### purpose Indicates the purpose(s) that the dataset was collected. -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Purpose | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Purpose | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### source Indicates the source of the data extraction -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Source | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Source | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 
'string'}, {'type': 'null'}]}]", 'null'] | + + + #### collectionSituation Indicate the setting(s) where data was collected. Multiple settings may be provided -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Setting | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Setting | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### temporal @@ -247,45 +343,63 @@ None + + + #### startDate The start of the time period that the dataset provides coverage for -| title | examples | required | type | -|:-----------|:-----------|:-----------|:-------| -| Start Date | | True | date | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-----------------------------| +| Start Date | False | True | True | ['date', 'datetime', 'null'] | + + + #### endDate The end of the time period that the dataset provides coverage for -| title | examples | required | type | -|:---------|:-----------|:-----------|:-------| -| End Date | | False | date | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------| +| End Date | False | True | False | ['date', 'datetime', 'null'] | + + + #### timeLag -Rypical time-lag between an event and the data for that event appearing in the dataset +Typical time-lag between an event and the data for that event appearing in the dataset -| title | examples | required | type | -|:---------|:-----------|:-----------|:--------| -| Time Lag | | True | TimeLag | +| title | is_list | is_optional | required | type | 
+|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | False | False | True | ["TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]"] | + + + #### accrualPeriodicity frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. -| title | examples | required | type | -|:------------|:-----------|:-----------|:------------| -| Periodicity | | True | Periodicity | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Periodicity | False | False | True | ["Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]"] | + + + #### distributionReleaseDate Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. -| title | examples | required | type | -|:-------------|:-----------|:-----------|:-------| -| Release Date | | False | date | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-----------------------------| +| Release Date | False | True | False | ['date', 'datetime', 'null'] | + + + ## accessibility @@ -293,35 +407,50 @@ Accessibility information. 
+ + + ### usage This section includes information about how the data can be used and how it is currently being used + + + #### dataUseLimitation Any restrictions to its usage -| title | examples | required | type | -|:--------------------|:-----------|:-----------|:---------------------| -| Data Use Limitation | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Limitation | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### dataUseRequirement Any requirements needed for data usage -| title | examples | required | type | -|:----------------------|:-----------|:-----------|:---------------------| -| Data Use Requirements | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Requirements | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### resourceCreator Who has created this resource -| title | examples | required | type | -|:-----------------|:-----------|:-----------|:-----------------| -| Resource Creator | | False | ShortDescription | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Resource Creator | False | True | False | ["ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### access @@ -329,61 
+458,85 @@ This section includes information about data access + + + #### accessRights Optional link(s) or a description of where the license associated to accessing this dataset -| title | examples | required | type | -|:--------------|:-----------|:-----------|:---------------------| -| Access Rights | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Access Rights | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### accessService -| title | examples | required | type | -|:---------------|:-----------|:-----------|:----------------| -| Access Service | | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Service | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### accessRequestCost -| title | examples | required | type | -|:---------------------------------|:-----------|:-----------|:----------------| -| Organisation Access Request Cost | | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Organisation Access Request Cost | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### deliveryLeadTime An arbitrary guess at the time to gain access to 
the dataset... -| title | examples | required | type | -|:------------------------|:-----------|:-----------|:-----------------| -| Access Request Duration | | False | DeliveryLeadTime | +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------| +| Access Request Duration | False | True | False | ["DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", 'null'] | + + + #### jurisdiction Comma separated country codes of where the data jurisdiction is. -| title | examples | required | type | -|:-------------|:-----------|:-----------|:---------------------| -| Jurisdiction | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Jurisdiction | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### dataController Name of the data controller -| title | examples | required | type | -|:----------------|:-----------|:-----------|:----------------| -| Data Controller | | True | LongDescription | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### dataProcessor Name of the data processors -| title | examples | required | type | 
-|:---------------|:-----------|:-----------|:----------------| -| Data Processor | | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Processor | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### formatAndStandards @@ -391,37 +544,52 @@ Section includes technical attributes for language vocabularies, sizes etc. and + + + #### vocabularyEncodingSchemes Code value of the ontology vocabulary encoding -| title | examples | required | type | -|:----------------------|:-----------|:-----------|:---------------------| -| Controlled Vocabulary | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Controlled Vocabulary | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### conformsTo What the vocabulary conforms to. -| title | examples | required | type | -|:------------|:-----------|:-----------|:---------------------| -| Conforms To | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Conforms To | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### languages Language code(s) of the language of the dataset metadata and underlying data is made available. 
-| title | examples | required | type | -|:-----------------|:-----------|:-----------|:---------------------| -| Language Code(s) | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Language Code(s) | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### formats Format(s) the dataset can be made available in -| title | examples | required | type | -|:---------------|:-----------|:-----------|:---------------------| -| Dataset Format | | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Dataset Format | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ## linkage @@ -429,45 +597,63 @@ Linkage and enrichment. + + + ### isGeneratedUsing ?? -| title | examples | required | type | -|:-------------------|:-----------|:-----------|:---------------------| -| Is Generated Using | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is Generated Using | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### associatedMedia Any media associated with the Gateway Organisation using a valid URI for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question -| title | examples | required | type | -|:-----------------|:-----------|:-----------|:---------------------| -| Associated Media | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Associated Media | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### dataUses ?? -| title | examples | required | type | -|:----------|:-----------|:-----------|:---------------------| -| Data Uses | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Uses | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### isReferenceIn Rhe keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. 
-| title | examples | required | type | -|:----------------|:-----------|:-----------|:---------------------| -| Is Reference in | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is Reference in | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### tools URL of any analysis tools or models that have been created for this dataset and are available for further use -| title | examples | required | type | -|:--------|:-----------|:-----------|:---------------------| -| Tools | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Tools | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### datasetLinkage @@ -475,58 +661,82 @@ Dataset Linkage copied over from + + + #### isDerivedFrom Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset -| title | examples | required | type | -|:------------|:-----------|:-----------|:---------------------| -| Derivations | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Derivations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### isPartOf If the dataset is part of a group or family -| title | examples | required | type | -|:----------|:-----------|:-----------|:---------------------| -| Is PartOf | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is PartOf | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### isMemberOf Dataset is a member of XXX(?) -| title | examples | required | type | -|:------------|:-----------|:-----------|:---------------------| -| Is MemberOf | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is MemberOf | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + #### linkedDatasets Links to other datasets. 
-| title | examples | required | type | -|:----------------|:-----------|:-----------|:---------------------| -| Linked Datasets | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Linked Datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ### investigations Please provide the keystone paper associated with the dataset. -| title | examples | required | type | -|:---------------|:-----------|:-----------|:---------------------| -| Investigations | | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Investigations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + ## observations Obsservations -| title | examples | required | type | -|:-------------|:-----------|:-----------|:------------| -| Observations | | False | Observation | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:----------------| +| Observations | True | True | False | ['Observation'] | + + + ## structuralMetadata Descriptions of all tables and data elements that can be included in the dataset -| title | examples | required | type | -|:--------------------|:-----------|:-----------|:----------| -| Structural Metadata | | False | DataTable | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:--------------| +| Structural Metadata | True | True | False | ['DataTable'] | + + + diff --git 
a/docs/GWDM/1.0.structure.json b/docs/GWDM/1.0.structure.json index 91bf40e..da5ce98 100644 --- a/docs/GWDM/1.0.structure.json +++ b/docs/GWDM/1.0.structure.json @@ -5,7 +5,11 @@ "title": "Required", "description": "required metadata needed for the GWDM", "examples": null, - "type": "Required", + "type": [ + "Required" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "gatewayId", @@ -13,7 +17,11 @@ "title": "Gatewayid", "description": "Need a field in Mauro that captures the datasetID to link to gateway database - or can we just use the one created in Mauro?", "examples": null, - "type": "str" + "type": [ + "str" + ], + "is_list": false, + "is_optional": false }, { "name": "gatewayPid", @@ -21,7 +29,11 @@ "title": "Gatewaypid", "description": "Need a field in Mauro that captures the dataset pid to link to gateway database", "examples": null, - "type": "str" + "type": [ + "str" + ], + "is_list": false, + "is_optional": false }, { "name": "issued", @@ -29,7 +41,11 @@ "title": "Issued", "description": "Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different?", "examples": null, - "type": "datetime" + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "modified", @@ -37,7 +53,11 @@ "title": "Modified", "description": "Aren't issued and modified always the same because of versioning? 
Is that fine to duplicate because datasets in dcat might look different?", "examples": null, - "type": "datetime" + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "revisions", @@ -45,7 +65,11 @@ "title": "Revisions", "description": null, "examples": null, - "type": "Revision", + "type": [ + "Revision" + ], + "is_list": true, + "is_optional": false, "subItems": [ { "name": "version", @@ -53,7 +77,11 @@ "title": "revision version", "description": "Version number used for previous version of this dataset", "examples": null, - "type": "str" + "type": [ + "str" + ], + "is_list": false, + "is_optional": false }, { "name": "url", @@ -61,7 +89,11 @@ "title": "revision url", "description": "Some url with a reference to the record of a previous version of this dataset", "examples": null, - "type": "Url", + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] } ] @@ -74,7 +106,11 @@ "title": "Summary", "description": "Summary of metadata describing key pieces of information.", "examples": null, - "type": "Summary", + "type": [ + "Summary" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "title", @@ -82,7 +118,11 @@ "title": "Title", "description": "The main title of the dataset", "examples": null, - "type": "TwoHundredFiftyFiveCharacters", + "type": [ + "TwoHundredFiftyFiveCharacters[{'maxLength': 255, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -91,7 +131,12 @@ "title": "Shorttitle", "description": "A shorter descriptive title of the dataset", "examples": null, - "type": "ShortTitle", + "type": [ + "ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -100,7 +145,12 @@ "title": "Doiname", "description": "DOI associated 
to this dataset", "examples": null, - "type": "Doi", + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -109,7 +159,11 @@ "title": "Abstract", "description": "Longer abstract detailing the dataset.", "examples": null, - "type": "LongAbstractText", + "type": [ + "LongAbstractText[{'anyOf': [{'maxLength': 5000, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -118,7 +172,12 @@ "title": "Keywords", "description": "Comma separated key words associated to this dataset.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -127,7 +186,12 @@ "title": "Controlled Keywords", "description": "Keywords that have been filtered and limited", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -136,7 +200,12 @@ "title": "Contact Point", "description": "email of a person who can be the main contact point of this dataset", "examples": null, - "type": "EmailStr" + "type": [ + "EmailStr", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "datasetType", @@ -144,7 +213,12 @@ "title": "Dataset type", "description": "What type of dataset is this?", "examples": null, - "type": "DatasetType", + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -153,7 +227,12 @@ "title": "Description", "description": "Longer description of the 
dataset in detail", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -162,7 +241,12 @@ "title": "Publisher", "description": "Link to details about the publisher of this dataset", "examples": null, - "type": "Publisher", + "type": [ + "Publisher", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "publisherName", @@ -170,7 +254,12 @@ "title": "Publisher name", "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/", "examples": null, - "type": "Name", + "type": [ + "Name[{}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -179,7 +268,12 @@ "title": "Publisher gateway id", "description": "The link to an ID somewhere in the gateway where more information on the publisher can be retrieved.", "examples": null, - "type": "str" + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true } ] } @@ -191,7 +285,12 @@ "title": "Coverage", "description": "Spatial and Temporal coverage", "examples": null, - "type": "Coverage", + "type": [ + "Coverage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "spatial", @@ -199,7 +298,12 @@ "title": "Spatial", "description": "List of countries where the data was taken from", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -208,7 +312,12 @@ "title": "Physical Sample Availability", "description": "A list of what the dataset actually contains in terms of sample 
measurements", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -217,7 +326,12 @@ "title": "Pathway", "description": "Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -226,7 +340,12 @@ "title": "Followup", "description": "What is the typical time span that a patient appears in the dataset (follow up period)", "examples": null, - "type": "Followup" + "type": [ + "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "typicalAgeRange", @@ -234,7 +353,12 @@ "title": "Typical Age Range", "description": "Age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", "examples": null, - "type": "AgeRange", + "type": [ + "AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -245,7 +369,12 @@ "title": "Provenance", "description": "Provenance information", "examples": null, - "type": "Provenance", + "type": [ + "Provenance", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "origin", @@ -253,7 +382,12 @@ "title": null, "description": null, "examples": null, - "type": "Origin", + "type": [ + "Origin", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "purpose", @@ -261,7 +395,12 @@ "title": "Purpose", "description": "Indicates the purpose(s) that the dataset was collected.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -270,7 +409,12 @@ "title": "Source", "description": "Indicates the source of the data extraction", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -279,7 +423,12 @@ "title": "Setting", "description": "Indicate the setting(s) where data was collected. 
Multiple settings may be provided", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -290,7 +439,11 @@ "title": null, "description": null, "examples": null, - "type": "Temporal", + "type": [ + "Temporal" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "startDate", @@ -298,7 +451,13 @@ "title": "Start Date", "description": "The start of the time period that the dataset provides coverage for", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "endDate", @@ -306,15 +465,25 @@ "title": "End Date", "description": "The end of the time period that the dataset provides coverage for", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "timeLag", "required": true, "title": "Time Lag", - "description": "Rypical time-lag between an event and the data for that event appearing in the dataset", + "description": "Typical time-lag between an event and the data for that event appearing in the dataset", "examples": null, - "type": "TimeLag" + "type": [ + "TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]" + ], + "is_list": false, + "is_optional": false }, { "name": "accrualPeriodicity", @@ -322,7 +491,11 @@ "title": "Periodicity", "description": "frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. 
When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.", "examples": null, - "type": "Periodicity" + "type": [ + "Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]" + ], + "is_list": false, + "is_optional": false }, { "name": "distributionReleaseDate", @@ -330,7 +503,13 @@ "title": "Release Date", "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true } ] } @@ -342,7 +521,11 @@ "title": "Accessibility", "description": "Accessibility information.", "examples": null, - "type": "Accessibility", + "type": [ + "Accessibility" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "usage", @@ -350,7 +533,12 @@ "title": "Usage", "description": "This section includes information about how the data can be used and how it is currently being used", "examples": null, - "type": "Usage", + "type": [ + "Usage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "dataUseLimitation", @@ -358,7 +546,12 @@ "title": "Data Use Limitation", "description": "Any restrictions to its usage", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -367,7 +560,12 @@ "title": "Data Use Requirements", "description": "Any requirements needed for data usage", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": 
false, + "is_optional": true, "subItems": [] }, { @@ -376,7 +574,12 @@ "title": "Resource Creator", "description": "Who has created this resource", "examples": null, - "type": "ShortDescription", + "type": [ + "ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -387,7 +590,11 @@ "title": "Access", "description": "This section includes information about data access", "examples": null, - "type": "Access", + "type": [ + "Access" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "accessRights", @@ -395,7 +602,12 @@ "title": "Access Rights", "description": "Optional link(s) or a description of where the license associated to accessing this dataset", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -404,7 +616,12 @@ "title": "Access Service", "description": "", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -413,7 +630,12 @@ "title": "Organisation Access Request Cost", "description": "", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -422,7 +644,12 @@ "title": "Access Request Duration", "description": "An arbitrary guess at the time to gain access to the dataset...", "examples": null, - "type": "DeliveryLeadTime" + "type": [ + "DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 
MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "jurisdiction", @@ -430,7 +657,12 @@ "title": "Jurisdiction", "description": "Comma separated country codes of where the data jurisdiction is.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -439,7 +671,12 @@ "title": "Data Controller", "description": "Name of the data controller", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -448,7 +685,12 @@ "title": "Data Processor", "description": "Name of the data processors", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -459,7 +701,12 @@ "title": "Format and Standards", "description": "Section includes technical attributes for language vocabularies, sizes etc. 
and gives researchers facts about and processing the underlying data in the dataset.", "examples": null, - "type": "FormatAndStandards", + "type": [ + "FormatAndStandards", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "vocabularyEncodingSchemes", @@ -467,7 +714,12 @@ "title": "Controlled Vocabulary", "description": "Code value of the ontology vocabulary encoding", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -476,7 +728,12 @@ "title": "Conforms To", "description": "What the vocabulary conforms to.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -485,7 +742,12 @@ "title": "Language Code(s)", "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -494,7 +756,12 @@ "title": "Dataset Format", "description": "Format(s) the dataset can be made available in", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -507,7 +774,12 @@ "title": "Linkage", "description": "Linkage and enrichment.", "examples": null, - "type": "Linkage", + "type": [ + "Linkage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "isGeneratedUsing", @@ -515,7 
+787,12 @@ "title": "Is Generated Using", "description": "??", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -524,7 +801,12 @@ "title": "Associated Media", "description": "Any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -533,7 +815,12 @@ "title": "Data Uses", "description": "??", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -542,7 +829,12 @@ "title": "Is Reference in", "description": "Rhe keystone paper associated with the dataset. 
Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -551,7 +843,12 @@ "title": "Tools", "description": "URL of any analysis tools or models that have been created for this dataset and are available for further use", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -560,7 +857,12 @@ "title": "Dataset Linkage", "description": "Dataset Linkage copied over from", "examples": null, - "type": "DatasetLinkage", + "type": [ + "DatasetLinkage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "isDerivedFrom", @@ -568,7 +870,12 @@ "title": "Derivations", "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -577,7 +884,12 @@ "title": "Is PartOf", "description": "If the dataset is part of a group or family", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -586,7 +898,12 @@ "title": "Is MemberOf", "description": "Dataset is a member of XXX(?)", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -595,7 +912,12 @@ "title": "Linked Datasets", "description": "Links to other datasets.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -606,7 +928,12 @@ "title": "Investigations", "description": "Please provide the keystone paper associated with the dataset.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -617,7 +944,11 @@ "title": "Observations", "description": "Obsservations", "examples": null, - "type": "Observation" + "type": [ + "Observation" + ], + "is_list": true, + "is_optional": true }, { "name": "structuralMetadata", @@ -625,6 +956,10 @@ "title": "Structural Metadata", 
"description": "Descriptions of all tables and data elements that can be included in the dataset", "examples": null, - "type": "DataTable" + "type": [ + "DataTable" + ], + "is_list": true, + "is_optional": true } ] \ No newline at end of file diff --git a/docs/GWDM/1.1.md b/docs/GWDM/1.1.md index f8bfb5b..e7cbd93 100644 --- a/docs/GWDM/1.1.md +++ b/docs/GWDM/1.1.md @@ -12,9 +12,9 @@ required metadata needed for the GWDM Need a field in Mauro that captures the datasetID to link to gateway database - or can we just use the one created in Mauro? -| title | required | type | -|:----------|:-----------|:-------| -| Gatewayid | True | str | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:--------| +| Gatewayid | False | False | True | ['str'] | @@ -23,9 +23,9 @@ Need a field in Mauro that captures the datasetID to link to gateway database - Need a field in Mauro that captures the dataset pid to link to gateway database -| title | required | type | -|:-----------|:-----------|:-------| -| Gatewaypid | True | str | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:--------| +| Gatewaypid | False | False | True | ['str'] | @@ -34,9 +34,9 @@ Need a field in Mauro that captures the dataset pid to link to gateway database Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different? -| title | required | type | -|:--------|:-----------|:---------| -| Issued | True | datetime | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------| +| Issued | False | False | True | ['datetime'] | @@ -45,9 +45,9 @@ Aren't issued and modified always the same because of versioning? Is that fine t Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different? 
-| title | required | type | -|:---------|:-----------|:---------| -| Modified | True | datetime | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-------------| +| Modified | False | False | True | ['datetime'] | @@ -65,9 +65,9 @@ None Version number used for previous version of this dataset -| title | required | type | -|:-----------------|:-----------|:-------| -| revision version | True | str | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------| +| revision version | False | False | True | ['str'] | @@ -76,9 +76,9 @@ Version number used for previous version of this dataset Some url with a reference to the record of a previous version of this dataset -| title | required | type | -|:-------------|:-----------|:-------| -| revision url | True | Url | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------| +| revision url | False | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | @@ -87,11 +87,12 @@ Some url with a reference to the record of a previous version of this dataset Dataset metadata version -| title | required | type | -|:----------------|:-----------|:-------| -| Dataset Version | True | str | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:--------| +| Dataset Version | False | False | True | ['str'] | Examples: + * 1.1.0 @@ -108,9 +109,9 @@ Summary of metadata describing key pieces of information. 
The main title of the dataset -| title | required | type | -|:--------|:-----------|:------------------------------| -| Title | True | TwoHundredFiftyFiveCharacters | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------| +| Title | False | False | True | ["TwoHundredFiftyFiveCharacters[{'maxLength': 255, 'minLength': 2, 'type': 'string'}]"] | @@ -119,9 +120,9 @@ The main title of the dataset A shorter descriptive title of the dataset -| title | required | type | -|:-----------|:-----------|:-----------| -| Shorttitle | True | ShortTitle | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------| +| Shorttitle | False | True | True | ["ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -130,9 +131,9 @@ A shorter descriptive title of the dataset DOI associated to this dataset -| title | required | type | -|:--------|:-----------|:-------| -| Doiname | True | Doi | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------| +| Doiname | False | True | True | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -141,9 +142,9 @@ DOI associated to this dataset Longer abstract detailing the dataset. 
-| title | required | type | -|:---------|:-----------|:-----------------| -| Abstract | True | LongAbstractText | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------| +| Abstract | False | False | True | ["LongAbstractText[{'anyOf': [{'maxLength': 5000, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]"] | @@ -152,9 +153,9 @@ Longer abstract detailing the dataset. Comma separated key words associated to this dataset. -| title | required | type | -|:---------|:-----------|:---------------------| -| Keywords | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -163,9 +164,9 @@ Comma separated key words associated to this dataset. 
Keywords that have been filtered and limited -| title | required | type | -|:--------------------|:-----------|:---------------------| -| Controlled Keywords | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Controlled Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -174,9 +175,9 @@ Keywords that have been filtered and limited email of a person who can be the main contact point of this dataset -| title | required | type | -|:--------------|:-----------|:---------| -| Contact Point | True | EmailStr | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:---------------------| +| Contact Point | False | True | True | ['EmailStr', 'null'] | @@ -185,9 +186,9 @@ email of a person who can be the main contact point of this dataset What type of dataset is this? -| title | required | type | -|:-------------|:-----------|:------------| -| Dataset type | True | DatasetType | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Dataset type | False | True | True | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -196,9 +197,9 @@ What type of dataset is this? 
Longer description of the dataset in detail -| title | required | type | -|:------------|:-----------|:----------------| -| Description | True | LongDescription | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Description | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -216,9 +217,9 @@ Link to details about the publisher of this dataset The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/ -| title | required | type | -|:--------|:-----------|:-------| -| Name | True | Name | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------| +| Name | False | True | True | ['Name[{}]', 'null'] | @@ -227,9 +228,9 @@ The organisation responsible for running or supporting the data access request p The link to an ID somewhere in the gateway where more information on the publisher can be retrieved. 
-| title | required | type | -|:---------------------|:-----------|:-------| -| Publisher gateway id | False | str | +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:----------------| +| Publisher gateway id | False | True | False | ['str', 'null'] | @@ -238,9 +239,9 @@ The link to an ID somewhere in the gateway where more information on the publish The Research Organization Registry (ROR) for the organisation, if applicable -| title | required | type | -|:------------------------------------------|:-----------|:-------| -| Research Organization Registry Identifier | False | str | +| title | is_list | is_optional | required | type | +|:------------------------------------------|:----------|:--------------|:-----------|:----------------| +| Research Organization Registry Identifier | False | True | False | ['str', 'null'] | @@ -249,9 +250,9 @@ The Research Organization Registry (ROR) for the organisation, if applicable Summary population size of the cohort -| title | required | type | -|:----------------|:-----------|:-------| -| Population size | False | int | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------| +| Population size | False | True | False | ['int', 'null'] | @@ -260,9 +261,9 @@ Summary population size of the cohort What us the subtype for this dataset? 
-| title | required | type | -|:-----------------|:-----------|:------------| -| Dataset sub type | False | DatasetType | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Dataset sub type | False | True | False | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -280,9 +281,9 @@ Observational, Spatial and Temporal coverage List of countries where the data was taken from -| title | required | type | -|:--------|:-----------|:---------------------| -| Spatial | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Spatial | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -291,9 +292,9 @@ List of countries where the data was taken from Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. 
-| title | required | type | -|:--------|:-----------|:----------------| -| Pathway | False | LongDescription | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Pathway | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -302,9 +303,9 @@ Long description of the clinical/diagnostic/treatment pathway if applicable. Thi What is the typical time span that a patient appears in the dataset (follow up period) -| title | required | type | -|:---------|:-----------|:---------| -| Followup | False | Followup | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------| +| Followup | False | True | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | @@ -313,9 +314,9 @@ What is the typical time span that a patient appears in the dataset (follow up p Age range in whole years of participants in the dataset. Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). 
-| title | required | type | -|:------------------|:-----------|:---------| -| Typical Age Range | False | AgeRange | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Typical Age Range | False | True | False | ["AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -324,9 +325,9 @@ Age range in whole years of participants in the dataset. Please provide range in Male, Female, Other -| title | required | type | -|:--------|:-----------|:-------| -| Gender | False | Gender | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------| +| Gender | False | True | False | ["Gender[{'anyOf': [{'pattern': '\\\\b(?:Male|Female|Other)(?:,(?:Male|Female|Other))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -335,9 +336,9 @@ Male, Female, Other Blood, Saliva, Urine, Other -| title | required | type | -|:-------------------|:-----------|:------------------| -| Biological Samples | False | BiologicalSamples | +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Biological Samples | False | True | False | ["BiologicalSamples[{'anyOf': [{'pattern': '\\\\b(?:Blood|Other|Urine|Saliva)(?:,(?:Blood|Other|Urine|Saliva))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", 'null'] 
| @@ -346,9 +347,9 @@ Blood, Saliva, Urine, Other Mental health, Cognitive function -| title | required | type | -|:--------------|:-----------|:--------------| -| Psychological | False | Psychological | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Psychological | False | True | False | ["Psychological[{'anyOf': [{'pattern': '\\\\b(?:Cognitive Function|Mental Health)(?:,(?:Cognitive Function|Mental Health))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -357,9 +358,9 @@ Mental health, Cognitive function Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive -| title | required | type | -|:---------|:-----------|:---------| -| Physical | False | Physical | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Physical | False | True | False | ["Physical[{'anyOf': [{'pattern': '\\\\b(?:Respiratory|Vision|Hearing|Musculoskeletal|Cardiovascular|Reproductive)(?:,(?:Respiratory|Vision|Hearing|Musculoskeletal|Cardiovascular|Reproductive))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -368,9 +369,9 @@ Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive Height, Weight, Waist circumference, Hip circumference, Blood pressure -| title | required | type | -|:---------------|:-----------|:---------------| -| Anthropometric | False | Anthropometric | +| title | is_list | is_optional | required | type | 
+|:---------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Anthropometric | False | True | False | ["Anthropometric[{'anyOf': [{'pattern': '\\\\b(?:Blood Pressure|Hip Circumference|Height|Waist Circumference|Weight)(?:,(?:Blood Pressure|Hip Circumference|Height|Waist Circumference|Weight))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -379,9 +380,9 @@ Height, Weight, Waist circumference, Hip circumference, Blood pressure Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol -| title | required | type | -|:----------|:-----------|:-----------| -| Lifestyle | False | Lifestyles | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Lifestyle | False | True | False | ["Lifestyles[{'anyOf': [{'pattern': '\\\\b(?:Smoking|Dietary Habits|Physical Activity|Alcohol)(?:,(?:Smoking|Dietary Habits|Physical Activity|Alcohol))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -390,9 +391,9 @@ Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support -| title | required | type | -|:---------------|:-----------|:--------------| -| Socio-economic | False | SocioEconomic | +| title | is_list | is_optional | required | type | 
+|:---------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Socio-economic | False | True | False | ["SocioEconomic[{'anyOf': [{'pattern': '\\\\b(?:Finances|Family Circumstances|Housing|Education|Marital Status|Occupation|Ethnic Group|Social Support)(?:,(?:Finances|Family Circumstances|Housing|Education|Marital Status|Occupation|Ethnic Group|Social Support))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -419,9 +420,9 @@ None Indicates the purpose(s) that the dataset was collected. -| title | required | type | -|:--------|:-----------|:---------------------| -| Purpose | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Purpose | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -430,9 +431,9 @@ Indicates the purpose(s) that the dataset was collected. 
Indicates the source of the data extraction -| title | required | type | -|:--------|:-----------|:---------------------| -| Source | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Source | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -441,9 +442,9 @@ Indicates the source of the data extraction Indicate the setting(s) where data was collected. Multiple settings may be provided -| title | required | type | -|:--------|:-----------|:---------------------| -| Setting | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Setting | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -461,9 +462,9 @@ None The start of the time period that the dataset provides coverage for -| title | required | type | -|:-----------|:-----------|:-------| -| Start Date | True | date | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-----------------------------| +| Start Date | False | True | True | ['date', 'datetime', 'null'] | @@ -472,20 +473,20 @@ The start of the time period that the dataset provides coverage for The end of the time period that the dataset provides coverage for -| title | required | type | -|:---------|:-----------|:-------| -| End Date | False | date | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------| +| End Date | False | True | False | ['date', 'datetime', 
'null'] | #### timeLag -Rypical time-lag between an event and the data for that event appearing in the dataset +Typical time-lag between an event and the data for that event appearing in the dataset -| title | required | type | -|:---------|:-----------|:--------| -| Time Lag | True | TimeLag | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | False | False | True | ["TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]"] | @@ -494,9 +495,9 @@ Rypical time-lag between an event and the data for that event appearing in the d frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. -| title | required | type | -|:------------|:-----------|:------------| -| Periodicity | True | Periodicity | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Periodicity | False | False | True | ["Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]"] | @@ -505,9 +506,9 @@ frequency of distribution release. If a dataset is distributed regularly please Date of the latest release of the dataset. If this is a regular release i.e. 
quarterly, or this is a static dataset please complete this alongside Periodicity. -| title | required | type | -|:-------------|:-----------|:-------| -| Release Date | False | date | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-----------------------------| +| Release Date | False | True | False | ['date', 'datetime', 'null'] | @@ -534,9 +535,9 @@ This section includes information about how the data can be used and how it is c Any restrictions to its usage -| title | required | type | -|:--------------------|:-----------|:---------------------| -| Data Use Limitation | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Limitation | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -545,9 +546,9 @@ Any restrictions to its usage Any requirements needed for data usage -| title | required | type | -|:----------------------|:-----------|:---------------------| -| Data Use Requirements | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Requirements | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -565,9 +566,9 @@ Who has created this resource The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. 
In most cases this will be the same as the HDR UK Organisation (Hub or Alliance Member). -| title | required | type | -|:--------|:-----------|:-------| -| Name | True | Name | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------| +| Name | False | True | True | ['Name[{}]', 'null'] | @@ -576,9 +577,9 @@ The organisation responsible for running or supporting the data access request p The link to an ID somewhere in the gateway where more information on the publisher can be retrieved. -| title | required | type | -|:---------------------|:-----------|:-------| -| Publisher gateway id | False | str | +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:----------------| +| Publisher gateway id | False | True | False | ['str', 'null'] | @@ -587,9 +588,9 @@ The link to an ID somewhere in the gateway where more information on the publish The Research Organization Registry (ROR) for the organisation, if applicable -| title | required | type | -|:------------------------------------------|:-----------|:-------| -| Research Organization Registry Identifier | False | str | +| title | is_list | is_optional | required | type | +|:------------------------------------------|:----------|:--------------|:-----------|:----------------| +| Research Organization Registry Identifier | False | True | False | ['str', 'null'] | @@ -607,9 +608,9 @@ This section includes information about data access Optional link(s) or a description of where the license associated to accessing this dataset -| title | required | type | -|:--------------|:-----------|:---------------------| -| Access Rights | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Access Rights
| False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -618,9 +619,9 @@ Optional link(s) or a description of where the license associated to accessing t -| title | required | type | -|:---------------|:-----------|:----------------| -| Access Service | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Service | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -629,9 +630,9 @@ Optional link(s) or a description of where the license associated to accessing t -| title | required | type | -|:---------------------------------|:-----------|:----------------| -| Organisation Access Request Cost | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Organisation Access Request Cost | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -640,9 +641,9 @@ Optional link(s) or a description of where the license associated to accessing t An arbitrary guess at the time to gain access to the dataset... 
-| title | required | type | -|:------------------------|:-----------|:-----------------| -| Access Request Duration | False | DeliveryLeadTime | +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------| +| Access Request Duration | False | True | False | ["DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", 'null'] | @@ -651,9 +652,9 @@ An arbitrary guess at the time to gain access to the dataset... Comma separated country codes of where the data jurisdiction is. -| title | required | type | -|:-------------|:-----------|:---------------------| -| Jurisdiction | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Jurisdiction | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -662,9 +663,9 @@ Comma separated country codes of where the data jurisdiction is. 
Name of the data controller -| title | required | type | -|:----------------|:-----------|:----------------| -| Data Controller | True | LongDescription | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -673,9 +674,9 @@ Name of the data controller Name of the data processors -| title | required | type | -|:---------------|:-----------|:----------------| -| Data Processor | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Processor | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -693,9 +694,9 @@ Section includes technical attributes for language vocabularies, sizes etc. and Code value of the ontology vocabulary encoding -| title | required | type | -|:----------------------|:-----------|:---------------------| -| Controlled Vocabulary | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Controlled Vocabulary | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -704,9 +705,9 @@ Code value of the ontology vocabulary encoding What the vocabulary conforms to. 
-| title | required | type | -|:------------|:-----------|:---------------------| -| Conforms To | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Conforms To | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -715,9 +716,9 @@ What the vocabulary conforms to. Language code(s) of the language of the dataset metadata and underlying data is made available. -| title | required | type | -|:-----------------|:-----------|:---------------------| -| Language Code(s) | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Language Code(s) | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -726,9 +727,9 @@ Language code(s) of the language of the dataset metadata and underlying data is Format(s) the dataset can be made available in -| title | required | type | -|:---------------|:-----------|:---------------------| -| Dataset Format | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Dataset Format | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -746,9 +747,9 @@ Linkage and enrichment. ?? 
-| title | required | type | -|:-------------------|:-----------|:---------------------| -| Is Generated Using | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is Generated Using | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -757,9 +758,9 @@ Linkage and enrichment. Any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question -| title | required | type | -|:-----------------|:-----------|:---------------------| -| Associated Media | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Associated Media | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -768,9 +769,9 @@ Any media associated with the Gateway Organisation using a valid URI for the con ?? 
-| title | required | type | -|:----------|:-----------|:---------------------| -| Data Uses | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Uses | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -779,9 +780,9 @@ Any media associated with the Gateway Organisation using a valid URI for the con The keystone paper associated with the dataset. Also include a list of known citations, if available; these should be links to existing resources where the dataset has been used or referenced. -| title | required | type | -|:----------------|:-----------|:---------------------| -| Is Reference in | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is Reference in | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -790,9 +791,9 @@ The keystone paper associated with the dataset.
Also include a list of known cit URL of any analysis tools or models that have been created for this dataset and are available for further use -| title | required | type | -|:--------|:-----------|:---------------------| -| Tools | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Tools | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -810,9 +811,9 @@ Dataset Linkage copied over from Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset -| title | required | type | -|:------------|:-----------|:---------------------| -| Derivations | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Derivations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -821,9 +822,9 @@ Indicate if derived datasets or predefined extracts are available and the type o If the dataset is part of a group or family -| title | required | type | -|:----------|:-----------|:---------------------| -| Is PartOf | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is PartOf | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 
'null'] | @@ -832,9 +833,9 @@ If the dataset is part of a group or family Dataset is a member of XXX(?) -| title | required | type | -|:------------|:-----------|:---------------------| -| Is MemberOf | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is MemberOf | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -843,9 +844,9 @@ Dataset is a member of XXX(?) Links to other datasets. -| title | required | type | -|:----------------|:-----------|:---------------------| -| Linked Datasets | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Linked Datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -854,9 +855,9 @@ Links to other datasets. Please provide the keystone paper associated with the dataset. -| title | required | type | -|:---------------|:-----------|:---------------------| -| Investigations | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Investigations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -865,9 +866,9 @@ Please provide the keystone paper associated with the dataset. 
Observations -| title | required | type | -|:-------------|:-----------|:------------| -| Observations | False | Observation | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:----------------| +| Observations | True | True | False | ['Observation'] | @@ -876,9 +877,9 @@ Observations Descriptions of all tables and data elements that can be included in the dataset -| title | required | type | -|:--------------------|:-----------|:----------| -| Structural Metadata | False | DataTable | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:--------------| +| Structural Metadata | True | True | False | ['DataTable'] | @@ -896,9 +897,9 @@ Metadata collection for Tissue Samples datasets ID of the tissue sample collection -| title | required | type | -|:--------|:-----------|:---------------------| -| ID | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| ID | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -907,9 +908,9 @@ ID of the tissue sample collection Data categories related to the tissue sample collection -| title | required | type | -|:----------------|:-----------|:---------------------| -| Data Categories | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Categories | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -918,9 +919,9 @@ Data categories related to
the tissue sample collection Material type of the tissue sample collection -| title | required | type | -|:--------------|:-----------|:---------------------| -| Material Type | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Material Type | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -929,9 +930,9 @@ Material type of the tissue sample collection Access conditions for the tissue sample collection -| title | required | type | -|:------------------|:-----------|:---------------------| -| Access Conditions | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Access Conditions | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -940,9 +941,9 @@ Access conditions for the tissue sample collection Type of the tissue sample collection -| title | required | type | -|:----------------|:-----------|:---------------------| -| Collection Type | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Collection Type | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -951,9 +952,9 @@ Type of the tissue sample collection Disease associated with the tissue sample collection -| title | required | type | 
-|:--------|:-----------|:---------------------| -| Disease | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Disease | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -962,9 +963,9 @@ Disease associated with the tissue sample collection Storage temperature of the tissue sample collection -| title | required | type | -|:--------------------|:-----------|:---------------------| -| Storage Temperature | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Storage Temperature | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -973,9 +974,9 @@ Storage temperature of the tissue sample collection Age range of the tissue sample collection -| title | required | type | -|:-----------------|:-----------|:---------------------| -| Sample Age Range | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Sample Age Range | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -993,9 +994,9 @@ Metadata related to the tissue sample ID of the tissue sample metadata -| title | required | type | -|:------------|:-----------|:-------| -| Metadata ID | False | str | +| title | is_list | is_optional | required | type | 
+|:------------|:----------|:--------------|:-----------|:----------------| +| Metadata ID | False | True | False | ['str', 'null'] | @@ -1013,9 +1014,9 @@ Information about the sample donor ID of the sample donor -| title | required | type | -|:---------|:-----------|:-------| -| Donor ID | False | str | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:----------------| +| Donor ID | False | True | False | ['str', 'null'] | @@ -1024,9 +1025,9 @@ ID of the sample donor Sex of the sample donor -| title | required | type | -|:----------|:-----------|:-------| -| Donor Sex | False | str | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------| +| Donor Sex | False | True | False | ['str', 'null'] | @@ -1035,9 +1036,9 @@ Sex of the sample donor Date of birth of the sample donor -| title | required | type | -|:-----------------|:-----------|:-------| -| Donor birth date | False | date | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:-----------------------------| +| Donor birth date | False | True | False | ['date', 'datetime', 'null'] | @@ -1046,9 +1047,9 @@ Date of birth of the sample donor Data categories related to the sample donor -| title | required | type | -|:----------------------|:-----------|:---------------------| -| Donor Data Categories | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Donor Data Categories | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -1057,9 +1058,9 @@ Data categories related to the sample donor Type of the tissue sample -| title | required | 
type | -|:------------|:-----------|:---------------------| -| Sample Type | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Sample Type | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -1068,9 +1069,9 @@ Type of the tissue sample Storage temperature of the tissue sample -| title | required | type | -|:--------------------|:-----------|:-------| -| Storage Temperature | False | str | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------| +| Storage Temperature | False | True | False | ['str', 'null'] | @@ -1079,9 +1080,9 @@ Storage temperature of the tissue sample Date when the tissue sample metadata was created -| title | required | type | -|:--------------|:-----------|:-------| -| Creation Date | False | date | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------| +| Creation Date | False | True | False | ['date', 'datetime', 'null'] | @@ -1090,9 +1091,9 @@ Date when the tissue sample metadata was created Ontology code for the anatomical site -| title | required | type | -|:------------------------------|:-----------|:---------------------| -| Anatomical Site Ontology Code | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Anatomical Site Ontology Code | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ 
-1101,9 +1102,9 @@ Ontology code for the anatomical site Ontology description for the anatomical site -| title | required | type | -|:-------------------------------------|:-----------|:---------------------| -| Anatomical Site Ontology Description | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-------------------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Anatomical Site Ontology Description | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -1112,9 +1113,9 @@ Ontology description for the anatomical site Free text describing the anatomical site -| title | required | type | -|:--------------------------|:-----------|:---------------------| -| Anatomical Site Free Text | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Anatomical Site Free Text | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -1123,9 +1124,9 @@ Free text describing the anatomical site Diagnosis related to the sample content -| title | required | type | -|:-------------------------|:-----------|:---------------------| -| Sample Content Diagnosis | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Sample Content Diagnosis | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 
'null'}]}]", 'null'] | @@ -1134,9 +1135,9 @@ Diagnosis related to the sample content Restrictions on the use of the tissue sample -| title | required | type | -|:-----------------|:-----------|:---------------------| -| Use Restrictions | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Use Restrictions | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | diff --git a/docs/GWDM/1.1.structure.json b/docs/GWDM/1.1.structure.json index 0758d7c..9d122ae 100644 --- a/docs/GWDM/1.1.structure.json +++ b/docs/GWDM/1.1.structure.json @@ -5,7 +5,11 @@ "title": "Required", "description": "required metadata needed for the GWDM", "examples": null, - "type": "Required", + "type": [ + "Required" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "gatewayId", @@ -13,7 +17,11 @@ "title": "Gatewayid", "description": "Need a field in Mauro that captures the datasetID to link to gateway database - or can we just use the one created in Mauro?", "examples": null, - "type": "str" + "type": [ + "str" + ], + "is_list": false, + "is_optional": false }, { "name": "gatewayPid", @@ -21,7 +29,11 @@ "title": "Gatewaypid", "description": "Need a field in Mauro that captures the dataset pid to link to gateway database", "examples": null, - "type": "str" + "type": [ + "str" + ], + "is_list": false, + "is_optional": false }, { "name": "issued", @@ -29,7 +41,11 @@ "title": "Issued", "description": "Aren't issued and modified always the same because of versioning? 
Is that fine to duplicate because datasets in dcat might look different?", "examples": null, - "type": "datetime" + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "modified", @@ -37,7 +53,11 @@ "title": "Modified", "description": "Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different?", "examples": null, - "type": "datetime" + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "revisions", @@ -45,7 +65,11 @@ "title": "Revisions", "description": null, "examples": null, - "type": "Revision", + "type": [ + "Revision" + ], + "is_list": true, + "is_optional": false, "subItems": [ { "name": "version", @@ -53,7 +77,11 @@ "title": "revision version", "description": "Version number used for previous version of this dataset", "examples": null, - "type": "str" + "type": [ + "str" + ], + "is_list": false, + "is_optional": false }, { "name": "url", @@ -61,7 +89,11 @@ "title": "revision url", "description": "Some url with a reference to the record of a previous version of this dataset", "examples": null, - "type": "Url", + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] } ] @@ -74,7 +106,11 @@ "examples": [ "1.1.0" ], - "type": "str" + "type": [ + "str" + ], + "is_list": false, + "is_optional": false } ] }, @@ -84,7 +120,11 @@ "title": "Summary", "description": "Summary of metadata describing key pieces of information.", "examples": null, - "type": "Summary", + "type": [ + "Summary" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "title", @@ -92,7 +132,11 @@ "title": "Title", "description": "The main title of the dataset", "examples": null, - "type": "TwoHundredFiftyFiveCharacters", + "type": [ + "TwoHundredFiftyFiveCharacters[{'maxLength': 255, 'minLength': 2, 'type': 'string'}]" + ], + 
"is_list": false, + "is_optional": false, "subItems": [] }, { @@ -101,7 +145,12 @@ "title": "Shorttitle", "description": "A shorter descriptive title of the dataset", "examples": null, - "type": "ShortTitle", + "type": [ + "ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -110,7 +159,12 @@ "title": "Doiname", "description": "DOI associated to this dataset", "examples": null, - "type": "Doi", + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -119,7 +173,11 @@ "title": "Abstract", "description": "Longer abstract detailing the dataset.", "examples": null, - "type": "LongAbstractText", + "type": [ + "LongAbstractText[{'anyOf': [{'maxLength': 5000, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -128,7 +186,12 @@ "title": "Keywords", "description": "Comma separated key words associated to this dataset.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -137,7 +200,12 @@ "title": "Controlled Keywords", "description": "Keywords that have been filtered and limited", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -146,7 +214,12 @@ "title": "Contact Point", "description": "email of a person who can be the main contact point of this dataset", "examples": null, - "type": "EmailStr" + "type": [ + "EmailStr", + "null" + ], + "is_list": 
false, + "is_optional": true }, { "name": "datasetType", @@ -154,7 +227,12 @@ "title": "Dataset type", "description": "What type of dataset is this?", "examples": null, - "type": "DatasetType", + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -163,7 +241,12 @@ "title": "Description", "description": "Longer description of the dataset in detail", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -172,7 +255,12 @@ "title": "Publisher", "description": "Link to details about the publisher of this dataset", "examples": null, - "type": "Organisation", + "type": [ + "Organisation", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "name", @@ -180,7 +268,12 @@ "title": "Name", "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. 
In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/", "examples": null, - "type": "Name", + "type": [ + "Name[{}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -189,7 +282,12 @@ "title": "Publisher gateway id", "description": "The link to an ID somewhere in the gateway where more information on the publisher can be retrieved.", "examples": null, - "type": "str" + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "rorId", @@ -197,7 +295,12 @@ "title": "Research Organization Registry Identifier", "description": "The Research Organization Registry (ROR) for the organisation, if applicable", "examples": null, - "type": "str" + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true } ] }, @@ -207,7 +310,12 @@ "title": "Population size", "description": "Summary population size of the cohort", "examples": null, - "type": "int" + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "datasetSubType", @@ -215,7 +323,12 @@ "title": "Dataset sub type", "description": "What us the subtype for this dataset?", "examples": null, - "type": "DatasetType", + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -226,7 +339,12 @@ "title": "Coverage", "description": "Observational, Spatial and Temporal coverage", "examples": null, - "type": "Coverage", + "type": [ + "Coverage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "spatial", @@ -234,7 +352,12 @@ "title": "Spatial", "description": "List of countries where the data was taken from", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, 
"subItems": [] }, { @@ -243,7 +366,12 @@ "title": "Pathway", "description": "Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -252,7 +380,12 @@ "title": "Followup", "description": "What is the typical time span that a patient appears in the dataset (follow up period)", "examples": null, - "type": "Followup" + "type": [ + "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "typicalAgeRange", @@ -260,7 +393,12 @@ "title": "Typical Age Range", "description": "Age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", "examples": null, - "type": "AgeRange", + "type": [ + "AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -269,7 +407,12 @@ "title": "Gender", "description": "Male, Female, Other", "examples": null, - "type": "Gender", + "type": [ + "Gender[{'anyOf': [{'pattern': '\\\\b(?:Male|Female|Other)(?:,(?:Male|Female|Other))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -278,7 +421,12 @@ "title": "Biological Samples", "description": "Blood, Saliva, Urine, Other", "examples": null, - "type": "BiologicalSamples", + "type": [ + "BiologicalSamples[{'anyOf': [{'pattern': '\\\\b(?:Blood|Other|Urine|Saliva)(?:,(?:Blood|Other|Urine|Saliva))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -287,7 +435,12 @@ "title": "Psychological", "description": "Mental health, Cognitive function", "examples": null, - "type": "Psychological", + "type": [ + "Psychological[{'anyOf': [{'pattern': '\\\\b(?:Cognitive Function|Mental Health)(?:,(?:Cognitive Function|Mental Health))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -296,7 +449,12 @@ "title": "Physical", "description": "Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive", "examples": null, - "type": "Physical", + "type": [ + "Physical[{'anyOf': [{'pattern': '\\\\b(?:Respiratory|Vision|Hearing|Musculoskeletal|Cardiovascular|Reproductive)(?:,(?:Respiratory|Vision|Hearing|Musculoskeletal|Cardiovascular|Reproductive))*\\\\b', 'type': 'string'}, 
{'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -305,7 +463,12 @@ "title": "Anthropometric", "description": "Height, Weight, Waist circumference, Hip circumference, Blood pressure", "examples": null, - "type": "Anthropometric", + "type": [ + "Anthropometric[{'anyOf': [{'pattern': '\\\\b(?:Blood Pressure|Hip Circumference|Height|Waist Circumference|Weight)(?:,(?:Blood Pressure|Hip Circumference|Height|Waist Circumference|Weight))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -314,7 +477,12 @@ "title": "Lifestyle", "description": "Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", "examples": null, - "type": "Lifestyles", + "type": [ + "Lifestyles[{'anyOf': [{'pattern': '\\\\b(?:Smoking|Dietary Habits|Physical Activity|Alcohol)(?:,(?:Smoking|Dietary Habits|Physical Activity|Alcohol))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -323,7 +491,12 @@ "title": "Socio-economic", "description": "Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support", "examples": null, - "type": "SocioEconomic", + "type": [ + "SocioEconomic[{'anyOf': [{'pattern': '\\\\b(?:Finances|Family Circumstances|Housing|Education|Marital Status|Occupation|Ethnic Group|Social Support)(?:,(?:Finances|Family Circumstances|Housing|Education|Marital Status|Occupation|Ethnic Group|Social Support))*\\\\b', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -334,7 +507,12 @@ "title": "Provenance", "description": "Provenance information", "examples": null, - "type": "Provenance", + "type": [ + "Provenance", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "origin", @@ -342,7 +520,12 @@ "title": null, "description": null, 
"examples": null, - "type": "Origin", + "type": [ + "Origin", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "purpose", @@ -350,7 +533,12 @@ "title": "Purpose", "description": "Indicates the purpose(s) that the dataset was collected.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -359,7 +547,12 @@ "title": "Source", "description": "Indicates the source of the data extraction", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -368,7 +561,12 @@ "title": "Setting", "description": "Indicate the setting(s) where data was collected. Multiple settings may be provided", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -379,7 +577,11 @@ "title": null, "description": null, "examples": null, - "type": "Temporal", + "type": [ + "Temporal" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "startDate", @@ -387,7 +589,13 @@ "title": "Start Date", "description": "The start of the time period that the dataset provides coverage for", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "endDate", @@ -395,15 +603,25 @@ "title": "End Date", "description": "The end of the time period that the dataset provides coverage for", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": 
"timeLag", "required": true, "title": "Time Lag", - "description": "Rypical time-lag between an event and the data for that event appearing in the dataset", + "description": "Typical time-lag between an event and the data for that event appearing in the dataset", "examples": null, - "type": "TimeLag" + "type": [ + "TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]" + ], + "is_list": false, + "is_optional": false }, { "name": "accrualPeriodicity", @@ -411,7 +629,11 @@ "title": "Periodicity", "description": "frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.", "examples": null, - "type": "Periodicity" + "type": [ + "Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]" + ], + "is_list": false, + "is_optional": false }, { "name": "distributionReleaseDate", @@ -419,7 +641,13 @@ "title": "Release Date", "description": "Date of the latest release of the dataset. If this is a regular release i.e. 
quarterly, or this is a static dataset please complete this alongside Periodicity.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true } ] } @@ -431,7 +659,11 @@ "title": "Accessibility", "description": "Accessibility information.", "examples": null, - "type": "Accessibility", + "type": [ + "Accessibility" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "usage", @@ -439,7 +671,12 @@ "title": "Usage", "description": "This section includes information about how the data can be used and how it is currently being used", "examples": null, - "type": "Usage", + "type": [ + "Usage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "dataUseLimitation", @@ -447,7 +684,12 @@ "title": "Data Use Limitation", "description": "Any restrictions to its usage", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -456,7 +698,12 @@ "title": "Data Use Requirements", "description": "Any requirements needed for data usage", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -465,7 +712,12 @@ "title": "Resource Creator", "description": "Who has created this resource", "examples": null, - "type": "Organisation", + "type": [ + "Organisation", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "name", @@ -473,7 +725,12 @@ "title": "Name", "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. 
In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/", "examples": null, - "type": "Name", + "type": [ + "Name[{}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -482,7 +739,12 @@ "title": "Publisher gateway id", "description": "The link to an ID somewhere in the gateway where more information on the publisher can be retrieved.", "examples": null, - "type": "str" + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "rorId", @@ -490,7 +752,12 @@ "title": "Research Organization Registry Identifier", "description": "The Research Organization Registry (ROR) for the organisation, if applicable", "examples": null, - "type": "str" + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true } ] } @@ -502,7 +769,11 @@ "title": "Access", "description": "This section includes information about data access", "examples": null, - "type": "Access", + "type": [ + "Access" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "accessRights", @@ -510,7 +781,12 @@ "title": "Access Rights", "description": "Optional link(s) or a description of where the license associated to accessing this dataset", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -519,7 +795,12 @@ "title": "Access Service", "description": "", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -528,7 +809,12 @@ "title": "Organisation Access Request Cost", "description": "", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 
'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -537,7 +823,12 @@ "title": "Access Request Duration", "description": "An arbitrary guess at the time to gain access to the dataset...", "examples": null, - "type": "DeliveryLeadTime" + "type": [ + "DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "jurisdiction", @@ -545,7 +836,12 @@ "title": "Jurisdiction", "description": "Comma separated country codes of where the data jurisdiction is.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -554,7 +850,12 @@ "title": "Data Controller", "description": "Name of the data controller", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -563,7 +864,12 @@ "title": "Data Processor", "description": "Name of the data processors", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -574,7 +880,12 @@ "title": "Format and Standards", "description": "Section includes technical attributes for language vocabularies, sizes etc. 
and gives researchers facts about and processing the underlying data in the dataset.", "examples": null, - "type": "FormatAndStandards", + "type": [ + "FormatAndStandards", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "vocabularyEncodingSchemes", @@ -582,7 +893,12 @@ "title": "Controlled Vocabulary", "description": "Code value of the ontology vocabulary encoding", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -591,7 +907,12 @@ "title": "Conforms To", "description": "What the vocabulary conforms to.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -600,7 +921,12 @@ "title": "Language Code(s)", "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -609,7 +935,12 @@ "title": "Dataset Format", "description": "Format(s) the dataset can be made available in", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -622,7 +953,12 @@ "title": "Linkage", "description": "Linkage and enrichment.", "examples": null, - "type": "Linkage", + "type": [ + "Linkage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "isGeneratedUsing", @@ -630,7 
+966,12 @@ "title": "Is Generated Using", "description": "??", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -639,7 +980,12 @@ "title": "Associated Media", "description": "Any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -648,7 +994,12 @@ "title": "Data Uses", "description": "??", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -657,7 +1008,12 @@ "title": "Is Reference in", "description": "Rhe keystone paper associated with the dataset. 
Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -666,7 +1022,12 @@ "title": "Tools", "description": "URL of any analysis tools or models that have been created for this dataset and are available for further use", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -675,7 +1036,12 @@ "title": "Dataset Linkage", "description": "Dataset Linkage copied over from", "examples": null, - "type": "DatasetLinkage", + "type": [ + "DatasetLinkage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "isDerivedFrom", @@ -683,7 +1049,12 @@ "title": "Derivations", "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -692,7 +1063,12 @@ "title": "Is PartOf", "description": "If the dataset is part of a group or family", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -701,7 +1077,12 @@ "title": "Is MemberOf", "description": "Dataset is a member of XXX(?)", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -710,7 +1091,12 @@ "title": "Linked Datasets", "description": "Links to other datasets.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -721,7 +1107,12 @@ "title": "Investigations", "description": "Please provide the keystone paper associated with the dataset.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -732,7 +1123,11 @@ "title": "Observations", "description": "Obsservations", "examples": null, - "type": "Observation" + "type": [ + "Observation" + ], + "is_list": true, + "is_optional": true }, { "name": "structuralMetadata", @@ -740,7 +1135,11 @@ "title": "Structural Metadata", 
"description": "Descriptions of all tables and data elements that can be included in the dataset", "examples": null, - "type": "DataTable" + "type": [ + "DataTable" + ], + "is_list": true, + "is_optional": true }, { "name": "tissuesSampleCollection", @@ -748,7 +1147,12 @@ "title": "Tissues Sample Collection", "description": "Metadata collection for Tissue Samples datasets", "examples": null, - "type": "TissuesSampleCollection", + "type": [ + "TissuesSampleCollection", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "id", @@ -756,7 +1160,12 @@ "title": "ID", "description": "ID of the tissue sample collection", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -765,7 +1174,12 @@ "title": "Data Categories", "description": "Data categories related to the tissue sample collection", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -774,7 +1188,12 @@ "title": "Material Type", "description": "Material type of the tissue sample collection", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -783,7 +1202,12 @@ "title": "Access Conditions", "description": "Access conditions for the tissue sample collection", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -792,7 +1216,12 @@ "title": 
"Collection Type", "description": "Type of the tissue sample collection", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -801,7 +1230,12 @@ "title": "Disease", "description": "Disease associated with the tissue sample collection", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -810,7 +1244,12 @@ "title": "Storage Temperature", "description": "Storage temperature of the tissue sample collection", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -819,7 +1258,12 @@ "title": "Sample Age Range", "description": "Age range of the tissue sample collection", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -828,7 +1272,12 @@ "title": "Tissue Sample Metadata", "description": "Metadata related to the tissue sample", "examples": null, - "type": "TissueSampleMetadata", + "type": [ + "TissueSampleMetadata", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "id", @@ -836,7 +1285,12 @@ "title": "Metadata ID", "description": "ID of the tissue sample metadata", "examples": null, - "type": "str" + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "sampleDonor", @@ -844,7 +1298,12 @@ "title": "Sample Donor", "description": "Information about the sample 
donor", "examples": null, - "type": "SampleDonor", + "type": [ + "SampleDonor", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "id", @@ -852,7 +1311,12 @@ "title": "Donor ID", "description": "ID of the sample donor", "examples": null, - "type": "str" + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "sex", @@ -860,7 +1324,12 @@ "title": "Donor Sex", "description": "Sex of the sample donor", "examples": null, - "type": "str" + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "birthDate", @@ -868,7 +1337,13 @@ "title": "Donor birth date", "description": "Date of birth of the sample donor", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "dataCategories", @@ -876,7 +1351,12 @@ "title": "Donor Data Categories", "description": "Data categories related to the sample donor", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -887,7 +1367,12 @@ "title": "Sample Type", "description": "Type of the tissue sample", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -896,7 +1381,12 @@ "title": "Storage Temperature", "description": "Storage temperature of the tissue sample", "examples": null, - "type": "str" + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "creationDate", @@ -904,7 +1394,13 @@ "title": "Creation Date", "description": "Date when the tissue sample metadata was created", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + 
"null" + ], + "is_list": false, + "is_optional": true }, { "name": "anatomicalSiteOntologyCode", @@ -912,7 +1408,12 @@ "title": "Anatomical Site Ontology Code", "description": "Ontology code for the anatomical site", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -921,7 +1422,12 @@ "title": "Anatomical Site Ontology Description", "description": "Ontology description for the anatomical site", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -930,7 +1436,12 @@ "title": "Anatomical Site Free Text", "description": "Free text describing the anatomical site", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -939,7 +1450,12 @@ "title": "Sample Content Diagnosis", "description": "Diagnosis related to the sample content", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -948,7 +1464,12 @@ "title": "Use Restrictions", "description": "Restrictions on the use of the tissue sample", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] diff --git a/docs/HDRUK/2.1.2.md b/docs/HDRUK/2.1.2.md index 4e0ab5a..d7f0f35 100644 --- 
a/docs/HDRUK/2.1.2.md +++ b/docs/HDRUK/2.1.2.md @@ -3,11 +3,12 @@ System dataset identifier -| title | required | type | -|:-------------------|:-----------|:-------| -| Dataset identifier | True | Uuidv4 | +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset identifier | False | True | True | ["Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: + * ['226fb3f1-4471-400a-8c39-2b66d46a39b6', 'https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6'] @@ -15,11 +16,12 @@ Examples: Dataset metadata version -| title | required | type | -|:----------------|:-----------|:-------| -| Dataset Version | True | Semver | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| Dataset Version | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | Examples: + * 1.1.0 @@ -36,9 +38,9 @@ Revisions of Dataset metadata Semantic Version -| title | required | type | -|:--------|:-----------|:-------| -| | True | Semver | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | @@ 
-47,9 +49,9 @@ Semantic Version URL endpoint to obtain the version -| title | required | type | -|:--------|:-----------|:-------| -| | True | Url | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| | False | True | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -58,9 +60,9 @@ URL endpoint to obtain the version Dataset Metadata Creation Date -| title | required | type | -|:--------------|:-----------|:---------| -| Creation Date | True | datetime | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-------------| +| Creation Date | False | False | True | ['datetime'] | @@ -69,9 +71,9 @@ Dataset Metadata Creation Date Dataset Metadata Creation Date -| title | required | type | -|:------------------|:-----------|:---------| -| Modification Date | True | datetime | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------| +| Modification Date | False | False | True | ['datetime'] | @@ -89,11 +91,12 @@ Summary metadata must be completed by Data Custodians onboarding metadata into t Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers. 
-| title | required | type | -|:--------|:-----------|:--------------------------| -| Title | True | OneHundredFiftyCharacters | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| Title | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | Examples: + * ['North West London COVID-19 Patient Level Situation Report'] @@ -101,11 +104,12 @@ Examples: Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible -| title | required | type | -|:-----------------|:-----------|:-------------| -| Dataset Abstract | True | AbstractText | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Dataset Abstract | False | True | True | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: + * CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web® including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice. @@ -122,9 +126,9 @@ This is the organisation responsible for running or supporting the data access r Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. 
If your organisation does not have a Grid.ac identifier please use the “suggest and institute” function here: https://www.grid.ac/institutes# -| title | required | type | -|:------------------------|:-----------|:-------| -| Organisation Identifier | False | Url | +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Organisation Identifier | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -133,9 +137,9 @@ Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for you Name of the organisation -| title | required | type | -|:------------------|:-----------|:--------------------------| -| Organisation Name | True | OneHundredFiftyCharacters | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| Organisation Name | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | @@ -144,9 +148,9 @@ Name of the organisation Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. 
-| title | required | type | -|:------------------|:-----------|:-------| -| Organisation Logo | False | Url | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Organisation Logo | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -155,9 +159,9 @@ Please provide a logo associated with the Gateway Organisation using a valid URL Please provide a URL that describes the organisation. -| title | required | type | -|:-------------------------|:-----------|:------------| -| Organisation Description | False | Description | +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Organisation Description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -166,9 +170,9 @@ Please provide a URL that describes the organisation. 
Organisation contact point(s) -| title | required | type | -|:---------------------------|:-----------|:-------------| -| Organisation Contact Point | True | EmailAddress | +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Organisation Contact Point | False | True | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.EmailAddress.EmailAddress]]', 'null'] | @@ -177,9 +181,9 @@ Organisation contact point(s) Please indicate if the organisation is an Alliance Member or a Hub. -| title | required | type | -|:------------------------|:-----------|:---------| -| Organisation Membership | False | MemberOf | +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-----------------------------------------------------| +| Organisation Membership | False | True | False | ["MemberOf['HUB','ALLIANCE','OTHER','NCS']", 'null'] | @@ -188,11 +192,12 @@ Please indicate if the organisation is an Alliance Member or a Hub. Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose. 
-| title | required | type | -|:--------------|:-----------|:-------------| -| Contact Point | True | EmailAddress | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------| +| Contact Point | False | True | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: + * SAILDatabank@swansea.ac.uk @@ -200,9 +205,9 @@ Examples: Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users. -| title | required | type | -|:---------|:-----------|:---------------------| -| Keywords | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]', 'null'] | @@ -211,9 +216,9 @@ Please provide relevant and specific keywords that can improve the SEO of your d Alternate dataset identifiers or local identifiers -| title | required | type | -|:------------------------------|:-----------|:---------------------| -| Alternate dataset identifiers | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | 
+|:------------------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Alternate dataset identifiers | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]', 'null'] | @@ -222,11 +227,12 @@ Alternate dataset identifiers or local identifiers All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI. -| title | required | type | -|:--------------------------|:-----------|:-------| -| Digital Object Identifier | False | Doi | +| title | is_list | is_optional | required | type | +|:--------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------| +| Digital Object Identifier | False | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: + * 10.3399/bjgp17X692645 @@ -243,9 +249,9 @@ Documentation can include a rich text description of the dataset or links to med A free-text description of the record. 
-| title | required | type | -|:------------|:-----------|:------------| -| Description | False | Description | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -254,11 +260,12 @@ A free-text description of the record. Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal. 
-| title | required | type | -|:-----------------|:-----------|:---------------------| -| Associated Media | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Associated Media | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | Examples: + * PDF Document that describes study protocol @@ -266,11 +273,12 @@ Examples: Please complete only if the dataset is part of a group or family -| title | required | type | -|:--------|:-----------|:---------------------| -| Group | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Group | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters, hdr_schemata.definitions.HDRUK.IsPartOfEnum.IsPartOfEnum]]', 'null'] | Examples: + * Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS). @@ -287,11 +295,12 @@ This information includes attributes for geographical and temporal coverage, coh The geographical area covered by the dataset. 
It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race. -| title | required | type | -|:--------------------|:-----------|:---------------------| -| Geographic Coverage | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Geographic Coverage | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | Examples: + * https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html @@ -299,9 +308,9 @@ Examples: Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). -| title | required | type | -|:----------|:-----------|:---------| -| Age Range | False | AgeRange | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Age Range | False | True | False | ["AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -310,11 +319,12 @@ Please indicate the age range in whole years of participants in the dataset. Ple Availability of physical samples associated with the dataset. 
If samples are available, please indicate the types of samples that are available. More than one type may be provided. If sample are not yet available, please provide “AVAILABILITY TO BE CONFIRMED”. If samples are not available, then please provide “NOT AVAILABLE”. -| title | required | type | -|:-----------------------------|:-----------|:---------------------| -| Physical Sample Availability | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-----------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------| +| Physical Sample Availability | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List', 'null'] | Examples: + * BONE MARROW @@ -322,9 +332,9 @@ Examples: If known, what is the typical time span that a patient appears in the dataset (follow up period) -| title | required | type | -|:---------|:-----------|:---------| -| Followup | False | Followup | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------| +| Followup | False | True | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | @@ -333,9 +343,9 @@ If known, what is the typical time span that a patient appears in the dataset (f Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. 
-| title | required | type | -|:--------|:-----------|:------------| -| Pathway | False | Description | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Pathway | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -362,9 +372,9 @@ None Pleases indicate the purpose(s) that the dataset was collected. -| title | required | type | -|:--------|:-----------|:---------------------| -| Purpose | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Purpose | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Purpose.Purpose]', 'null'] | @@ -373,9 +383,9 @@ Pleases indicate the purpose(s) that the dataset was collected. 
Pleases indicate the source of the data extraction -| title | required | type | -|:--------|:-----------|:---------------------| -| Source | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Source | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Source.Source]', 'null'] | @@ -384,9 +394,9 @@ Pleases indicate the source of the data extraction Pleases indicate the setting(s) where data was collected. Multiple settings may be provided -| title | required | type | -|:--------|:-----------|:---------------------| -| Setting | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Setting | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Setting.Setting]', 'null'] | @@ -404,9 +414,9 @@ None Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. 
If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/ -| title | required | type | -|:------------|:-----------|:------------| -| Periodicity | True | Periodicity | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Periodicity | False | False | True | ["Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]"] | @@ -415,9 +425,9 @@ Please indicate the frequency of distribution release. If a dataset is distribut Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020. -| title | required | type | -|:-------------|:-----------|:-------| -| Release Date | False | date | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-----------------------------| +| Release Date | False | True | False | ['date', 'datetime', 'null'] | @@ -426,9 +436,9 @@ Date of the latest release of the dataset. If this is a regular release i.e. 
qua The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. -| title | required | type | -|:-----------|:-----------|:-------| -| Start Date | True | date | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-----------------------------| +| Start Date | False | True | True | ['date', 'datetime', 'null'] | @@ -437,9 +447,9 @@ The start of the time period that the dataset provides coverage for. If there ar The end of the time period that the dataset provides coverage for. If the dataset is “Continuous” and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information. -| title | required | type | -|:---------|:-----------|:-------| -| End Date | False | date | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------| +| End Date | False | True | False | ['date', 'datetime', "EndDateEnum['CONTINUOUS',null]", 'null'] | @@ -448,9 +458,9 @@ The end of the time period that the dataset provides coverage for. 
If the datase Please indicate the typical time-lag between an event and the data for that event appearing in the dataset -| title | required | type | -|:---------|:-----------|:--------| -| Time Lag | True | TimeLag | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | False | False | True | ["TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]"] | @@ -477,9 +487,9 @@ This section includes information about how the data can be used and how it is c Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE -| title | required | type | -|:--------------------|:-----------|:---------------------| -| Data Use Limitation | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data Use Limitation | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.DataUseLimitation.DataUseLimitation]', 'null'] | @@ -488,9 +498,9 @@ Please provide an indication of consent permissions for datasets and/or material Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information. 
-| title | required | type | -|:----------------------|:-----------|:---------------------| -| Data Use Requirements | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data Use Requirements | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.DataUseRequirements.DataUseRequirements]', 'null'] | @@ -499,9 +509,9 @@ Please indicate fit here are any additional conditions set for use if any, multi Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided. 
-| title | required | type | -|:----------------------|:-----------|:-----------------| -| Citation Requirements | False | ShortDescription | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Citation Requirements | False | True | False | ["ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]', 'null'] | @@ -510,9 +520,9 @@ Please provide the text that you would like included as part of any citation tha None -| title | required | type | -|:---------------|:-----------|:---------------------| -| Investigations | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Investigations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | @@ -521,9 +531,9 @@ None Please provide the keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list. 
-| title | required | type | -|:----------|:-----------|:-------| -| Citations | False | Doi | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Citations | False | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'str', 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Doi.Doi]]', 'null'] | @@ -541,9 +551,9 @@ This section includes information about data access None -| title | required | type | -|:--------------|:-----------|:----------------| -| Access Rights | True | LongDescription | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Rights | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -552,11 +562,12 @@ None Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset. 
-| title | required | type | -|:---------------|:-----------|:----------------| -| Access Service | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Service | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: + * https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide @@ -564,9 +575,9 @@ Examples: Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian. -| title | required | type | -|:---------------------------------|:-----------|:----------------| -| Organisation Access Request Cost | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Organisation Access Request Cost | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | @@ -575,9 +586,9 @@ Please provide link(s) to a webpage detailing the commercial model for processin Please provide an indication of the typical processing times based on the types of requests typically received. 
-| title | required | type | -|:------------------------|:-----------|:-----------------| -| Access Request Duration | False | DeliveryLeadTime | +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------| +| Access Request Duration | False | True | False | ["DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", 'null'] | @@ -586,9 +597,9 @@ Please provide an indication of the typical processing times based on the types Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored. -| title | required | type | -|:-------------|:-----------|:---------------------| -| Jurisdiction | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Jurisdiction | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Isocountrycode.Isocountrycode]', 'null'] | @@ -597,9 +608,9 @@ Please use country code from ISO 3166-1 country codes and the associated ISO 316 Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed. 
-| title | required | type | -|:----------------|:-----------|:----------------| -| Data Controller | True | LongDescription | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -608,9 +619,9 @@ Data Controller means a person/entity who (either alone or jointly or in common A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller. -| title | required | type | -|:---------------|:-----------|:----------------| -| Data Processor | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Processor | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -628,9 +639,9 @@ Section includes technical attributes for language vocabularies, sizes etc. and List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided. 
-| title | required | type | -|:----------------------|:-----------|:---------------------| -| Controlled Vocabulary | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Controlled Vocabulary | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.ControlledVocabulary.ControlledVocabulary]', 'null'] | @@ -639,9 +650,9 @@ List any relevant terminologies / ontologies / controlled vocabularies, such as List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. 
-| title | required | type | -|:------------|:-----------|:---------------------| -| Conforms To | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Conforms To | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.StandardisedDataModels.StandardisedDataModels]', 'null'] | @@ -650,9 +661,9 @@ List standardised data models that the dataset has been stored in or transformed This should list all the languages in which the dataset metadata and underlying data is made available. -| title | required | type | -|:---------|:-----------|:---------------------| -| Language | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Language | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Language.Language]', 'null'] | @@ -661,9 +672,9 @@ This should list all the languages in which the dataset metadata and underlying If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. 
If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format. -| title | required | type | -|:--------|:-----------|:---------------------| -| Format | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Format | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Format.Format]', 'null'] | @@ -681,9 +692,9 @@ This section includes information about related datasets that may have previousl If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. Note: If all the datasets from Gateway organisation can be linked please indicate “ALL” and the onboarding portal will automate linkage across the datasets submitted. 
-| title | required | type | -|:----------------|:-----------|:---------------------| -| Linked Datasets | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Linked Datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]]', 'null'] | @@ -692,9 +703,9 @@ If applicable, please provide the DOI of other datasets that have previously bee Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset. 
-| title | required | type | -|:------------|:-----------|:---------------------| -| Derivations | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Derivations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.AbstractText.AbstractText]]', 'null'] | @@ -703,9 +714,9 @@ Indicate if derived datasets or predefined extracts are available and the type o Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/ -| title | required | type | -|:--------|:-----------|:---------------------| -| Tools | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Tools | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | @@ -723,11 +734,12 @@ Multiple observations about the dataset may be provided and users are expected t Please select one of the following statistical populations for you observation -| title | required | type | 
-|:-----------------------|:-----------|:---------------------------------| -| Statistical Population | True | StatisticalPopulationConstrained | +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------| +| Statistical Population | False | False | True | ["StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']"] | Examples: + * PERSONS @@ -735,9 +747,9 @@ Examples: Please provide the population size associated with the population type the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset. -| title | required | type | -|:---------------|:-----------|:-------| -| Measured Value | True | int | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:--------| +| Measured Value | False | False | True | ['int'] | @@ -746,9 +758,9 @@ Please provide the population size associated with the population type the datas If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type. -| title | required | type | -|:---------------------------|:-----------|:-------------| -| Disambiguating Description | False | AbstractText | +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Disambiguating Description | False | True | False | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -757,9 +769,9 @@ If SNOMED CT term does not provide sufficient detail, please provide a descripti Please provide the date that the observation was made. 
Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations. -| title | required | type | -|:-----------------|:-----------|:-------| -| Observation Date | True | date | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:---------------------| +| Observation Date | False | False | True | ['date', 'datetime'] | @@ -768,9 +780,9 @@ Please provide the date that the observation was made. Some datasets may be cont Initially this will be defaulted to "COUNT" -| title | required | type | -|:------------------|:-----------|:-----------------| -| Measured Property | True | MeasuredProperty | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------------------| +| Measured Property | False | False | True | ['MeasuredProperty[{}]'] | @@ -779,9 +791,9 @@ Initially this will be defaulted to "COUNT" Descriptions of all tables and data elements that can be included in the dataset -| title | required | type | -|:--------------------|:-----------|:----------| -| Structural Metadata | False | DataClass | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:--------------| +| Structural Metadata | True | True | False | ['DataClass'] | diff --git a/docs/HDRUK/2.1.2.structure.json b/docs/HDRUK/2.1.2.structure.json index 617a1c6..2eaa302 100644 --- a/docs/HDRUK/2.1.2.structure.json +++ b/docs/HDRUK/2.1.2.structure.json @@ -10,7 +10,13 @@ 
"https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" ] ], - "type": "Uuidv4", + "type": [ + "Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -21,7 +27,11 @@ "examples": [ "1.1.0" ], - "type": "Semver", + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -30,7 +40,11 @@ "title": "Dataset Revisions", "description": "Revisions of Dataset metadata", "examples": null, - "type": "Revision", + "type": [ + "Revision" + ], + "is_list": true, + "is_optional": false, "subItems": [ { "name": "version", @@ -38,7 +52,11 @@ "title": null, "description": "Semantic Version", "examples": null, - "type": "Semver", + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -47,7 +65,12 @@ "title": null, "description": "URL endpoint to obtain the version", "examples": null, - "type": "Url", + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -58,7 +81,11 @@ "title": "Creation Date", "description": "Dataset Metadata Creation Date", "examples": null, - "type": "datetime" + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "modified", @@ -66,7 +93,11 @@ "title": "Modification Date", "description": "Dataset Metadata Creation Date", "examples": null, - "type": "datetime" + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "summary", @@ -74,7 +105,11 @@ "title": "Summary", 
"description": "Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP.", "examples": null, - "type": "Summary", + "type": [ + "Summary" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "title", @@ -86,7 +121,11 @@ "North West London COVID-19 Patient Level Situation Report" ] ], - "type": "OneHundredFiftyCharacters", + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -97,7 +136,12 @@ "examples": [ "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." ], - "type": "AbstractText", + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -106,7 +150,11 @@ "title": "Dataset publisher", "description": "This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank.", "examples": null, - "type": "Organisation", + "type": [ + "Organisation" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "identifier", @@ -114,7 +162,12 @@ "title": "Organisation Identifier", "description": "Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. 
If your organisation does not have a Grid.ac identifier please use the \u201csuggest and institute\u201d function here: https://www.grid.ac/institutes#", "examples": null, - "type": "Url", + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -123,7 +176,11 @@ "title": "Organisation Name", "description": "Name of the organisation", "examples": null, - "type": "OneHundredFiftyCharacters", + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -132,7 +189,12 @@ "title": "Organisation Logo", "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg.", "examples": null, - "type": "Url", + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -141,7 +203,12 @@ "title": "Organisation Description", "description": "Please provide a URL that describes the organisation.", "examples": null, - "type": "Description", + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -150,7 +217,13 @@ "title": "Organisation Contact Point", "description": "Organisation contact point(s)", "examples": null, - "type": "EmailAddress", + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.EmailAddress.EmailAddress]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -159,62 +232,12 @@ "title": "Organisation Membership", "description": "Please indicate if the 
organisation is an Alliance Member or a Hub.", "examples": null, - "type": "MemberOf" - }, - { - "name": "accessRights", - "required": false, - "title": "Organisation Default Access Rights", - "description": "The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both.", - "examples": null, - "type": "Url", - "subItems": [] - }, - { - "name": "deliveryLeadTime", - "required": false, - "title": "Access Request Duration", - "description": "Please provide an indication of the typical processing times based on the types of requests typically received. Note: This value will be used as default access request duration for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", - "examples": null, - "type": "DeliveryLeadTime" - }, - { - "name": "accessService", - "required": false, - "title": "Organisation Access Service", - "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. 
However, there will be the opportunity to overwrite this value for each dataset.", - "examples": [ - "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + "type": [ + "MemberOf['HUB','ALLIANCE','OTHER','NCS']", + "null" ], - "type": "LongDescription", - "subItems": [] - }, - { - "name": "accessRequestCost", - "required": false, - "title": "Organisation Access Request Cost", - "description": "Please provide link(s) to a webpage or a short description detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", - "examples": null, - "type": "ShortDescription", - "subItems": [] - }, - { - "name": "dataUseLimitation", - "required": false, - "title": "Data Use Limitation", - "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. Notes: where there are existing data-sharing arrangements such as the HDR UK HUB data sharing agreement or the NIHR HIC data sharing agreement this should be indicated within access rights. This value will be used as terms for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", - "examples": null, - "type": "CommaSeparatedValues", - "subItems": [] - }, - { - "name": "dataUseRequirements", - "required": false, - "title": "Data Use Requirements", - "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information.", - "examples": null, - "type": "CommaSeparatedValues", - "subItems": [] + "is_list": false, + "is_optional": true } ] }, @@ -226,7 +249,12 @@ "examples": [ "SAILDatabank@swansea.ac.uk" ], - "type": "EmailAddress", + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -235,7 +263,13 @@ "title": "Keywords", "description": "Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -244,7 +278,13 @@ "title": "Alternate dataset identifiers", "description": "Alternate dataset identifiers or local identifiers", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -255,7 +295,12 @@ "examples": [ "10.3399/bjgp17X692645" ], - "type": "Doi", + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -266,7 +311,12 @@ "title": "Documentation", 
"description": "Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. Organisations are required to confirm that they have permission to distribute any additional media.", "examples": null, - "type": "Documentation", + "type": [ + "Documentation", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "description", @@ -274,7 +324,12 @@ "title": "Description", "description": "A free-text description of the record.", "examples": null, - "type": "Description", + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -285,7 +340,13 @@ "examples": [ "PDF Document that describes study protocol" ], - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -296,7 +357,13 @@ "examples": [ "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)." ], - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters, hdr_schemata.definitions.HDRUK.IsPartOfEnum.IsPartOfEnum]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -307,7 +374,12 @@ "title": "Coverage", "description": "This information includes attributes for geographical and temporal coverage, cohort details etc. 
to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data.", "examples": null, - "type": "Coverage", + "type": [ + "Coverage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "spatial", @@ -317,7 +389,13 @@ "examples": [ "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" ], - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -326,7 +404,12 @@ "title": "Age Range", "description": "Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", "examples": null, - "type": "AgeRange", + "type": [ + "AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -337,7 +420,13 @@ "examples": [ "BONE MARROW" ], - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -346,7 +435,12 @@ "title": "Followup", "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", "examples": null, - "type": "Followup" + "type": [ + "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "pathway", @@ 
-354,7 +448,12 @@ "title": "Pathway", "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", "examples": null, - "type": "Description", + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -365,7 +464,12 @@ "title": "Provenance", "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", "examples": null, - "type": "Provenance", + "type": [ + "Provenance", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "origin", @@ -373,7 +477,12 @@ "title": null, "description": null, "examples": null, - "type": "Origin", + "type": [ + "Origin", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "purpose", @@ -381,7 +490,13 @@ "title": "Purpose", "description": "Pleases indicate the purpose(s) that the dataset was collected.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Purpose.Purpose]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -390,7 +505,13 @@ "title": "Source", "description": "Pleases indicate the source of the data extraction", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + 
"typing.List[hdr_schemata.definitions.HDRUK.Source.Source]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -399,7 +520,13 @@ "title": "Setting", "description": "Pleases indicate the setting(s) where data was collected. Multiple settings may be provided", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Setting.Setting]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -410,7 +537,11 @@ "title": null, "description": null, "examples": null, - "type": "Temporal", + "type": [ + "Temporal" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "accrualPeriodicity", @@ -418,7 +549,11 @@ "title": "Periodicity", "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", "examples": null, - "type": "Periodicity" + "type": [ + "Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]" + ], + "is_list": false, + "is_optional": false }, { "name": "distributionReleaseDate", @@ -426,7 +561,13 @@ "title": "Release Date", "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "startDate", @@ -434,7 +575,13 @@ "title": "Start Date", "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "endDate", @@ -442,7 +589,14 @@ "title": "End Date", "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. 
If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "EndDateEnum['CONTINUOUS',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "timeLag", @@ -450,7 +604,11 @@ "title": "Time Lag", "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", "examples": null, - "type": "TimeLag" + "type": [ + "TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]" + ], + "is_list": false, + "is_optional": false } ] } @@ -462,7 +620,11 @@ "title": "Accessibility", "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", "examples": null, - "type": "Accessibility", + "type": [ + "Accessibility" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "usage", @@ -470,7 +632,12 @@ "title": "Usage", "description": "This section includes information about how the data can be used and how it is currently being used", "examples": null, - "type": "Usage", + "type": [ + "Usage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "dataUseLimitation", @@ -478,7 +645,13 @@ "title": "Data Use Limitation", "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. 
NOTE: we have extended the DUO to include a value for NO LINKAGE", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.DataUseLimitation.DataUseLimitation]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -487,7 +660,13 @@ "title": "Data Use Requirements", "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.DataUseRequirements.DataUseRequirements]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -496,7 +675,13 @@ "title": "Citation Requirements", "description": "Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. 
No employee details should be provided.", "examples": null, - "type": "ShortDescription", + "type": [ + "ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -505,7 +690,13 @@ "title": "Investigations", "description": null, "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -514,7 +705,14 @@ "title": "Citations", "description": "Please provide the keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. 
Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list.", "examples": null, - "type": "Doi", + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "str", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Doi.Doi]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -525,7 +723,11 @@ "title": "Access", "description": "This section includes information about data access", "examples": null, - "type": "Access", + "type": [ + "Access" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "accessRights", @@ -533,7 +735,12 @@ "title": "Access Rights", "description": null, "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -544,7 +751,12 @@ "examples": [ "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" ], - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -553,7 +765,13 @@ "title": "Organisation Access Request Cost", "description": "Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, 
{ @@ -562,7 +780,12 @@ "title": "Access Request Duration", "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", "examples": null, - "type": "DeliveryLeadTime" + "type": [ + "DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "jurisdiction", @@ -570,7 +793,13 @@ "title": "Jurisdiction", "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Isocountrycode.Isocountrycode]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -579,7 +808,12 @@ "title": "Data Controller", "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -588,7 +822,12 @@ "title": "Data Processor", "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 
50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -599,7 +838,12 @@ "title": "Format and Standards", "description": "Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset.", "examples": null, - "type": "FormatAndStandards", + "type": [ + "FormatAndStandards", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "vocabularyEncodingScheme", @@ -607,7 +851,13 @@ "title": "Controlled Vocabulary", "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.ControlledVocabulary.ControlledVocabulary]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -616,7 +866,13 @@ "title": "Conforms To", "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.StandardisedDataModels.StandardisedDataModels]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -625,7 +881,13 @@ "title": "Language", "description": "This should list all the languages in which the dataset metadata and underlying data is made available.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Language.Language]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -634,7 +896,13 @@ "title": "Format", "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. 
If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Format.Format]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -647,7 +915,12 @@ "title": "Enrichment and Linkage", "description": "This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", "examples": null, - "type": "EnrichmentAndLinkage", + "type": [ + "EnrichmentAndLinkage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "qualifiedRelation", @@ -655,7 +928,13 @@ "title": "Linked Datasets", "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. 
Note: If all the datasets from Gateway organisation can be linked please indicate \u201cALL\u201d and the onboarding portal will automate linkage across the datasets submitted.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -664,7 +943,13 @@ "title": "Derivations", "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.AbstractText.AbstractText]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -673,7 +958,13 @@ "title": "Tools", "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. 
Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -684,7 +975,11 @@ "title": "Observations", "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d", "examples": null, - "type": "Observation", + "type": [ + "Observation" + ], + "is_list": true, + "is_optional": false, "subItems": [ { "name": "observedNode", @@ -694,7 +989,11 @@ "examples": [ "PERSONS" ], - "type": "StatisticalPopulationConstrained" + "type": [ + "StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']" + ], + "is_list": false, + "is_optional": false }, { "name": "measuredValue", @@ -702,7 +1001,11 @@ "title": "Measured Value", "description": "Please provide the population size associated with the population type the dataset i.e. 
1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", "examples": null, - "type": "int" + "type": [ + "int" + ], + "is_list": false, + "is_optional": false }, { "name": "disambiguatingDescription", @@ -710,7 +1013,12 @@ "title": "Disambiguating Description", "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", "examples": null, - "type": "AbstractText", + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -719,7 +1027,12 @@ "title": "Observation Date", "description": "Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. 
Users can add multiple observations.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "measuredProperty", @@ -727,7 +1040,11 @@ "title": "Measured Property", "description": "Initially this will be defaulted to \"COUNT\"", "examples": null, - "type": "MeasuredProperty", + "type": [ + "MeasuredProperty[{}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] } ] @@ -738,6 +1055,10 @@ "title": "Structural Metadata", "description": "Descriptions of all tables and data elements that can be included in the dataset", "examples": null, - "type": "DataClass" + "type": [ + "DataClass" + ], + "is_list": true, + "is_optional": true } ] \ No newline at end of file diff --git a/docs/HDRUK/2.1.3.md b/docs/HDRUK/2.1.3.md new file mode 100644 index 0000000..d0315a0 --- /dev/null +++ b/docs/HDRUK/2.1.3.md @@ -0,0 +1,799 @@ + +## identifier + +System dataset identifier + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset identifier | False | True | True | ["Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * ['226fb3f1-4471-400a-8c39-2b66d46a39b6', 'https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6'] + + +## version + +Dataset metadata version + +| title | is_list | is_optional | required | type | 
+|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| Dataset Version | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | + +Examples: + + * 1.1.0 + + +## revisions + +Revisions of Dataset metadata + + + + + + +### version + +Semantic Version + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | + + + + +### url + +URL endpoint to obtain the version + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| | False | True | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +## issued + +Dataset Metadata Creation Date + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-------------| +| Creation Date | False | False | True | ['datetime'] | + + + + +## modified + +Dataset Metadata Modification Date + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------| +| Modification Date | False | False | True | ['datetime'] | + + + + +## summary + +Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP. + + + + + + +### title + +Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway.
If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| Title | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | + +Examples: + + * ['North West London COVID-19 Patient Level Situation Report'] + + +### abstract + +Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Dataset Abstract | False | True | True | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web® including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice. + + +### publisher + +This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. 
In most cases this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank. + + + + + + +#### identifier + +Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. If your organisation does not have a Grid.ac identifier please use the “suggest an institute” function here: https://www.grid.ac/institutes# + +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Organisation Identifier | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### name + +Name of the organisation + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| Organisation Name | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | + + + + +#### logo + +Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Organisation Logo | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### description + +Please provide a URL that describes the organisation.
+ +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Organisation Description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### contactPoint + +Organisation contact point(s) + +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Organisation Contact Point | False | True | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.EmailAddress.EmailAddress]]', 'null'] | + + + + +#### memberOf + +Please indicate if the organisation is an Alliance Member or a Hub. + +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-----------------------------------------------------| +| Organisation Membership | False | True | False | ["MemberOf['HUB','ALLIANCE','OTHER','NCS']", 'null'] | + + + + +### contactPoint + +Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose. 
+ +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------| +| Contact Point | False | True | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * SAILDatabank@swansea.ac.uk + + +### keywords + +Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users. + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]', 'null'] | + + + + +### alternateIdentifiers + +Alternate dataset identifiers or local identifiers + +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Alternate dataset identifiers | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]', 'null'] | + + + + +### 
doiName + +All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI. + +| title | is_list | is_optional | required | type | +|:--------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------| +| Digital Object Identifier | False | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * 10.3399/bjgp17X692645 + + +## documentation + +Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. Organisations are required to confirm that they have permission to distribute any additional media. + + + + + + +### description + +A free-text description of the record. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### associatedMedia + +Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal. 
+ +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Associated Media | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | + +Examples: + + * PDF Document that describes study protocol + + +### isPartOf + +Please complete only if the dataset is part of a group or family + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Group | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters, hdr_schemata.definitions.HDRUK.IsPartOfEnum.IsPartOfEnum]]', 'null'] | + +Examples: + + * Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS). + + +## coverage + +This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data. + + + + + + +### spatial + +The geographical area covered by the dataset. 
It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race. + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Geographic Coverage | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | + +Examples: + + * https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html + + +### typicalAgeRange + +Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Age Range | False | True | False | ["AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### physicalSampleAvailability + +Availability of physical samples associated with the dataset. If samples are available, please indicate the types of samples that are available. More than one type may be provided. If samples are not yet available, please provide “AVAILABILITY TO BE CONFIRMED”. If samples are not available, then please provide “NOT AVAILABLE”.
+ +| title | is_list | is_optional | required | type | +|:-----------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------| +| Physical Sample Availability | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List', 'null'] | + +Examples: + + * BONE MARROW + + +### followup + +If known, what is the typical time span that a patient appears in the dataset (follow up period) + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------| +| Followup | False | True | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | + + + + +### pathway + +Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Pathway | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +## provenance + +Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness. 
+ + + + + + +### origin + +None + + + + + + +#### purpose + +Please indicate the purpose(s) for which the dataset was collected. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Purpose | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Purpose.Purpose]', 'null'] | + + + + +#### source + +Please indicate the source of the data extraction + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Source | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Source.Source]', 'null'] | + + + + +#### collectionSituation + +Please indicate the setting(s) where data was collected. Multiple settings may be provided + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Setting | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Setting.Setting]', 'null'] | + + + + +### temporal + +None + + + + + + +#### distributionReleaseDate + +Date of the latest release of the dataset. If this is a regular release i.e.
quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020. + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-----------------------------| +| Release Date | False | True | False | ['date', 'datetime', 'null'] | + + + + +#### startDate + +The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-----------------------------| +| Start Date | False | True | True | ['date', 'datetime', 'null'] | + + + + +#### endDate + +The end of the time period that the dataset provides coverage for. If the dataset is “Continuous” and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information. 
+ +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------| +| End Date | False | True | False | ['date', 'datetime', "EndDateEnum['CONTINUOUS',null]", 'null'] | + + + + +#### timeLag + +Please indicate the typical time-lag between an event and the data for that event appearing in the dataset + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | False | False | True | ["TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]"] | + + + + +#### publishingFrequency + +Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/ + +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Publishing Frequency | False | False | True | ["Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]"] | + + + + +## accessibility + +Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets. + + + + + + +### usage + +This section includes information about how the data can be used and how it is currently being used + + + + + + +#### dataUseLimitation + +Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data Use Limitation | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.DataUseLimitation.DataUseLimitation]', 'null'] | + + + + +#### dataUseRequirements + +Please indicate if there are any additional conditions set for use; multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information.
+ +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data Use Requirements | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.DataUseRequirements.DataUseRequirements]', 'null'] | + + + + +#### resourceCreator + +Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided. + +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Citation Requirements | False | True | False | ["ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]', 'null'] | + + + + +#### investigations + +None + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Investigations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | + + + + +#### isReferencedBy + +Please provide the 
keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list. + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Citations | False | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'str', 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Doi.Doi]]', 'null'] | + + + + +### access + +This section includes information about data access + + + + + + +#### accessRights + +None + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Rights | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### accessService + +Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset. 
+ +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Service | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide + + +#### accessRequestCost + +Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian. + +| title | is_list | is_optional | required | type | +|:---------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Organisation Access Request Cost | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | + + + + +#### deliveryLeadTime + +Please provide an indication of the typical processing times based on the types of requests typically received. 
+ +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------| +| Access Request Duration | False | True | False | ["DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", 'null'] | + + + + +#### jurisdiction + +Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored. + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Jurisdiction | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Isocountrycode.Isocountrycode]', 'null'] | + + + + +#### dataController + +Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed. 
+ +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataProcessor + +A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Processor | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### formatAndStandards + +Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset. + + + + + + +#### vocabularyEncodingScheme + +List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided. 
+ +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Controlled Vocabulary | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.ControlledVocabulary.ControlledVocabulary]', 'null'] | + + + + +#### conformsTo + +List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Conforms To | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.StandardisedDataModels.StandardisedDataModels]', 'null'] | + + + + +#### language + +This should list all the languages in which the dataset metadata and underlying data is made available. 
+ +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Language | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Language.Language]', 'null'] | + + + + +#### format + +If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Format | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Format.Format]', 'null'] | + + + + +## enrichmentAndLinkage + +This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers. 
+ + + + + + +### qualifiedRelation + +If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. Note: If all the datasets from Gateway organisation can be linked please indicate “ALL” and the onboarding portal will automate linkage across the datasets submitted. + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Linked Datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]]', 'null'] | + + + + +### derivation + +Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset. 
+ +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Derivations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.AbstractText.AbstractText]]', 'null'] | + + + + +### tools + +Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/ + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Tools | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | + + + + +## observations + +Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. 
Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: “2017” + + + + + + +### observedNode + +Please select one of the following statistical populations for your observation + +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------| +| Statistical Population | False | False | True | ["StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']"] | + +Examples: + + * PERSONS + + +### measuredValue + +Please provide the population size associated with the population type of the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:--------| +| Measured Value | False | False | True | ['int'] | + + + + +### disambiguatingDescription + +If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type. 
+ +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Disambiguating Description | False | True | False | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### observationDate + +Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations. + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:---------------------| +| Observation Date | False | False | True | ['date', 'datetime'] | + + + + +### measuredProperty + +Initially this will be defaulted to "COUNT" + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------------------| +| Measured Property | False | False | True | ['MeasuredProperty[{}]'] | + + + + +## structuralMetadata + +Descriptions of all tables and data elements that can be included in the dataset + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:--------------| +| Structural Metadata | True | True | False | ['DataClass'] | + + + diff --git a/docs/HDRUK/2.1.3.structure.json b/docs/HDRUK/2.1.3.structure.json new file mode 100644 index 0000000..e053d45 --- /dev/null +++ b/docs/HDRUK/2.1.3.structure.json @@ -0,0 +1,1064 @@ +[ + { + "name": "identifier", + "required": true, 
+ "title": "Dataset identifier", + "description": "System dataset identifier", + "examples": [ + [ + "226fb3f1-4471-400a-8c39-2b66d46a39b6", + "https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" + ] + ], + "type": [ + "Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "version", + "required": true, + "title": "Dataset Version", + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "revisions", + "required": true, + "title": "Dataset Revisions", + "description": "Revisions of Dataset metadata", + "examples": null, + "type": [ + "Revision" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "version", + "required": true, + "title": null, + "description": "Semantic Version", + "examples": null, + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "url", + "required": true, + "title": null, + "description": "URL endpoint to obtain the version", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "issued", + "required": true, + "title": "Creation Date", + "description": "Dataset Metadata Creation Date", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "modified", + 
"required": true, + "title": "Modification Date", + "description": "Dataset Metadata Creation Date", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "summary", + "required": true, + "title": "Summary", + "description": "Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP.", + "examples": null, + "type": [ + "Summary" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "title", + "required": true, + "title": "Title", + "description": "Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers.", + "examples": [ + [ + "North West London COVID-19 Patient Level Situation Report" + ] + ], + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "abstract", + "required": true, + "title": "Dataset Abstract", + "description": "Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. 
The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible", + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." + ], + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "publisher", + "required": true, + "title": "Dataset publisher", + "description": "This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank.", + "examples": null, + "type": [ + "Organisation" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "identifier", + "required": false, + "title": "Organisation Identifier", + "description": "Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. 
If your organisation does not have a Grid.ac identifier please use the \u201csuggest an institute\u201d function here: https://www.grid.ac/institutes#", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "name", + "required": true, + "title": "Organisation Name", + "description": "Name of the organisation", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "logo", + "required": false, + "title": "Organisation Logo", + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg.", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "description", + "required": false, + "title": "Organisation Description", + "description": "Please provide a URL that describes the organisation.", + "examples": null, + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "contactPoint", + "required": true, + "title": "Organisation Contact Point", + "description": "Organisation contact point(s)", + "examples": null, + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.EmailAddress.EmailAddress]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "memberOf", + "required": false, + "title": "Organisation Membership", 
+ "description": "Please indicate if the organisation is an Alliance Member or a Hub.", + "examples": null, + "type": [ + "MemberOf['HUB','ALLIANCE','OTHER','NCS']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "contactPoint", + "required": true, + "title": "Contact Point", + "description": "Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose.", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "keywords", + "required": true, + "title": "Keywords", + "description": "Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. 
We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "alternateIdentifiers", + "required": false, + "title": "Alternate dataset identifiers", + "description": "Alternate dataset identifiers or local identifiers", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "doiName", + "required": false, + "title": "Digital Object Identifier", + "description": "All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI.", + "examples": [ + "10.3399/bjgp17X692645" + ], + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "documentation", + "required": false, + "title": "Documentation", + "description": "Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. 
Organisations are required to confirm that they have permission to distribute any additional media.", + "examples": null, + "type": [ + "Documentation", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "description", + "required": false, + "title": "Description", + "description": "A free-text description of the record.", + "examples": null, + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "associatedMedia", + "required": false, + "title": "Associated Media", + "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xlsx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "examples": [ + "PDF Document that describes study protocol" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isPartOf", + "required": false, + "title": "Group", + "description": "Please complete only if the dataset is part of a group or family", + "examples": [ + "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)." 
+ ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters, hdr_schemata.definitions.HDRUK.IsPartOfEnum.IsPartOfEnum]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "coverage", + "required": false, + "title": "Coverage", + "description": "This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data.", + "examples": null, + "type": [ + "Coverage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "spatial", + "required": false, + "title": "Geographic Coverage", + "description": "The geographical area covered by the dataset. It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", + "examples": [ + "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "typicalAgeRange", + "required": false, + "title": "Age Range", + "description": "Please indicate the age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "examples": null, + "type": [ + "AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "physicalSampleAvailability", + "required": false, + "title": "Physical Sample Availability", + "description": "Availability of physical samples associated with the dataset. If samples are available, please indicate the types of samples that are available. More than one type may be provided. If samples are not yet available, please provide \u201cAVAILABILITY TO BE CONFIRMED\u201d. If samples are not available, then please provide \u201cNOT AVAILABLE\u201d.", + "examples": [ + "BONE MARROW" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "followup", + "required": false, + "title": "Followup", + "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", + "examples": null, + "type": [ + "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "pathway", + "required": false, + "title": "Pathway", + "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. 
primary and secondary care), or an integrated care record covering the whole patient pathway.", + "examples": null, + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "provenance", + "required": false, + "title": "Provenance", + "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", + "examples": null, + "type": [ + "Provenance", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "origin", + "required": false, + "title": null, + "description": null, + "examples": null, + "type": [ + "Origin", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "purpose", + "required": false, + "title": "Purpose", + "description": "Please indicate the purpose(s) that the dataset was collected.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Purpose.Purpose]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "source", + "required": false, + "title": "Source", + "description": "Please indicate the source of the data extraction", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Source.Source]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "collectionSituation", + "required": false, + "title": "Setting", + "description": "Please indicate the setting(s) where data was collected. 
Multiple settings may be provided", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Setting.Setting]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "temporal", + "required": true, + "title": null, + "description": null, + "examples": null, + "type": [ + "Temporal" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "distributionReleaseDate", + "required": false, + "title": "Release Date", + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "startDate", + "required": true, + "title": "Start Date", + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "endDate", + "required": false, + "title": "End Date", + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. 
If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "examples": null, + "type": [ + "date", + "datetime", + "EndDateEnum['CONTINUOUS',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "timeLag", + "required": true, + "title": "Time Lag", + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "examples": null, + "type": [ + "TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "publishingFrequency", + "required": true, + "title": "Publishing Frequency", + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "examples": null, + "type": [ + "Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]" + ], + "is_list": false, + "is_optional": false + } + ] + } + ] + }, + { + "name": "accessibility", + "required": true, + "title": "Accessibility", + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", + "examples": null, + "type": [ + "Accessibility" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "usage", + "required": false, + "title": "Usage", + "description": "This section includes information about how the data can be used and how it is currently being used", + "examples": null, + "type": [ + "Usage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "dataUseLimitation", + "required": false, + "title": "Data Use Limitation", + "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.DataUseLimitation.DataUseLimitation]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataUseRequirements", + "required": false, + "title": "Data Use Requirements", + "description": "Please indicate if there are any additional conditions set for use; multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.DataUseRequirements.DataUseRequirements]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "resourceCreator", + "required": false, + "title": "Citation Requirements", + "description": "Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided.", + "examples": null, + "type": [ + "ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "investigations", + "required": false, + "title": "Investigations", + "description": null, + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isReferencedBy", + "required": false, + "title": "Citations", + "description": "Please provide the keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. 
Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list.", + "examples": null, + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "str", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Doi.Doi]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "access", + "required": true, + "title": "Access", + "description": "This section includes information about data access", + "examples": null, + "type": [ + "Access" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "accessRights", + "required": true, + "title": "Access Rights", + "description": null, + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessService", + "required": false, + "title": "Access Service", + "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. 
However, there will be the opportunity to overwrite this value for each dataset.", + "examples": [ + "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + ], + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessRequestCost", + "required": false, + "title": "Organisation Access Request Cost", + "description": "Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "deliveryLeadTime", + "required": false, + "title": "Access Request Duration", + "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", + "examples": null, + "type": [ + "DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "jurisdiction", + "required": true, + "title": "Jurisdiction", + "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Isocountrycode.Isocountrycode]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataController", + "required": true, + "title": "Data Controller", + "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataProcessor", + "required": false, + "title": "Data Processor", + "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "formatAndStandards", + "required": false, + "title": "Format and Standards", + "description": "Section includes technical attributes for language vocabularies, sizes etc. 
and gives researchers facts about and processing the underlying data in the dataset.", + "examples": null, + "type": [ + "FormatAndStandards", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "vocabularyEncodingScheme", + "required": true, + "title": "Controlled Vocabulary", + "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.ControlledVocabulary.ControlledVocabulary]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "conformsTo", + "required": true, + "title": "Conforms To", + "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.StandardisedDataModels.StandardisedDataModels]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "language", + "required": true, + "title": "Language", + "description": "This should list all the languages in which the dataset metadata and underlying data is made available.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Language.Language]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "format", + "required": true, + "title": "Format", + "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. 
If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Format.Format]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + } + ] + }, + { + "name": "enrichmentAndLinkage", + "required": false, + "title": "Enrichment and Linkage", + "description": "This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", + "examples": null, + "type": [ + "EnrichmentAndLinkage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "qualifiedRelation", + "required": false, + "title": "Linked Datasets", + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. 
Note: If all the datasets from Gateway organisation can be linked please indicate \u201cALL\u201d and the onboarding portal will automate linkage across the datasets submitted.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "derivation", + "required": false, + "title": "Derivations", + "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.AbstractText.AbstractText]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "tools", + "required": false, + "title": "Tools", + "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. 
Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "observations", + "required": true, + "title": "Observations", + "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d", + "examples": null, + "type": [ + "Observation" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "observedNode", + "required": true, + "title": "Statistical Population", + "description": "Please select one of the following statistical populations for your observation", + "examples": [ + "PERSONS" + ], + "type": [ + "StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredValue", + "required": true, + "title": "Measured Value", + "description": "Please provide the population size associated with the population type 
the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "disambiguatingDescription", + "required": false, + "title": "Disambiguating Description", + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "examples": null, + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "observationDate", + "required": true, + "title": "Observation Date", + "description": "Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. 
Users can add multiple observations.", + "examples": null, + "type": [ + "date", + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredProperty", + "required": true, + "title": "Measured Property", + "description": "Initially this will be defaulted to \"COUNT\"", + "examples": null, + "type": [ + "MeasuredProperty[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + } + ] + }, + { + "name": "structuralMetadata", + "required": false, + "title": "Structural Metadata", + "description": "Descriptions of all tables and data elements that can be included in the dataset", + "examples": null, + "type": [ + "DataClass" + ], + "is_list": true, + "is_optional": true + } +] \ No newline at end of file diff --git a/docs/HDRUK/2.2.0.md b/docs/HDRUK/2.2.0.md index 9e797ab..cc70de0 100644 --- a/docs/HDRUK/2.2.0.md +++ b/docs/HDRUK/2.2.0.md @@ -3,9 +3,9 @@ System dataset identifier -| title | required | type | -|:-------------------|:-----------|:-------| -| Dataset identifier | True | Uuidv4 | +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset identifier | False | True | True | ["Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -16,9 +16,9 @@ Examples: Dataset metadata version -| title | required | type | -|:----------------|:-----------|:-------| -| Dataset Version | True | Semver | +| title | is_list | is_optional | required | type | 
+|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| Dataset Version | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | Examples: @@ -38,9 +38,9 @@ Revisions of Dataset metadata Semantic Version -| title | required | type | -|:--------|:-----------|:-------| -| | True | Semver | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | @@ -49,9 +49,9 @@ Semantic Version URL endpoint to obtain the version -| title | required | type | -|:--------|:-----------|:-------| -| | True | Url | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| | False | True | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -60,9 +60,9 @@ URL endpoint to obtain the version Dataset Metadata Creation Date -| title | required | type | -|:--------------|:-----------|:---------| -| Creation Date | True | datetime | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-------------| +| Creation Date | False | False | True | ['datetime'] | @@ -71,9 +71,9 @@ Dataset Metadata Creation Date Dataset Metadata Creation Date -| title | required | type | -|:------------------|:-----------|:---------| -| Modification Date | True | datetime | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------| +| Modification Date | False | False | True | ['datetime'] | @@ 
-91,9 +91,9 @@ Summary metadata must be completed by Data Custodians onboarding metadata into t Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers. -| title | required | type | -|:--------|:-----------|:--------------------------| -| Title | True | OneHundredFiftyCharacters | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| Title | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | Examples: @@ -104,9 +104,9 @@ Examples: Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. 
The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible -| title | required | type | -|:-----------------|:-----------|:-------------| -| Dataset Abstract | True | AbstractText | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Dataset Abstract | False | True | True | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -126,9 +126,9 @@ This is the organisation responsible for running or supporting the data access r Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. If your organisation does not have a Grid.ac identifier please use the “suggest and institute” function here: https://www.grid.ac/institutes# -| title | required | type | -|:------------------------|:-----------|:-------| -| Organisation Identifier | False | Url | +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Organisation Identifier | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -137,9 +137,9 @@ Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for you Name of the organisation -| title | required | type | -|:------------------|:-----------|:--------------------------| -| Organisation Name | True | OneHundredFiftyCharacters | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| 
Organisation Name | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | @@ -148,9 +148,9 @@ Name of the organisation Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. -| title | required | type | -|:------------------|:-----------|:-------| -| Organisation Logo | False | Url | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Organisation Logo | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -159,9 +159,9 @@ Please provide a logo associated with the Gateway Organisation using a valid URL Please provide a URL that describes the organisation. -| title | required | type | -|:-------------------------|:-----------|:------------| -| Organisation Description | False | Description | +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Organisation Description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -170,9 +170,9 @@ Please provide a URL that describes the organisation. 
Organisation contact point(s) -| title | required | type | -|:---------------------------|:-----------|:-------------| -| Organisation Contact Point | True | EmailAddress | +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Organisation Contact Point | False | True | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.EmailAddress.EmailAddress]]', 'null'] | @@ -181,9 +181,9 @@ Organisation contact point(s) Please indicate if the organisation is an Alliance Member or a Hub. -| title | required | type | -|:------------------------|:-----------|:---------| -| Organisation Membership | False | MemberOf | +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-----------------------------------------------------| +| Organisation Membership | False | True | False | ["MemberOf['HUB','ALLIANCE','OTHER','NCS']", 'null'] | @@ -192,9 +192,9 @@ Please indicate if the organisation is an Alliance Member or a Hub. Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose. 
-| title | required | type | -|:--------------|:-----------|:-------------| -| Contact Point | True | EmailAddress | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------| +| Contact Point | False | True | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -205,9 +205,9 @@ Examples: Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users. -| title | required | type | -|:---------|:-----------|:---------------------| -| Keywords | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]', 'null'] | @@ -216,9 +216,9 @@ Please provide relevant and specific keywords that can improve the SEO of your d Alternate dataset identifiers or local identifiers -| title | required | type | -|:------------------------------|:-----------|:---------------------| -| Alternate dataset identifiers | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | 
+|:------------------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Alternate dataset identifiers | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]', 'null'] | @@ -227,9 +227,9 @@ Alternate dataset identifiers or local identifiers All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI. -| title | required | type | -|:--------------------------|:-----------|:-------| -| Digital Object Identifier | False | Doi | +| title | is_list | is_optional | required | type | +|:--------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------| +| Digital Object Identifier | False | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -240,9 +240,9 @@ Examples: Placeholder for dataset type -| title | required | type | -|:-----------|:-----------|:------------| -| Datasetype | True | DatasetType | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Datasetype | False | True | True | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -253,9 +253,9 @@ Examples: Placeholder for dataset sub-type -| title | required | type | 
-|:-----------|:-----------|:------------| -| Datasetype | True | DatasetType | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Datasetype | False | True | True | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -266,9 +266,9 @@ Examples: Summary population size of the cohort -| title | required | type | -|:----------------|:-----------|:-------| -| Population size | True | int | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------| +| Population size | False | True | True | ['int', 'null'] | @@ -286,9 +286,9 @@ Documentation can include a rich text description of the dataset or links to med A free-text description of the record. -| title | required | type | -|:------------|:-----------|:------------| -| Description | False | Description | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -297,9 +297,9 @@ A free-text description of the record. Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal. 
-| title | required | type | -|:-----------------|:-----------|:---------------------| -| Associated Media | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Associated Media | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | Examples: @@ -310,9 +310,9 @@ Examples: Please complete only if the dataset is part of a group or family -| title | required | type | -|:--------|:-----------|:---------------------| -| Group | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Group | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters, hdr_schemata.definitions.HDRUK.IsPartOfEnum.IsPartOfEnum]]', 'null'] | Examples: @@ -332,9 +332,9 @@ Observational, Spatial and Temporal coverage List of countries where the data was taken from -| title | required | type | -|:--------|:-----------|:---------------------| -| Spatial | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | 
+|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Spatial | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -343,9 +343,9 @@ List of countries where the data was taken from Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. -| title | required | type | -|:--------|:-----------|:----------------| -| Pathway | False | LongDescription | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Pathway | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -354,9 +354,9 @@ Long description of the clinical/diagnostic/treatment pathway if applicable. Thi What is the typical time span that a patient appears in the dataset (follow up period) -| title | required | type | -|:---------|:-----------|:---------| -| Followup | False | Followup | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------| +| Followup | False | True | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | @@ -365,9 +365,9 @@ What is the typical time span that a patient appears in the dataset (follow up p Age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). -| title | required | type | -|:------------------|:-----------|:---------| -| Typical Age Range | False | AgeRange | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Typical Age Range | False | True | False | ["AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -376,9 +376,9 @@ Age range in whole years of participants in the dataset. Please provide range in Male, Female, Other -| title | required | type | -|:--------|:-----------|:-----------| -| Gender | False | GenderType | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------| +| Gender | True | True | False | ["GenderType['Male','Female','Other']"] | @@ -387,9 +387,9 @@ Male, Female, Other Blood, Saliva, Urine, Other -| title | required | type | -|:-------------------|:-----------|:---------------------| -| Biological Samples | False | BiologicalSampleType | +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:-----------------------------------------------------------| +| Biological Samples | True | True | False | ["BiologicalSampleType['Blood','Other','Urine','Saliva']"] | @@ -398,9 +398,9 @@ Blood, Saliva, Urine, Other Mental health, Cognitive function -| title | required | type | -|:--------------|:-----------|:------------------| -| Psychological | False | PsychologicalType | +| title | is_list | is_optional | required | type | 
+|:--------------|:----------|:--------------|:-----------|:------------------------------------------------------------| +| Psychological | True | True | False | ["PsychologicalType['Cognitive Function','Mental Health']"] | @@ -409,9 +409,9 @@ Mental health, Cognitive function Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive -| title | required | type | -|:---------|:-----------|:-------------| -| Physical | False | PhysicalType | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------| +| Physical | True | True | False | ["PhysicalType['Respiratory','Vision','Hearing','Musculoskeletal','Cardiovascular','Reproductive']"] | @@ -420,9 +420,9 @@ Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive Height, Weight, Waist circumference, Hip circumference, Blood pressure -| title | required | type | -|:---------------|:-----------|:-------------------| -| Anthropometric | False | AnthropometricType | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------| +| Anthropometric | True | True | False | ["AnthropometricType['Blood Pressure','Hip Circumference','Height','Waist Circumference','Weight']"] | @@ -431,9 +431,9 @@ Height, Weight, Waist circumference, Hip circumference, Blood pressure Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol -| title | required | type | -|:----------|:-----------|:---------------| -| Lifestyle | False | LifestylesType | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------| +| Lifestyle | True | True | False | 
["LifestylesType['Smoking','Dietary Habits','Physical Activity','Alcohol']"] | @@ -442,9 +442,9 @@ Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support -| title | required | type | -|:---------------|:-----------|:------------------| -| Socio-economic | False | SocioEconomicType | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------| +| Socio-economic | True | True | False | ["SocioEconomicType['Finances','Family Circumstances','Housing','Education','Marital Status','Occupation','Ethnic Group','Social Support']"] | @@ -471,9 +471,9 @@ None Pleases indicate the purpose(s) that the dataset was collected. -| title | required | type | -|:--------|:-----------|:---------------------| -| Purpose | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Purpose | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Purpose.Purpose]', 'null'] | @@ -482,9 +482,9 @@ Pleases indicate the purpose(s) that the dataset was collected. 
Pleases indicate the source of the data extraction -| title | required | type | -|:--------|:-----------|:---------------------| -| Source | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Source | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Source.Source]', 'null'] | @@ -493,9 +493,9 @@ Pleases indicate the source of the data extraction Pleases indicate the setting(s) where data was collected. Multiple settings may be provided -| title | required | type | -|:--------|:-----------|:---------------------| -| Setting | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Setting | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Setting.Setting]', 'null'] | @@ -513,9 +513,9 @@ None Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020. 
-| title | required | type | -|:-------------|:-----------|:-------| -| Release Date | False | date | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-----------------------------| +| Release Date | False | True | False | ['date', 'datetime', 'null'] | @@ -524,9 +524,9 @@ Date of the latest release of the dataset. If this is a regular release i.e. qua The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. -| title | required | type | -|:-----------|:-----------|:-------| -| Start Date | True | date | +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-----------------------------| +| Start Date | False | True | True | ['date', 'datetime', 'null'] | @@ -535,9 +535,9 @@ The start of the time period that the dataset provides coverage for. If there ar The end of the time period that the dataset provides coverage for. If the dataset is “Continuous” and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information. -| title | required | type | -|:---------|:-----------|:-------| -| End Date | False | date | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------| +| End Date | False | True | False | ['date', 'datetime', "EndDateEnum['CONTINUOUS',null]", 'null'] | @@ -546,9 +546,9 @@ The end of the time period that the dataset provides coverage for. 
If the datase Please indicate the typical time-lag between an event and the data for that event appearing in the dataset -| title | required | type | -|:---------|:-----------|:--------| -| Time Lag | True | TimeLag | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | False | False | True | ["TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]"] | @@ -557,9 +557,9 @@ Please indicate the typical time-lag between an event and the data for that even Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/ -| title | required | type | -|:---------------------|:-----------|:------------| -| Publishing Frequency | True | Periodicity | +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Publishing Frequency | False | False | True | ["Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]"] | @@ -586,9 +586,9 @@ This section includes information about how the data can be used and how it is c Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE -| title | required | type | -|:--------------------|:-----------|:---------------------| -| Data Use Limitation | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data Use Limitation | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.DataUseLimitation.DataUseLimitation]', 'null'] | @@ -597,9 +597,9 @@ Please provide an indication of consent permissions for datasets and/or material Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information. -| title | required | type | -|:----------------------|:-----------|:---------------------| -| Data Use Requirements | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data Use Requirements | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.DataUseRequirements.DataUseRequirements]', 'null'] | @@ -608,9 +608,9 @@ Please indicate fit here are any additional conditions set for use if any, multi Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided. 
-| title | required | type | -|:----------------------|:-----------|:-----------------| -| Citation Requirements | False | ShortDescription | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Citation Requirements | False | True | False | ["ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]', 'null'] | @@ -619,9 +619,9 @@ Please provide the text that you would like included as part of any citation tha None -| title | required | type | -|:---------------|:-----------|:---------------------| -| Investigations | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Investigations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | @@ -630,9 +630,9 @@ None Please provide the keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list. 
-| title | required | type | -|:----------|:-----------|:-------| -| Citations | False | Doi | +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Citations | False | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'str', 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Doi.Doi]]', 'null'] | @@ -650,9 +650,9 @@ This section includes information about data access None -| title | required | type | -|:--------------|:-----------|:----------------| -| Access Rights | True | LongDescription | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Rights | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -661,9 +661,9 @@ None Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset. 
-| title | required | type | -|:---------------|:-----------|:----------------| -| Access Service | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Service | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | Examples: @@ -674,9 +674,9 @@ Examples: Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian. -| title | required | type | -|:---------------------------------|:-----------|:----------------| -| Organisation Access Request Cost | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Organisation Access Request Cost | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | @@ -685,9 +685,9 @@ Please provide link(s) to a webpage detailing the commercial model for processin Please provide an indication of the typical processing times based on the types of requests typically received. 
-| title | required | type | -|:------------------------|:-----------|:-----------------| -| Access Request Duration | False | DeliveryLeadTime | +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------| +| Access Request Duration | False | True | False | ["DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", 'null'] | @@ -696,9 +696,9 @@ Please provide an indication of the typical processing times based on the types Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored. -| title | required | type | -|:-------------|:-----------|:---------------------| -| Jurisdiction | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Jurisdiction | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Isocountrycode.Isocountrycode]', 'null'] | @@ -707,9 +707,9 @@ Please use country code from ISO 3166-1 country codes and the associated ISO 316 Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed. 
-| title | required | type | -|:----------------|:-----------|:----------------| -| Data Controller | True | LongDescription | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -718,9 +718,9 @@ Data Controller means a person/entity who (either alone or jointly or in common A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller. -| title | required | type | -|:---------------|:-----------|:----------------| -| Data Processor | False | LongDescription | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Processor | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -738,9 +738,9 @@ Section includes technical attributes for language vocabularies, sizes etc. and List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided. 
-| title | required | type | -|:----------------------|:-----------|:---------------------| -| Controlled Vocabulary | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Controlled Vocabulary | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.ControlledVocabulary.ControlledVocabulary]', 'null'] | @@ -749,9 +749,9 @@ List any relevant terminologies / ontologies / controlled vocabularies, such as List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. If you are using a standard that has not been included in the list, please use “other” and contact support desk to ask for an addition. 
-| title | required | type | -|:------------|:-----------|:---------------------| -| Conforms To | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Conforms To | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.StandardisedDataModels.StandardisedDataModels]', 'null'] | @@ -760,9 +760,9 @@ List standardised data models that the dataset has been stored in or transformed This should list all the languages in which the dataset metadata and underlying data is made available. -| title | required | type | -|:---------|:-----------|:---------------------| -| Language | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Language | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Language.Language]', 'null'] | @@ -771,9 +771,9 @@ This should list all the languages in which the dataset metadata and underlying If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. 
If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format. -| title | required | type | -|:--------|:-----------|:---------------------| -| Format | True | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Format | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Format.Format]', 'null'] | @@ -791,9 +791,9 @@ This section includes information about related datasets that may have previousl If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. Note: If all the datasets from Gateway organisation can be linked please indicate “ALL” and the onboarding portal will automate linkage across the datasets submitted. 
-| title | required | type | -|:----------------|:-----------|:---------------------| -| Linked Datasets | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Linked Datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]]', 'null'] | @@ -802,9 +802,9 @@ If applicable, please provide the DOI of other datasets that have previously bee Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset. 
-| title | required | type | -|:------------|:-----------|:---------------------| -| Derivations | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Derivations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.AbstractText.AbstractText]]', 'null'] | @@ -813,9 +813,9 @@ Indicate if derived datasets or predefined extracts are available and the type o Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/ -| title | required | type | -|:--------|:-----------|:---------------------| -| Tools | False | CommaSeparatedValues | +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Tools | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | @@ -833,9 +833,9 @@ Multiple observations about the dataset may be provided and users are expected t Please select one of the following statistical populations for you observation -| title | required | type | 
-|:-----------------------|:-----------|:---------------------------------| -| Statistical Population | True | StatisticalPopulationConstrained | +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------| +| Statistical Population | False | False | True | ["StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']"] | Examples: @@ -846,9 +846,9 @@ Examples: Please provide the population size associated with the population type the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset. -| title | required | type | -|:---------------|:-----------|:-------| -| Measured Value | True | int | +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:--------| +| Measured Value | False | False | True | ['int'] | @@ -857,9 +857,9 @@ Please provide the population size associated with the population type the datas If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type. -| title | required | type | -|:---------------------------|:-----------|:-------------| -| Disambiguating Description | False | AbstractText | +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Disambiguating Description | False | True | False | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | @@ -868,9 +868,9 @@ If SNOMED CT term does not provide sufficient detail, please provide a descripti Please provide the date that the observation was made. 
Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations. -| title | required | type | -|:-----------------|:-----------|:-------| -| Observation Date | True | date | +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:---------------------| +| Observation Date | False | False | True | ['date', 'datetime'] | @@ -879,9 +879,9 @@ Please provide the date that the observation was made. Some datasets may be cont Initially this will be defaulted to "COUNT" -| title | required | type | -|:------------------|:-----------|:-----------------| -| Measured Property | True | MeasuredProperty | +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------------------| +| Measured Property | False | False | True | ['MeasuredProperty[{}]'] | @@ -890,9 +890,9 @@ Initially this will be defaulted to "COUNT" Descriptions of all tables and data elements that can be included in the dataset -| title | required | type | -|:--------------------|:-----------|:----------| -| Structural Metadata | False | DataClass | +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:--------------| +| Structural Metadata | True | True | False | ['DataClass'] | @@ -908,22 +908,22 @@ Metadata collection for Tissue Samples datasets ### dataCategories -Data categories related to the tissue sample collection +The type of data that is associated with the samples in the study. 
Can be several values MIABIS-2.0-13 -| title | required | type | -|:----------------|:-----------|:-------------------------| -| Data Categories | False | TissueDataCategoriesEnum | +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data Categories | True | True | False | ["TissueDataCategoriesEnum['Biological samples','Survey data','Imaging data','Medical records','National registries','Genealogical records','Physiological/Biochemical measurements','Other']"] | ### materialType -Material type of the tissue sample collection +The biospecimen saved from a biological entity for propagation e.g. testing, diagnostics, treatment or research purposes. Can be several values MIABIS-2.0-14 -| title | required | type | -|:--------------|:-----------|:-----------------------| -| Material Type | False | MaterialTypeCategories | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Material Type | True | True | False | ["MaterialTypeCategories['Blood','DNA','Faeces','Immortalized Cell Lines','Isolated Pathogen','Other','Plasma','RNA','Saliva','Serum','Tissue (Frozen)','Tissue (FFPE)','Urine']"] | @@ -941,9 +941,9 @@ Metadata related to the tissue sample Date when the tissue sample metadata was created -| title | required | type | -|:--------------|:-----------|:-------| -| Creation Date | False | date | +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------| +| Creation Date | False | True | False 
| ['date', 'datetime', 'null'] | @@ -952,9 +952,20 @@ Date when the tissue sample metadata was created Ontology code for the anatomical site, this code must match an ICD-0-3 format -| title | required | type | -|:------------------------------|:-----------|:--------| -| Anatomical Site Ontology Code | False | ICD_0_3 | +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------| +| Anatomical Site Ontology Code | False | True | False | ["ICD_0_3[{'anyOf': [{'pattern': '^[C\\\\d]{3}\\\\.\\\\d{4}\\\\/\\\\d{1,4}$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### collectionType + +The type of the sample collection. Can be several values [MIABIS-2.0-16](https://github.com/BBMRI-ERIC/miabis/blob/master/Structured-data-and-lists.md#collection-type) + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Collection Type | False | True | False | ["TissueCollectionTypeEnum['Case-control','Cohort','Cross-sectional','Longitudinal','Twin-study','Quality control','Population-based','Disease specific','Birth cohort','Other']", 'null'] | diff --git a/docs/HDRUK/2.2.0.structure.json b/docs/HDRUK/2.2.0.structure.json index 2f8a7d1..4a4d8b7 100644 --- a/docs/HDRUK/2.2.0.structure.json +++ b/docs/HDRUK/2.2.0.structure.json @@ -10,7 +10,13 @@ "https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" ] ], - "type": "Uuidv4", + "type": [ + "Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", + 
"Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -21,7 +27,11 @@ "examples": [ "1.1.0" ], - "type": "Semver", + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -30,7 +40,11 @@ "title": "Dataset Revisions", "description": "Revisions of Dataset metadata", "examples": null, - "type": "Revision", + "type": [ + "Revision" + ], + "is_list": true, + "is_optional": false, "subItems": [ { "name": "version", @@ -38,7 +52,11 @@ "title": null, "description": "Semantic Version", "examples": null, - "type": "Semver", + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -47,7 +65,12 @@ "title": null, "description": "URL endpoint to obtain the version", "examples": null, - "type": "Url", + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -58,7 +81,11 @@ "title": "Creation Date", "description": "Dataset Metadata Creation Date", "examples": null, - "type": "datetime" + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "modified", @@ -66,7 +93,11 @@ "title": "Modification Date", "description": "Dataset Metadata Creation Date", "examples": null, - "type": "datetime" + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "summary", @@ -74,7 +105,11 @@ "title": "Summary", "description": "Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP.", "examples": null, - "type": "Summary", + "type": [ + "Summary" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "title", @@ -86,7 
+121,11 @@ "North West London COVID-19 Patient Level Situation Report" ] ], - "type": "OneHundredFiftyCharacters", + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -97,7 +136,12 @@ "examples": [ "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." ], - "type": "AbstractText", + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -106,7 +150,11 @@ "title": "Dataset publisher", "description": "This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank.", "examples": null, - "type": "Organisation", + "type": [ + "Organisation" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "identifier", @@ -114,7 +162,12 @@ "title": "Organisation Identifier", "description": "Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. 
If your organisation does not have a Grid.ac identifier please use the \u201csuggest and institute\u201d function here: https://www.grid.ac/institutes#", "examples": null, - "type": "Url", + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -123,7 +176,11 @@ "title": "Organisation Name", "description": "Name of the organisation", "examples": null, - "type": "OneHundredFiftyCharacters", + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] }, { @@ -132,7 +189,12 @@ "title": "Organisation Logo", "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg.", "examples": null, - "type": "Url", + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -141,7 +203,12 @@ "title": "Organisation Description", "description": "Please provide a URL that describes the organisation.", "examples": null, - "type": "Description", + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -150,7 +217,13 @@ "title": "Organisation Contact Point", "description": "Organisation contact point(s)", "examples": null, - "type": "EmailAddress", + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.EmailAddress.EmailAddress]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -159,7 +232,12 @@ "title": "Organisation Membership", "description": "Please indicate if the 
organisation is an Alliance Member or a Hub.", "examples": null, - "type": "MemberOf" + "type": [ + "MemberOf['HUB','ALLIANCE','OTHER','NCS']", + "null" + ], + "is_list": false, + "is_optional": true } ] }, @@ -171,7 +249,12 @@ "examples": [ "SAILDatabank@swansea.ac.uk" ], - "type": "EmailAddress", + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -180,7 +263,13 @@ "title": "Keywords", "description": "Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -189,7 +278,13 @@ "title": "Alternate dataset identifiers", "description": "Alternate dataset identifiers or local identifiers", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -200,7 +295,12 @@ "examples": [ "10.3399/bjgp17X692645" ], - "type": "Doi", + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -213,7 +313,12 @@ "" ] ], - "type": "DatasetType", 
+ "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -226,7 +331,12 @@ "" ] ], - "type": "DatasetType", + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -235,7 +345,12 @@ "title": "Population size", "description": "Summary population size of the cohort", "examples": null, - "type": "int" + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true } ] }, @@ -245,7 +360,12 @@ "title": "Documentation", "description": "Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. Organisations are required to confirm that they have permission to distribute any additional media.", "examples": null, - "type": "Documentation", + "type": [ + "Documentation", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "description", @@ -253,7 +373,12 @@ "title": "Description", "description": "A free-text description of the record.", "examples": null, - "type": "Description", + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -264,7 +389,13 @@ "examples": [ "PDF Document that describes study protocol" ], - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -275,7 +406,13 @@ "examples": [ "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)." 
], - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters, hdr_schemata.definitions.HDRUK.IsPartOfEnum.IsPartOfEnum]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -286,7 +423,12 @@ "title": "Coverage", "description": "Observational, Spatial and Temporal coverage", "examples": null, - "type": "Coverage", + "type": [ + "Coverage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "spatial", @@ -294,7 +436,12 @@ "title": "Spatial", "description": "List of countries where the data was taken from", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -303,7 +450,12 @@ "title": "Pathway", "description": "Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. 
primary and secondary care), or an integrated care record covering the whole patient pathway.", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -312,7 +464,12 @@ "title": "Followup", "description": "What is the typical time span that a patient appears in the dataset (follow up period)", "examples": null, - "type": "Followup" + "type": [ + "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "typicalAgeRange", @@ -320,7 +477,12 @@ "title": "Typical Age Range", "description": "Age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", "examples": null, - "type": "AgeRange", + "type": [ + "AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -329,7 +491,11 @@ "title": "Gender", "description": "Male, Female, Other", "examples": null, - "type": "GenderType" + "type": [ + "GenderType['Male','Female','Other']" + ], + "is_list": true, + "is_optional": true }, { "name": "biologicalsamples", @@ -337,7 +503,11 @@ "title": "Biological Samples", "description": "Blood, Saliva, Urine, Other", "examples": null, - "type": "BiologicalSampleType" + "type": [ + "BiologicalSampleType['Blood','Other','Urine','Saliva']" + ], + "is_list": true, + "is_optional": true }, { "name": "psychological", @@ -345,7 +515,11 @@ "title": "Psychological", "description": "Mental health, Cognitive function", "examples": null, - "type": "PsychologicalType" + 
"type": [ + "PsychologicalType['Cognitive Function','Mental Health']" + ], + "is_list": true, + "is_optional": true }, { "name": "physical", @@ -353,7 +527,11 @@ "title": "Physical", "description": "Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive", "examples": null, - "type": "PhysicalType" + "type": [ + "PhysicalType['Respiratory','Vision','Hearing','Musculoskeletal','Cardiovascular','Reproductive']" + ], + "is_list": true, + "is_optional": true }, { "name": "anthropometric", @@ -361,7 +539,11 @@ "title": "Anthropometric", "description": "Height, Weight, Waist circumference, Hip circumference, Blood pressure", "examples": null, - "type": "AnthropometricType" + "type": [ + "AnthropometricType['Blood Pressure','Hip Circumference','Height','Waist Circumference','Weight']" + ], + "is_list": true, + "is_optional": true }, { "name": "lifestyle", @@ -369,7 +551,11 @@ "title": "Lifestyle", "description": "Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", "examples": null, - "type": "LifestylesType" + "type": [ + "LifestylesType['Smoking','Dietary Habits','Physical Activity','Alcohol']" + ], + "is_list": true, + "is_optional": true }, { "name": "socioeconomic", @@ -377,7 +563,11 @@ "title": "Socio-economic", "description": "Occupation, Family circumstances, Housing, Education, Ethnic group, Martial status, Social support", "examples": null, - "type": "SocioEconomicType" + "type": [ + "SocioEconomicType['Finances','Family Circumstances','Housing','Education','Marital Status','Occupation','Ethnic Group','Social Support']" + ], + "is_list": true, + "is_optional": true } ] }, @@ -387,7 +577,12 @@ "title": "Provenance", "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", "examples": null, - "type": "Provenance", + "type": [ + "Provenance", + "null" + ], + "is_list": false, + "is_optional": true, 
"subItems": [ { "name": "origin", @@ -395,7 +590,12 @@ "title": null, "description": null, "examples": null, - "type": "Origin", + "type": [ + "Origin", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "purpose", @@ -403,7 +603,13 @@ "title": "Purpose", "description": "Pleases indicate the purpose(s) that the dataset was collected.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Purpose.Purpose]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -412,7 +618,13 @@ "title": "Source", "description": "Pleases indicate the source of the data extraction", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Source.Source]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -421,7 +633,13 @@ "title": "Setting", "description": "Pleases indicate the setting(s) where data was collected. Multiple settings may be provided", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Setting.Setting]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -432,7 +650,11 @@ "title": null, "description": null, "examples": null, - "type": "Temporal", + "type": [ + "Temporal" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "distributionReleaseDate", @@ -440,7 +662,13 @@ "title": "Release Date", "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. 
If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "startDate", @@ -448,7 +676,13 @@ "title": "Start Date", "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "endDate", @@ -456,7 +690,14 @@ "title": "End Date", "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. 
If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "EndDateEnum['CONTINUOUS',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "timeLag", @@ -464,7 +705,11 @@ "title": "Time Lag", "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", "examples": null, - "type": "TimeLag" + "type": [ + "TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]" + ], + "is_list": false, + "is_optional": false }, { "name": "publishingFrequency", @@ -472,7 +717,11 @@ "title": "Publishing Frequency", "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", "examples": null, - "type": "Periodicity" + "type": [ + "Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]" + ], + "is_list": false, + "is_optional": false } ] } @@ -484,7 +733,11 @@ "title": "Accessibility", "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", "examples": null, - "type": "Accessibility", + "type": [ + "Accessibility" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "usage", @@ -492,7 +745,12 @@ "title": "Usage", "description": "This section includes information about how the data can be used and how it is currently being used", "examples": null, - "type": "Usage", + "type": [ + "Usage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "dataUseLimitation", @@ -500,7 +758,13 @@ "title": "Data Use Limitation", "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.DataUseLimitation.DataUseLimitation]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -509,7 +773,13 @@ "title": "Data Use Requirements", "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.DataUseRequirements.DataUseRequirements]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -518,7 +788,13 @@ "title": "Citation Requirements", "description": "Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided.", "examples": null, - "type": "ShortDescription", + "type": [ + "ShortDescription[{'anyOf': [{'maxLength': 1000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -527,7 +803,13 @@ "title": "Investigations", "description": null, "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -536,7 +818,14 @@ "title": "Citations", "description": "Please provide the keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. 
Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list.", "examples": null, - "type": "Doi", + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "str", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Doi.Doi]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -547,7 +836,11 @@ "title": "Access", "description": "This section includes information about data access", "examples": null, - "type": "Access", + "type": [ + "Access" + ], + "is_list": false, + "is_optional": false, "subItems": [ { "name": "accessRights", @@ -555,7 +848,12 @@ "title": "Access Rights", "description": null, "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -566,7 +864,12 @@ "examples": [ "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" ], - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -575,7 +878,13 @@ "title": "Organisation Access Request Cost", "description": "Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, 
{ @@ -584,7 +893,12 @@ "title": "Access Request Duration", "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", "examples": null, - "type": "DeliveryLeadTime" + "type": [ + "DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "jurisdiction", @@ -592,7 +906,13 @@ "title": "Jurisdiction", "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. for the country/state under whose laws the data subjects' data is collected, processed and stored.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Isocountrycode.Isocountrycode]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -601,7 +921,12 @@ "title": "Data Controller", "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way any Data Subject data, specifically personal data or are to be processed.", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -610,7 +935,12 @@ "title": "Data Processor", "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", "examples": null, - "type": "LongDescription", + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 
50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -621,7 +951,12 @@ "title": "Format and Standards", "description": "Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset.", "examples": null, - "type": "FormatAndStandards", + "type": [ + "FormatAndStandards", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "vocabularyEncodingScheme", @@ -629,7 +964,13 @@ "title": "Controlled Vocabulary", "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.ControlledVocabulary.ControlledVocabulary]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -638,7 +979,13 @@ "title": "Conforms To", "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.StandardisedDataModels.StandardisedDataModels]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -647,7 +994,13 @@ "title": "Language", "description": "This should list all the languages in which the dataset metadata and underlying data is made available.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Language.Language]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -656,7 +1009,13 @@ "title": "Format", "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. 
If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Format.Format]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -669,7 +1028,12 @@ "title": "Enrichment and Linkage", "description": "This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", "examples": null, - "type": "EnrichmentAndLinkage", + "type": [ + "EnrichmentAndLinkage", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "qualifiedRelation", @@ -677,7 +1041,13 @@ "title": "Linked Datasets", "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. 
Note: If all the datasets from Gateway organisation can be linked please indicate \u201cALL\u201d and the onboarding portal will automate linkage across the datasets submitted.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -686,7 +1056,13 @@ "title": "Derivations", "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset.", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.AbstractText.AbstractText]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -695,7 +1071,13 @@ "title": "Tools", "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. 
Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", "examples": null, - "type": "CommaSeparatedValues", + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] @@ -706,7 +1088,11 @@ "title": "Observations", "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d", "examples": null, - "type": "Observation", + "type": [ + "Observation" + ], + "is_list": true, + "is_optional": false, "subItems": [ { "name": "observedNode", @@ -716,7 +1102,11 @@ "examples": [ "PERSONS" ], - "type": "StatisticalPopulationConstrained" + "type": [ + "StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']" + ], + "is_list": false, + "is_optional": false }, { "name": "measuredValue", @@ -724,7 +1114,11 @@ "title": "Measured Value", "description": "Please provide the population size associated with the population type the dataset i.e. 
1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", "examples": null, - "type": "int" + "type": [ + "int" + ], + "is_list": false, + "is_optional": false }, { "name": "disambiguatingDescription", @@ -732,7 +1126,12 @@ "title": "Disambiguating Description", "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", "examples": null, - "type": "AbstractText", + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] }, { @@ -741,7 +1140,12 @@ "title": "Observation Date", "description": "Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. 
Users can add multiple observations.", "examples": null, - "type": "date" + "type": [ + "date", + "datetime" + ], + "is_list": false, + "is_optional": false }, { "name": "measuredProperty", @@ -749,7 +1153,11 @@ "title": "Measured Property", "description": "Initially this will be defaulted to \"COUNT\"", "examples": null, - "type": "MeasuredProperty", + "type": [ + "MeasuredProperty[{}]" + ], + "is_list": false, + "is_optional": false, "subItems": [] } ] @@ -760,7 +1168,11 @@ "title": "Structural Metadata", "description": "Descriptions of all tables and data elements that can be included in the dataset", "examples": null, - "type": "DataClass" + "type": [ + "DataClass" + ], + "is_list": true, + "is_optional": true }, { "name": "tissuesSampleCollection", @@ -768,23 +1180,36 @@ "title": "Tissues Sample Collection", "description": "Metadata collection for Tissue Samples datasets", "examples": null, - "type": "TissuesSampleCollection", + "type": [ + "TissuesSampleCollection", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "dataCategories", "required": false, "title": "Data Categories", - "description": "Data categories related to the tissue sample collection", + "description": "The type of data that is associated with the samples in the study. Can be several values MIABIS-2.0-13", "examples": null, - "type": "TissueDataCategoriesEnum" + "type": [ + "TissueDataCategoriesEnum['Biological samples','Survey data','Imaging data','Medical records','National registries','Genealogical records','Physiological/Biochemical measurements','Other']" + ], + "is_list": true, + "is_optional": true }, { "name": "materialType", "required": false, "title": "Material Type", - "description": "Material type of the tissue sample collection", + "description": "The biospecimen saved from a biological entity for propagation e.g. testing, diagnostics, treatment or research purposes. 
Can be several values MIABIS-2.0-14", "examples": null, - "type": "MaterialTypeCategories" + "type": [ + "MaterialTypeCategories['Blood','DNA','Faeces','Immortalized Cell Lines','Isolated Pathogen','Other','Plasma','RNA','Saliva','Serum','Tissue (Frozen)','Tissue (FFPE)','Urine']" + ], + "is_list": true, + "is_optional": true }, { "name": "tissueSampleMetadata", @@ -792,7 +1217,12 @@ "title": "Tissue Sample Metadata", "description": "Metadata related to the tissue sample", "examples": null, - "type": "TissueSampleMetadata", + "type": [ + "TissueSampleMetadata", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [ { "name": "creationDate", @@ -800,7 +1230,13 @@ "title": "Creation Date", "description": "Date when the tissue sample metadata was created", "examples": null, - "type": "date" + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true }, { "name": "AnatomicalSiteOntologyCode", @@ -808,10 +1244,28 @@ "title": "Anatomical Site Ontology Code", "description": "Ontology code for the anatomical site, this code must match an ICD-0-3 format", "examples": null, - "type": "ICD_0_3", + "type": [ + "ICD_0_3[{'anyOf': [{'pattern': '^[C\\\\d]{3}\\\\.\\\\d{4}\\\\/\\\\d{1,4}$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, "subItems": [] } ] + }, + { + "name": "collectionType", + "required": false, + "title": "Collection Type", + "description": "The type of the sample collection. 
Can be several values [MIABIS-2.0-16](https://github.com/BBMRI-ERIC/miabis/blob/master/Structured-data-and-lists.md#collection-type)", + "examples": null, + "type": [ + "TissueCollectionTypeEnum['Case-control','Cohort','Cross-sectional','Longitudinal','Twin-study','Quality control','Population-based','Disease specific','Birth cohort','Other']", + "null" + ], + "is_list": false, + "is_optional": true } ] } diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index e36b303..eeaf1d7 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -1,8 +1,3 @@ -# from hdr_schemata.models.GWDM.v1_0 import Gwdm10 as Model -# from hdr_schemata.models.HDRUK.base import Observation as Model -# from hdr_schemata.models.HDRUK import Hdruk220 as Model -from hdr_schemata.models.GWDM.v1_1 import Gwdm11 as Model - from pydantic._internal._model_construction import ModelMetaclass from pydantic import BaseModel, RootModel import pandas as pd @@ -10,12 +5,6 @@ import typing import enum -from hdr_schemata.models.HDRUK.v2_1_2.Observations import Observation - -_type1 = typing.List[Observation] -_type2 = typing.Optional[Observation] -_type3 = typing.Union[Observation, str] - def extract_type_info(type_hint): is_list = False @@ -78,8 +67,6 @@ def get_fields(structure, model: type[BaseModel]): for name, field in model.model_fields.items(): if name == "root": continue - # if name != "structuralMetadata": - # continue t = field.annotation @@ -146,15 +133,29 @@ def json_to_markdown(structure, level=2): return md -structure = [] -get_fields(structure, Model) -# get_fields(structure,Hdruk212) +def create_markdown(Model, path, name): + structure = [] + get_fields(structure, Model) + + with open(f"{path}/{name}.structure.json", "w") as f: + print(json.dumps(structure, indent=6)) + json.dump(structure, f, indent=6) + + md = json_to_markdown(structure) + + with open(f"{path}/{name}.md", "w") as f: + f.write(md) + print(f"Done 
{path}/name") -with open("temp.json", "w") as f: - print(json.dumps(structure, indent=6)) - json.dump(structure, f, indent=6) -md = json_to_markdown(structure) +from hdr_schemata.models.HDRUK import Hdruk212 +from hdr_schemata.models.HDRUK import Hdruk213 +from hdr_schemata.models.HDRUK import Hdruk220 +from hdr_schemata.models.GWDM.v1_1 import Gwdm10 +from hdr_schemata.models.GWDM.v1_1 import Gwdm11 -with open("temp.md", "w") as f: - f.write(md) +create_markdown(Gwdm10, "./docs/GWDM/", "1.0") +create_markdown(Gwdm11, "./docs/GWDM/", "1.1") +create_markdown(Hdruk212, "./docs/HDRUK/", "2.1.2") +create_markdown(Hdruk213, "./docs/HDRUK/", "2.1.3") +create_markdown(Hdruk220, "./docs/HDRUK/", "2.2.0") From b9f635f2eebb56b7d08f6af1a8c3c36c422f585e Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 5 Feb 2024 09:12:26 +0000 Subject: [PATCH 18/21] update mkdocs.yml --- mkdocs.yml | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) diff --git a/mkdocs.yml b/mkdocs.yml index 5d65a4b..be70de9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -1,24 +1,25 @@ site_name: HDRUK Schema theme: - name: material - palette: - scheme: hdruk - #custom_dir: overrides - #logo: images/favicon.png - #favicon: images/favicon.png - features: - - navigation.sections + name: material + palette: + scheme: hdruk + #custom_dir: overrides + #logo: images/favicon.png + #favicon: images/favicon.png + features: + - navigation.sections extra_css: - - stylesheets/custom.css - + - stylesheets/custom.css + nav: - - Welcome: index.md - - Gateway Data Model: - - Version 1.0: GWDM/1.0.md - - Version 1.1: GWDM/1.1.md - - HDRUK Schema: - - Version 2.1.2: HDRUK/2.1.2.md - - Version 2.2.0: HDRUK/2.2.0.md + - Welcome: index.md + - Gateway Data Model: + - Version 1.0: GWDM/1.0.md + - Version 1.1: GWDM/1.1.md + - HDRUK Schema: + - Version 2.1.2: HDRUK/2.1.2.md + - Version 2.2.0: HDRUK/2.1.3.md + - Version 2.2.0: HDRUK/2.2.0.md plugins: - - search + - search From 
245a294b5a18fc11605fe1d3e649e5a915bf5c59 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 5 Feb 2024 09:55:11 +0000 Subject: [PATCH 19/21] put in some changes --- docs/GWDM/1.1.change.md | 39 +++++++++++++++++++++++++++++++++++++ docs/HDRUK/2.1.3.changes.md | 3 +++ docs/HDRUK/2.2.0.change.md | 29 +++++++++++++++++++++++++++ docs/index.md | 2 +- mkdocs.yml | 19 ++++++++++-------- 5 files changed, 83 insertions(+), 9 deletions(-) create mode 100644 docs/GWDM/1.1.change.md create mode 100644 docs/HDRUK/2.1.3.changes.md create mode 100644 docs/HDRUK/2.2.0.change.md diff --git a/docs/GWDM/1.1.change.md b/docs/GWDM/1.1.change.md new file mode 100644 index 0000000..8b3202a --- /dev/null +++ b/docs/GWDM/1.1.change.md @@ -0,0 +1,39 @@ +## Changes from 1.0 -> 1.1 + +- **Add** `summary.version` : new string field to record the metadata version, this was accidentaly dropped in the previous version of the model +- **Add** `summary.populationSize`: new integer field to record the population size of the dataset/cohort in the summary field +- **Add** `summary.datasetSubType`: new string field to be used with `summary.datasetType` to record the type of dataset +- **Change** `summary.publisher` : this field is now of type `Organisation`, it adds + - **Rename** `summary.publisher.publisherName` --> `summary.publisher.name` + - **Rename** `summary.publisher.publisherGatewayId` --> `summary.publisher.gatewayId` + - **Add** `summary.publisher.rorId` (Optional) [ror.org](https://ror.org/) +- **Change** coverage: New fields for cohort variables + - **Rename** `coverage.physicalSampleAvailability` --> `coverage.biologicalsamples` + - **Add** `coverage.gender` + - **Add** `coverage.psychological` + - **Add** `coverage.physical` + - **Add** `coverage.anthropometric` + - **Add** `coverage.lifestyle` + - **Add** `coverage.socioeconomic` +- **Add** New collection for tissue samples with 21 new fields : + - **Add** `tissuesSampleCollection.id` + - **Add** 
`tissuesSampleCollection.dataCategories` + - **Add** `tissuesSampleCollection.materialType` + - **Add** `tissuesSampleCollection.accessConditions` + - **Add** `tissuesSampleCollection.collectionType` + - **Add** `tissuesSampleCollection.disease` + - **Add** `tissuesSampleCollection.storageTemperature` + - **Add** `tissuesSampleCollection.sampleAgeRange` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.id` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.sampleDonor.id` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.sampleDonor.sex` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.sampleDonor.birthDate` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.sampleDonor.dataCategories` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.sampleType` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.storageTemperature` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.creationDate` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.anatomicalSiteOntologyCode` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.anatomicalSiteOntologyDescription` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.anatomicalSiteFreeText` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.sampleContentDiagnosis` + - **Add** `tissuesSampleCollection.tissueSampleMetadata.useRestrictions` diff --git a/docs/HDRUK/2.1.3.changes.md b/docs/HDRUK/2.1.3.changes.md new file mode 100644 index 0000000..08a1569 --- /dev/null +++ b/docs/HDRUK/2.1.3.changes.md @@ -0,0 +1,3 @@ +## Changes from 2.1.2 -> 2.1.3 + +- **Rename** `provenance.temporal.accrualPeriodicity` --> `provenance.temporal.publishingFrequency` diff --git a/docs/HDRUK/2.2.0.change.md b/docs/HDRUK/2.2.0.change.md new file mode 100644 index 0000000..ee00b48 --- /dev/null +++ b/docs/HDRUK/2.2.0.change.md @@ -0,0 +1,29 @@ +## Changes from 2.1.3 -> 2.2.0 + +- **Add** `summary.populationSize`: new integer field to record the population size of the 
dataset/cohort in the summary field +- **Add** `summary.datasetType`: new string field to be used to record the type of dataset (**will be changed to enum at some later point **) +- **Add** `summary.datasetSubType`: new string field to be used with `summary.datasetType` to record the type of dataset +- **Change** coverage: New fields for cohort variables + - **Rename** `coverage.physicalSampleAvailability` --> `coverage.biologicalsamples`: + - **Add** `coverage.gender`, list: + - Allowed Values: Male, Female, Other + - **Add** `coverage.psychological`, list: + - Allowed Values: Cognitive Function, Mental Health + - **Add** `coverage.physical`, list: + - Allowed Values: Respiratory, Vision, Hearing, Musculoskeletal, Cardiovascular, Reproductive + - **Add** `coverage.anthropometric`, list: + - Allowed Values: Blood Pressure, Hip Circumference, Height, Waist Circumference, + - **Add** `coverage.lifestyle`, list: + - Allowed Values: Smoking, Dietary Habits, Physical Activity, Alcohol, Weight + - **Add** `coverage.socioeconomic`, list: + - Allowed Values: Finances, Family Circumstances, Housing, Education, Marital Status, Occupation, Ethnic Group, Social Support +- **Add** New collection for tissue samples: + - **Add** `tissuesSampleCollection.dataCategories` + - Allowed values: Biological samples, Survey data, Imaging data, Medical records, National registries, Genealogical records, Physiological/Biochemical measurements, Other + - **Add** `tissuesSampleCollection.materialType` + - Allowed values: Blood, DNA, Faeces, Immortalized Cell Lines, Isolated Pathogen, Other, Plasma, RNA, Saliva, Serum, Tissue (Frozen), Tissue (FFPE), Urine + - **Add** `tissuesSampleCollection.collectionType` + - Allowed values: Case-control, Cohort, Cross-sectional, Longitudinal, Twin-study, Quality control, Population-based, Disease specific, Birth cohort, Other + - **Add** `tissuesSampleCollection.tissueSampleMetadata.creationDate` + - **Add** 
`tissuesSampleCollection.tissueSampleMetadata.anatomicalSiteOntologyCode`: + - Has to match the pattern for an ICD-0-3 code `^[C\d]{3}\.\d{4}\/\d{1,4}$` diff --git a/docs/index.md b/docs/index.md index 32f95c0..2f8b059 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1 +1 @@ -hi \ No newline at end of file +Please find schema definitions and change logs for all schemas that be handled by the HDRUK Gateway diff --git a/mkdocs.yml b/mkdocs.yml index be70de9..71d9575 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -13,13 +13,16 @@ extra_css: nav: - Welcome: index.md - - Gateway Data Model: - - Version 1.0: GWDM/1.0.md - - Version 1.1: GWDM/1.1.md - - HDRUK Schema: - - Version 2.1.2: HDRUK/2.1.2.md - - Version 2.2.0: HDRUK/2.1.3.md - - Version 2.2.0: HDRUK/2.2.0.md - + - Schemata: + - Gateway Data Model (GWDM): + - Version 1.0: GWDM/1.0.md + - Version 1.1: GWDM/1.1.md + - HDRUK: + - Version 2.1.2: HDRUK/2.1.2.md + - Version 2.2.0: HDRUK/2.1.3.md + - Version 2.2.0: HDRUK/2.2.0.md + - Schema Change Log: + - GWDM: + - 1.1: GWDM/1.1.change.md plugins: - search From b5eb2471d275b9b08962532f0c82fe5f1643a6a1 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 5 Feb 2024 09:56:54 +0000 Subject: [PATCH 20/21] rename file --- docs/HDRUK/{2.1.3.changes.md => 2.1.3.change.md} | 0 mkdocs.yml | 3 +++ 2 files changed, 3 insertions(+) rename docs/HDRUK/{2.1.3.changes.md => 2.1.3.change.md} (100%) diff --git a/docs/HDRUK/2.1.3.changes.md b/docs/HDRUK/2.1.3.change.md similarity index 100% rename from docs/HDRUK/2.1.3.changes.md rename to docs/HDRUK/2.1.3.change.md diff --git a/mkdocs.yml b/mkdocs.yml index 71d9575..a4cb2e9 100644 --- a/mkdocs.yml +++ b/mkdocs.yml @@ -24,5 +24,8 @@ nav: - Schema Change Log: - GWDM: - 1.1: GWDM/1.1.change.md + - HDRUK: + - 2.1.3: HDRUK/2.1.3.change.md + - 2.2.0: HDRUK/2.2.0.change.md plugins: - search From 0ac1cc8758455bc45fadef2716fd5c2c7e891cae Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 5 Feb 2024 10:03:58 +0000 Subject: [PATCH 21/21] 
fix typo --- hdr_schemata/models/GWDM/1.0/schema.json | 2 +- hdr_schemata/models/GWDM/1.1/schema.json | 2 +- hdr_schemata/tests/test_schemas.py | 59 ++++++++++++++++-------- 3 files changed, 42 insertions(+), 21 deletions(-) diff --git a/hdr_schemata/models/GWDM/1.0/schema.json b/hdr_schemata/models/GWDM/1.0/schema.json index 4c08331..8e9f90b 100644 --- a/hdr_schemata/models/GWDM/1.0/schema.json +++ b/hdr_schemata/models/GWDM/1.0/schema.json @@ -1223,7 +1223,7 @@ "$ref": "#/$defs/TimeLag" } ], - "description": "Rypical time-lag between an event and the data for that event appearing in the dataset", + "description": "Typical time-lag between an event and the data for that event appearing in the dataset", "example": "LESS 1 WEEK", "title": "Time Lag" }, diff --git a/hdr_schemata/models/GWDM/1.1/schema.json b/hdr_schemata/models/GWDM/1.1/schema.json index fda9abb..17fe4f7 100644 --- a/hdr_schemata/models/GWDM/1.1/schema.json +++ b/hdr_schemata/models/GWDM/1.1/schema.json @@ -1488,7 +1488,7 @@ "$ref": "#/$defs/TimeLag" } ], - "description": "Rypical time-lag between an event and the data for that event appearing in the dataset", + "description": "Typical time-lag between an event and the data for that event appearing in the dataset", "example": "LESS 1 WEEK", "title": "Time Lag" }, diff --git a/hdr_schemata/tests/test_schemas.py b/hdr_schemata/tests/test_schemas.py index 6e10730..facc3bc 100644 --- a/hdr_schemata/tests/test_schemas.py +++ b/hdr_schemata/tests/test_schemas.py @@ -7,50 +7,70 @@ from hdr_schemata.models.SchemaOrg import BioSchema -def get_metadata(model,version): - metadata = json.load(open(f'../examples/{model}/{version}/example.json')) +def get_metadata(model, version): + metadata = json.load(open(f"../examples/{model}/{version}/example.json")) return metadata -def get_schema(model,version): - metadata = json.load(open(f'../models/{model}/{version}/schema.json')) + +def get_schema(model, version): + metadata = 
json.load(open(f"../models/{model}/{version}/schema.json")) return metadata + class TestHdruk212: - metadata = get_metadata('HDRUK','2.1.2') - json_schema = get_schema('HDRUK','2.1.2') + metadata = get_metadata("HDRUK", "2.1.2") + json_schema = get_schema("HDRUK", "2.1.2") def test_validation(self): assert Hdruk212(**self.metadata) != None def test_json_schema(self): schema = Hdruk212.model_json_schema() - expected_keys = ['$defs', 'additionalProperties', 'properties', 'required', 'title', 'type'] + expected_keys = [ + "$defs", + "additionalProperties", + "properties", + "required", + "title", + "type", + ] assert list(schema.keys()) == expected_keys assert schema == self.json_schema - + class TestGwdm10: - metadata = get_metadata('GWDM','1.0') - json_schema = get_schema('GWDM','1.0') + metadata = get_metadata("GWDM", "1.0") + json_schema = get_schema("GWDM", "1.0") def test_validation(self): assert Gwdm10(**self.metadata) != None def test_json_schema(self): schema = Gwdm10.model_json_schema() - expected_keys = ['$defs', 'additionalProperties', 'properties', 'required', 'title', 'type'] + expected_keys = [ + "$defs", + "additionalProperties", + "properties", + "required", + "title", + "type", + ] + print(expected_keys) + print(list(schema.keys())) + assert list(schema.keys()) == expected_keys assert schema == self.json_schema class TestGoogleRecommended: - metadata = get_metadata('SchemaOrg','GoogleRecommended') - json_schema = get_schema('SchemaOrg','GoogleRecommended') - + metadata = get_metadata("SchemaOrg", "GoogleRecommended") + json_schema = get_schema("SchemaOrg", "GoogleRecommended") + def test_organization(self): from hdr_schemata.models.SchemaOrg.GoogleRecommended import Organization - assert Organization(**self.metadata['creator']) != None - + + assert Organization(**self.metadata["creator"]) != None + def test_validation(self): assert GoogleRecommendedDataset(**self.metadata) != None @@ -58,10 +78,11 @@ def test_json_schema(self): schema = 
GoogleRecommendedDataset.model_json_schema() assert schema == self.json_schema + class TestBioSchema: - metadata = get_metadata('SchemaOrg','BioSchema') - json_schema = get_schema('SchemaOrg','BioSchema') - + metadata = get_metadata("SchemaOrg", "BioSchema") + json_schema = get_schema("SchemaOrg", "BioSchema") + def test_validation(self): assert BioSchema(**self.metadata) != None