From b0c3939c95d9a109359cd13169b7c5f7cfc9a3b3 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 8 Jan 2024 15:04:46 +0000 Subject: [PATCH 01/10] add a version 2.1.3 --- hdr_schemata/models/HDRUK/2.1.3/schema.json | 2132 +++++++++++++++++ hdr_schemata/models/HDRUK/__init__.py | 1 + hdr_schemata/models/HDRUK/test.py | 2 + .../models/HDRUK/v2_1_3/Provenance.py | 6 + hdr_schemata/models/HDRUK/v2_1_3/Temporal.py | 11 + hdr_schemata/models/HDRUK/v2_1_3/__init__.py | 9 + 6 files changed, 2161 insertions(+) create mode 100644 hdr_schemata/models/HDRUK/2.1.3/schema.json create mode 100644 hdr_schemata/models/HDRUK/v2_1_3/Provenance.py create mode 100644 hdr_schemata/models/HDRUK/v2_1_3/Temporal.py create mode 100644 hdr_schemata/models/HDRUK/v2_1_3/__init__.py diff --git a/hdr_schemata/models/HDRUK/2.1.3/schema.json b/hdr_schemata/models/HDRUK/2.1.3/schema.json new file mode 100644 index 0000000..9718b60 --- /dev/null +++ b/hdr_schemata/models/HDRUK/2.1.3/schema.json @@ -0,0 +1,2132 @@ +{ + "$defs": { + "AbstractText": { + "anyOf": [ + { + "maxLength": 500, + "minLength": 5, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "AbstractText" + }, + "Access": { + "additionalProperties": false, + "properties": { + "accessRights": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "title": "Access Rights" + }, + "accessService": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. 
If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", + "examples": [ + "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + ], + "title": "Access Service" + }, + "accessRequestCost": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide link(s) to a webpage detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", + "title": "Organisation Access Request Cost" + }, + "deliveryLeadTime": { + "anyOf": [ + { + "$ref": "#/$defs/DeliveryLeadTime" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide an indication of the typical processing times based on the types of requests typically received.", + "title": "Access Request Duration" + }, + "jurisdiction": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Isocountrycode" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Please use country code from ISO 3166-1 country codes and the associated ISO 3166-2 for regions, cities, states etc. 
for the country/state under whose laws the data subjects' data is collected, processed and stored.", + "title": "Jurisdiction" + }, + "dataController": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "description": "Data Controller means a person/entity who (either alone or jointly or in common with other persons/entities) determines the purposes for which and the way in which any Data Subject data, specifically personal data, are or are to be processed.", + "title": "Data Controller" + }, + "dataProcessor": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A Data Processor, in relation to any Data Subject data, specifically personal data, means any person/entity (other than an employee of the data controller) who processes the data on behalf of the data controller.", + "title": "Data Processor" + } + }, + "required": [ + "accessRights", + "jurisdiction", + "dataController" + ], + "title": "Access", + "type": "object" + }, + "Accessibility": { + "additionalProperties": false, + "properties": { + "usage": { + "anyOf": [ + { + "$ref": "#/$defs/Usage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "This section includes information about how the data can be used and how it is currently being used", + "title": "Usage" + }, + "access": { + "allOf": [ + { + "$ref": "#/$defs/Access" + } + ], + "description": "This section includes information about data access" + }, + "formatAndStandards": { + "anyOf": [ + { + "$ref": "#/$defs/FormatAndStandards" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Section includes technical attributes for language vocabularies, sizes etc.
and gives researchers facts about and processing the underlying data in the dataset.", + "title": "Format and Standards" + } + }, + "required": [ + "access" + ], + "title": "Accessibility", + "type": "object" + }, + "AgeRange": { + "anyOf": [ + { + "pattern": "Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "AgeRange" + }, + "CommaSeparatedValues": { + "anyOf": [ + { + "pattern": "([^,]+)", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "CommaSeparatedValues" + }, + "ControlledVocabulary": { + "anyOf": [ + { + "$ref": "#/$defs/ControlledVocabularyEnum" + }, + { + "type": "null" + } + ], + "default": null, + "title": "ControlledVocabulary" + }, + "ControlledVocabularyEnum": { + "enum": [ + "LOCAL", + "OPCS4", + "READ", + "SNOMED CT", + "SNOMED RT", + "DM PLUS D", + "DM+D", + "NHS NATIONAL CODES", + "NHS SCOTLAND NATIONAL CODES", + "NHS WALES NATIONAL CODES", + "ODS", + "LOINC", + "ICD10", + "ICD10CM", + "ICD10PCS", + "ICD9CM", + "ICD9", + "ICDO3", + "AMT", + "APC", + "ATC", + "CIEL", + "HPO", + "CPT4", + "DPD", + "DRG", + "HEMONC", + "JMDC", + "KCD7", + "MULTUM", + "NAACCR", + "NDC", + "NDFRT", + "OXMIS", + "RXNORM", + "RXNORM EXTENSION", + "SPL", + "OTHER" + ], + "title": "ControlledVocabularyEnum", + "type": "string" + }, + "Coverage": { + "additionalProperties": false, + "properties": { + "spatial": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The geographical area covered by the dataset. 
It is recommended that links are to entries in a well-maintained gazetteer such as https://www.geonames.org/ or https://what3words.com/daring.lion.race.", + "examples": [ + "https://www.geonames.org/2635167/united-kingdom-of-great-britain-and-northern-ireland.html" + ], + "title": "Geographic Coverage" + }, + "typicalAgeRange": { + "anyOf": [ + { + "$ref": "#/$defs/AgeRange" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate the age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "title": "Age Range" + }, + "physicalSampleAvailability": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": {}, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Availability of physical samples associated with the dataset. If samples are available, please indicate the types of samples that are available. More than one type may be provided. If samples are not yet available, please provide \u201cAVAILABILITY TO BE CONFIRMED\u201d. If samples are not available, then please provide \u201cNOT AVAILABLE\u201d.", + "examples": [ + "BONE MARROW" + ], + "title": "Physical Sample Availability" + }, + "followup": { + "anyOf": [ + { + "$ref": "#/$defs/Followup" + }, + { + "type": "null" + } + ], + "default": "UNKNOWN", + "description": "If known, what is the typical time span that a patient appears in the dataset (follow up period)", + "title": "Followup" + }, + "pathway": { + "anyOf": [ + { + "$ref": "#/$defs/Description" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate if the dataset is representative of the patient pathway and any limitations the dataset may have with respect to pathway coverage.
This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tiers (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", + "title": "Pathway" + } + }, + "title": "Coverage", + "type": "object" + }, + "DataClass": { + "additionalProperties": false, + "properties": { + "name": { + "anyOf": [ + { + "maxLength": 500, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The name of a table in a dataset.", + "title": "Table Name" + }, + "description": { + "anyOf": [ + { + "maxLength": 20000, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of a table in a dataset.", + "title": "Table Description" + }, + "elements": { + "description": "A list of data elements contained within a table in a dataset.", + "items": { + "$ref": "#/$defs/DataElement" + }, + "title": "Data Elements", + "type": "array" + } + }, + "required": [ + "name", + "elements" + ], + "title": "DataClass", + "type": "object" + }, + "DataElement": { + "additionalProperties": true, + "properties": { + "name": { + "allOf": [ + { + "$ref": "#/$defs/Name" + } + ], + "description": "The name of a column in a table.", + "title": "Column Name" + }, + "dataType": { + "description": "The data type of values in the column", + "title": "Data Type", + "type": "string" + }, + "description": { + "anyOf": [ + { + "maxLength": 20000, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of a column in a table.", + "title": "Column Description" + }, + "sensitive": { + "description": "A True or False value, indicating if the field is sensitive or not", + "title": "Sensitive", + "type": "boolean" + } + }, + "required": [ + "name", + "dataType", + "sensitive" + ], + "title": "DataElement", + "type": "object" + }, + "DataUseLimitation": { + "enum": [ + 
"GENERAL RESEARCH USE", + "COMMERCIAL RESEARCH USE", + "GENETIC STUDIES ONLY", + "NO GENERAL METHODS RESEARCH", + "NO RESTRICTION", + "GEOGRAPHICAL RESTRICTIONS", + "INSTITUTION SPECIFIC RESTRICTIONS", + "NOT FOR PROFIT USE", + "PROJECT SPECIFIC RESTRICTIONS", + "RESEARCH SPECIFIC RESTRICTIONS", + "USER SPECIFIC RESTRICTION", + "RESEARCH USE ONLY", + "NO LINKAGE" + ], + "title": "DataUseLimitation", + "type": "string" + }, + "DataUseRequirements": { + "enum": [ + "COLLABORATION REQUIRED", + "PROJECT SPECIFIC RESTRICTIONS", + "ETHICS APPROVAL REQUIRED", + "INSTITUTION SPECIFIC RESTRICTIONS", + "GEOGRAPHICAL RESTRICTIONS", + "PUBLICATION MORATORIUM", + "PUBLICATION REQUIRED", + "RETURN TO DATABASE OR RESOURCE", + "TIME LIMIT ON USE", + "DISCLOSURE CONTROL", + "NOT FOR PROFIT USE", + "USER SPECIFIC RESTRICTION", + null + ], + "title": "DataUseRequirements" + }, + "DeliveryLeadTime": { + "enum": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NOT APPLICABLE", + "OTHER", + null + ], + "title": "DeliveryLeadTime" + }, + "Description": { + "anyOf": [ + { + "maxLength": 10000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "Documentation": { + "additionalProperties": false, + "properties": { + "description": { + "anyOf": [ + { + "$ref": "#/$defs/Description" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A free-text description of the record.", + "title": "Description" + }, + "associatedMedia": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted: .jpg, .png, .svg, .pdf, .xlsx or .docx. Note: media assets can be hosted by the organisation or uploaded using the onboarding portal.", + "examples": [ + "PDF Document that describes study protocol" + ], + "title": "Associated Media" + }, + "isPartOf": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + { + "$ref": "#/$defs/IsPartOfEnum" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": "NOT APPLICABLE", + "description": "Please complete only if the dataset is part of a group or family", + "examples": [ + "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)." + ], + "title": "Group" + } + }, + "title": "Documentation", + "type": "object" + }, + "Doi": { + "anyOf": [ + { + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Doi" + }, + "EmailAddress": { + "anyOf": [ + { + "format": "email", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "EmailAddress" + }, + "EndDateEnum": { + "enum": [ + "CONTINUOUS", + null + ], + "title": "EndDateEnum" + }, + "EnrichmentAndLinkage": { + "additionalProperties": false, + "properties": { + "qualifiedRelation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability.
If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. Note: If all the datasets from Gateway organisation can be linked please indicate \u201cALL\u201d and the onboarding portal will automate linkage across the datasets submitted.", + "title": "Linked Datasets" + }, + "derivation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset.", + "title": "Derivations" + }, + "tools": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. 
Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", + "title": "Tools" + } + }, + "title": "EnrichmentAndLinkage", + "type": "object" + }, + "Followup": { + "enum": [ + "0 - 6 MONTHS", + "6 - 12 MONTHS", + "1 - 10 YEARS", + "> 10 YEARS", + "UNKNOWN", + "CONTINUOUS", + "OTHER", + null + ], + "title": "Followup" + }, + "Format": { + "minLength": 1, + "title": "Format", + "type": "string" + }, + "FormatAndStandards": { + "additionalProperties": false, + "properties": { + "vocabularyEncodingScheme": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/ControlledVocabulary" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "List any relevant terminologies / ontologies / controlled vocabularies, such as ICD 10 Codes, NHS Data Dictionary National Codes or SNOMED CT International, that are being used by the dataset. If the controlled vocabularies are local standards, please make that explicit. If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition. Notes: More than one vocabulary may be provided.", + "title": "Controlled Vocabulary" + }, + "conformsTo": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/StandardisedDataModels" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "List standardised data models that the dataset has been stored in or transformed to, such as OMOP or FHIR. If the data is only available in a local format, please make that explicit. 
If you are using a standard that has not been included in the list, please use \u201cother\u201d and contact support desk to ask for an addition.", + "title": "Conforms To" + }, + "language": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Language" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "This should list all the languages in which the dataset metadata and underlying data is made available.", + "title": "Language" + }, + "format": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Format" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "If multiple formats are available please specify. See application, audio, image, message, model, multipart, text, video, https://www.iana.org/assignments/media-types/media-types.xhtml Note: If your file format is not included in the current list of formats, please indicate other. If you are using the HOP you will be directed to a service desk page where you can request your additional format. 
If not please go to: https://metadata.atlassian.net/servicedesk/customer/portal/4 to request your format.", + "title": "Format" + } + }, + "required": [ + "vocabularyEncodingScheme", + "conformsTo", + "language", + "format" + ], + "title": "FormatAndStandards", + "type": "object" + }, + "IsPartOfEnum": { + "const": "NOT APPLICABLE", + "title": "IsPartOfEnum" + }, + "Isocountrycode": { + "pattern": "^[A-Z]{2}(-[A-Z]{2,3})?$", + "title": "Isocountrycode", + "type": "string" + }, + "Language": { + "anyOf": [ + { + "$ref": "#/$defs/LanguageEnum" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Language" + }, + "LanguageEnum": { + "enum": [ + "aa", + "ab", + "ae", + "af", + "ak", + "am", + "an", + "ar", + "as", + "av", + "ay", + "az", + "ba", + "be", + "bg", + "bh", + "bi", + "bm", + "bn", + "bo", + "br", + "bs", + "ca", + "ce", + "ch", + "co", + "cr", + "cs", + "cu", + "cv", + "cy", + "da", + "de", + "dv", + "dz", + "ee", + "el", + "en", + "eo", + "es", + "et", + "eu", + "fa", + "ff", + "fi", + "fj", + "fo", + "fr", + "fy", + "ga", + "gd", + "gl", + "gn", + "gu", + "gv", + "ha", + "he", + "hi", + "ho", + "hr", + "ht", + "hu", + "hy", + "hz", + "ia", + "id", + "ie", + "ig", + "ii", + "ik", + "io", + "is", + "it", + "iu", + "ja", + "jv", + "ka", + "kg", + "ki", + "kj", + "kk", + "kl", + "km", + "kn", + "ko", + "kr", + "ks", + "ku", + "kv", + "kw", + "ky", + "la", + "lb", + "lg", + "li", + "ln", + "lo", + "lt", + "lu", + "lv", + "mg", + "mh", + "mi", + "mk", + "ml", + "mn", + "mr", + "ms", + "mt", + "my", + "na", + "nb", + "nd", + "ne", + "ng", + "nl", + "nn", + "no", + "nr", + "nv", + "ny", + "oc", + "oj", + "om", + "or", + "os", + "pa", + "pi", + "pl", + "ps", + "pt", + "qu", + "rm", + "rn", + "ro", + "ru", + "rw", + "sa", + "sc", + "sd", + "se", + "sg", + "si", + "sk", + "sl", + "sm", + "sn", + "so", + "sq", + "sr", + "ss", + "st", + "su", + "sv", + "sw", + "ta", + "te", + "tg", + "th", + "ti", + "tk", + "tl", + "tn", + "to", + "tr", + "ts", + "tt", + 
"tw", + "ty", + "ug", + "uk", + "ur", + "uz", + "ve", + "vi", + "vo", + "wa", + "wo", + "xh", + "yi", + "yo", + "za", + "zh", + "zu" + ], + "title": "LanguageEnum", + "type": "string" + }, + "LongDescription": { + "anyOf": [ + { + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "LongDescription" + }, + "MeasuredProperty": { + "title": "MeasuredProperty" + }, + "MemberOf": { + "enum": [ + "HUB", + "ALLIANCE", + "OTHER", + "NCS" + ], + "title": "MemberOf", + "type": "string" + }, + "Name": { + "title": "Name" + }, + "Observation": { + "additionalProperties": false, + "properties": { + "observedNode": { + "allOf": [ + { + "$ref": "#/$defs/StatisticalPopulationConstrained" + } + ], + "description": "Please select one of the following statistical populations for you observation", + "examples": [ + "PERSONS" + ], + "title": "Statistical Population" + }, + "measuredValue": { + "description": "Please provide the population size associated with the population type the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "title": "Measured Value", + "type": "integer" + }, + "disambiguatingDescription": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ], + "default": null, + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "title": "Disambiguating Description" + }, + "observationDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + } + ], + "description": "Please provide the date that the observation was made. 
Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations.", + "title": "Observation Date" + }, + "measuredProperty": { + "allOf": [ + { + "$ref": "#/$defs/MeasuredProperty" + } + ], + "description": "Initially this will be defaulted to \"COUNT\"", + "title": "Measured Property" + } + }, + "required": [ + "observedNode", + "measuredValue", + "observationDate", + "measuredProperty" + ], + "title": "Observation", + "type": "object" + }, + "OneHundredFiftyCharacters": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "Organisation": { + "properties": { + "identifier": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. If your organisation does not have a Grid.ac identifier please use the \u201csuggest an institute\u201d function here: https://www.grid.ac/institutes#", + "title": "Organisation Identifier" + }, + "name": { + "allOf": [ + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + } + ], + "description": "Name of the organisation", + "title": "Organisation Name" + }, + "logo": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL.
The following formats will be accepted .jpg, .png or .svg.", + "title": "Organisation Logo" + }, + "description": { + "anyOf": [ + { + "$ref": "#/$defs/Description" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a URL that describes the organisation.", + "title": "Organisation Description" + }, + "contactPoint": { + "anyOf": [ + { + "$ref": "#/$defs/EmailAddress" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/EmailAddress" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Organisation contact point(s)", + "title": "Organisation Contact Point" + }, + "memberOf": { + "anyOf": [ + { + "$ref": "#/$defs/MemberOf" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate if the organisation is an Alliance Member or a Hub.", + "title": "Organisation Membership" + }, + "accessRights": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both.", + "title": "Organisation Default Access Rights" + }, + "deliveryLeadTime": { + "anyOf": [ + { + "$ref": "#/$defs/DeliveryLeadTime" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide an indication of the typical processing times based on the types of requests typically received. Note: This value will be used as default access request duration for all datasets submitted by the organisation. 
However, there will be the opportunity to overwrite this value for each dataset.", + "title": "Access Request Duration" + }, + "accessService": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", + "examples": [ + "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" + ], + "title": "Organisation Access Service" + }, + "accessRequestCost": { + "anyOf": [ + { + "$ref": "#/$defs/ShortDescription" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide link(s) to a webpage or a short description detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", + "title": "Organisation Access Request Cost" + }, + "dataUseLimitation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/DataUseLimitation" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. 
Notes: where there are existing data-sharing arrangements such as the HDR UK HUB data sharing agreement or the NIHR HIC data sharing agreement this should be indicated within access rights. This value will be used as terms for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", + "title": "Data Use Limitation" + }, + "dataUseRequirements": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/DataUseRequirements" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate if there are any additional conditions set for use; multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information.", + "title": "Data Use Requirements" + } + }, + "required": [ + "name", + "contactPoint" + ], + "title": "Organisation", + "type": "object" + }, + "Origin": { + "additionalProperties": false, + "properties": { + "purpose": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Purpose" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate the purpose(s) for which the dataset was collected.", + "title": "Purpose" + }, + "source": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Source" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate the source of the data extraction", + "title": "Source" + }, + "collectionSituation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Setting" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate the setting(s) where data was collected.
Multiple settings may be provided", + "title": "Setting" + } + }, + "title": "Origin", + "type": "object" + }, + "Periodicity": { + "enum": [ + "STATIC", + "IRREGULAR", + "CONTINUOUS", + "BIENNIAL", + "ANNUAL", + "BIANNUAL", + "QUARTERLY", + "BIMONTHLY", + "MONTHLY", + "BIWEEKLY", + "WEEKLY", + "SEMIWEEKLY", + "DAILY", + "OTHER", + null + ], + "title": "Periodicity" + }, + "Provenance": { + "additionalProperties": false, + "properties": { + "origin": { + "anyOf": [ + { + "$ref": "#/$defs/Origin" + }, + { + "type": "null" + } + ], + "default": null + }, + "temporal": { + "$ref": "#/$defs/Temporal" + } + }, + "required": [ + "temporal" + ], + "title": "Provenance", + "type": "object" + }, + "Purpose": { + "enum": [ + "STUDY", + "DISEASE REGISTRY", + "TRIAL", + "CARE", + "AUDIT", + "ADMINISTRATIVE", + "FINANCIAL", + "STATUTORY", + "OTHER", + null + ], + "title": "Purpose" + }, + "Revision": { + "additionalProperties": false, + "properties": { + "version": { + "allOf": [ + { + "$ref": "#/$defs/Semver" + } + ], + "description": "Semantic Version" + }, + "url": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "description": "URL endpoint to obtain the version" + } + }, + "required": [ + "version", + "url" + ], + "title": "Revision", + "type": "object" + }, + "Semver": { + "pattern": "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$", + "title": "Semver", + "type": "string" + }, + "Setting": { + "enum": [ + "CLINIC", + "PRIMARY CARE", + "ACCIDENT AND EMERGENCY", + "OUTPATIENTS", + "IN-PATIENTS", + "SERVICES", + "COMMUNITY", + "HOME", + "PRIVATE", + "PHARMACY", + "SOCIAL CARE", + "LOCAL AUTHORITY", + "NATIONAL GOVERNMENT", + "OTHER" + ], + "title": "Setting", + "type": "string" + }, + "ShortDescription": { + "anyOf": [ + { + "maxLength": 1000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "ShortDescription" + }, + "Source": { + "enum": [ + "EPR", + "ELECTRONIC SURVEY", + "LIMS", + "OTHER INFORMATION SYSTEM", + "PAPER 
BASED", + "FREETEXT NLP", + "MACHINE GENERATED", + "OTHER" + ], + "title": "Source", + "type": "string" + }, + "StandardisedDataModels": { + "anyOf": [ + { + "$ref": "#/$defs/StandardisedDataModelsEnum" + }, + { + "type": "null" + } + ], + "default": null, + "title": "StandardisedDataModels" + }, + "StandardisedDataModelsEnum": { + "enum": [ + "HL7 FHIR", + "HL7 V2", + "HL7 CDA", + "HL7 CCOW", + "LOINC", + "DICOM", + "I2B2", + "IHE", + "OMOP", + "OPENEHR", + "SENTINEL", + "PCORNET", + "CDISC", + "NHS DATA DICTIONARY", + "NHS SCOTLAND DATA DICTIONARY", + "NHS WALES DATA DICTIONARY", + "LOCAL", + "OTHER" + ], + "title": "StandardisedDataModelsEnum", + "type": "string" + }, + "StatisticalPopulationConstrained": { + "enum": [ + "PERSONS", + "EVENTS", + "FINDINGS" + ], + "title": "StatisticalPopulationConstrained", + "type": "string" + }, + "Summary": { + "additionalProperties": false, + "properties": { + "title": { + "allOf": [ + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + } + ], + "description": "Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers.", + "examples": [ + [ + "North West London COVID-19 Patient Level Situation Report" + ] + ], + "title": "Title" + }, + "abstract": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ], + "description": "Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. 
The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible", + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." + ], + "title": "Dataset Abstract" + }, + "publisher": { + "allOf": [ + { + "$ref": "#/$defs/Organisation" + } + ], + "description": "This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank.", + "title": "Dataset publisher" + }, + "contactPoint": { + "anyOf": [ + { + "$ref": "#/$defs/EmailAddress" + }, + { + "type": "null" + } + ], + "description": "Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose.", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "title": "Contact Point" + }, + "keywords": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. 
Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", + "title": "Keywords" + }, + "alternateIdentifiers": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/ShortDescription" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Alternate dataset identifiers or local identifiers", + "title": "Alternate dataset identifiers" + }, + "doiName": { + "anyOf": [ + { + "$ref": "#/$defs/Doi" + }, + { + "type": "null" + } + ], + "default": null, + "description": "All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI.", + "examples": [ + "10.3399/bjgp17X692645" + ], + "title": "Digital Object Identifier" + } + }, + "required": [ + "title", + "abstract", + "publisher", + "contactPoint", + "keywords" + ], + "title": "Summary", + "type": "object" + }, + "Temporal": { + "additionalProperties": false, + "properties": { + "accrualPeriodicity": { + "allOf": [ + { + "$ref": "#/$defs/Periodicity" + } + ], + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. 
If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/" + }, + "distributionReleaseDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "title": "Release Date" + }, + "startDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "title": "Start Date" + }, + "endDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "$ref": "#/$defs/EndDateEnum" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. 
If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "title": "End Date" + }, + "timeLag": { + "allOf": [ + { + "$ref": "#/$defs/TimeLag" + } + ], + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "title": "Time Lag" + } + }, + "required": [ + "accrualPeriodicity", + "startDate", + "timeLag" + ], + "title": "Temporal", + "type": "object" + }, + "TimeLag": { + "enum": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NO TIMELAG", + "NOT APPLICABLE", + "OTHER", + null + ], + "title": "TimeLag" + }, + "Url": { + "anyOf": [ + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Url" + }, + "Usage": { + "additionalProperties": false, + "properties": { + "dataUseLimitation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/DataUseLimitation" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide an indication of consent permissions for datasets and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. NOTE: we have extended the DUO to include a value for NO LINKAGE", + "title": "Data Use Limitation" + }, + "dataUseRequirements": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/DataUseRequirements" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. 
Please ensure that these restrictions are documented in access rights information.", + "title": "Data Use Requirements" + }, + "resourceCreator": { + "anyOf": [ + { + "$ref": "#/$defs/ShortDescription" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/ShortDescription" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide the text that you would like included as part of any citation that credits this dataset. This is typically just the name of the publisher. No employee details should be provided.", + "title": "Citation Requirements" + }, + "investigations": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "title": "Investigations" + }, + "isReferencedBy": { + "anyOf": [ + { + "$ref": "#/$defs/Doi" + }, + { + "type": "string" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Doi" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide the keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced. 
Please provide multiple entries, or if you are using a csv upload please provide them as a tab separated list.", + "title": "Citations" + } + }, + "title": "Usage", + "type": "object" + }, + "Uuidv4": { + "maxLength": 36, + "minLength": 36, + "pattern": "^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$", + "title": "Uuidv4", + "type": "string" + } + }, + "additionalProperties": false, + "properties": { + "identifier": { + "anyOf": [ + { + "$ref": "#/$defs/Uuidv4" + }, + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "description": "System dataset identifier", + "examples": [ + [ + "226fb3f1-4471-400a-8c39-2b66d46a39b6", + "https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" + ] + ], + "title": "Dataset identifier" + }, + "version": { + "allOf": [ + { + "$ref": "#/$defs/Semver" + } + ], + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "title": "Dataset Version" + }, + "revisions": { + "description": "Revisions of Dataset metadata", + "items": { + "$ref": "#/$defs/Revision" + }, + "title": "Dataset Revisions", + "type": "array" + }, + "issued": { + "description": "Dataset Metadata Creation Date", + "format": "date-time", + "title": "Creation Date", + "type": "string" + }, + "modified": { + "description": "Dataset Metadata Creation Date", + "format": "date-time", + "title": "Modification Date", + "type": "string" + }, + "summary": { + "allOf": [ + { + "$ref": "#/$defs/Summary" + } + ], + "description": "Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP." + }, + "documentation": { + "anyOf": [ + { + "$ref": "#/$defs/Documentation" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. 
Organisations are required to confirm that they have permission to distribute any additional media.", + "title": "Documentation" + }, + "coverage": { + "anyOf": [ + { + "$ref": "#/$defs/Coverage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "This information includes attributes for geographical and temporal coverage, cohort details etc. to enable a deeper understanding of the dataset content so that researchers can make decisions about the relevance of the underlying data.", + "title": "Coverage" + }, + "provenance": { + "anyOf": [ + { + "$ref": "#/$defs/Provenance" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", + "title": "Provenance" + }, + "accessibility": { + "allOf": [ + { + "$ref": "#/$defs/Accessibility" + } + ], + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets." + }, + "enrichmentAndLinkage": { + "anyOf": [ + { + "$ref": "#/$defs/EnrichmentAndLinkage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", + "title": "Enrichment and Linkage" + }, + "observations": { + "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. 
Users will be encouraged to provide their own statistical populations as the project progresses. Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d", + "items": { + "$ref": "#/$defs/Observation" + }, + "title": "Observations", + "type": "array" + }, + "structuralMetadata": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/DataClass" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Descriptions of all tables and data elements that can be included in the dataset", + "title": "Structural Metadata" + } + }, + "required": [ + "identifier", + "version", + "revisions", + "issued", + "modified", + "summary", + "accessibility", + "observations" + ], + "title": "Hdruk213", + "type": "object" +} \ No newline at end of file diff --git a/hdr_schemata/models/HDRUK/__init__.py b/hdr_schemata/models/HDRUK/__init__.py index 043ec4e..aa7f085 100644 --- a/hdr_schemata/models/HDRUK/__init__.py +++ b/hdr_schemata/models/HDRUK/__init__.py @@ -1 +1,2 @@ from .v2_1_2 import Hdruk212 +from .v2_1_3 import Hdruk213 diff --git a/hdr_schemata/models/HDRUK/test.py b/hdr_schemata/models/HDRUK/test.py index 1244055..d4c81b1 100644 --- a/hdr_schemata/models/HDRUK/test.py +++ b/hdr_schemata/models/HDRUK/test.py @@ -1,5 +1,7 @@ from pydantic import ValidationError import v2_1_2 +import v2_1_3 import json v2_1_2.Hdruk212.save_schema() +v2_1_3.Hdruk213.save_schema() diff --git a/hdr_schemata/models/HDRUK/v2_1_3/Provenance.py b/hdr_schemata/models/HDRUK/v2_1_3/Provenance.py new file mode 100644 index 0000000..d2b2705 --- /dev/null +++ 
b/hdr_schemata/models/HDRUK/v2_1_3/Provenance.py @@ -0,0 +1,6 @@ +from hdr_schemata.models.HDRUK.base import Provenance as BaseProvenance +from .Temporal import Temporal + + +class Provenance(BaseProvenance): + temporal: Temporal diff --git a/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py new file mode 100644 index 0000000..1d36499 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py @@ -0,0 +1,11 @@ +from pydantic import Field +from hdr_schemata.definitions.HDRUK import Periodicity +from hdr_schemata.models.HDRUK.base import Temporal as TemporalBase + + +class Temporal(TemporalBase): + publishingFrequency: Periodicity = Field( + ..., + description="Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + title="Publishing Frequency", + ) diff --git a/hdr_schemata/models/HDRUK/v2_1_3/__init__.py b/hdr_schemata/models/HDRUK/v2_1_3/__init__.py new file mode 100644 index 0000000..920ce39 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_1_3/__init__.py @@ -0,0 +1,9 @@ +from hdr_schemata.models.HDRUK.v2_1_2 import Hdruk212 +import json + + +class Hdruk213(Hdruk212): + @classmethod + def save_schema(cls, location="./2.1.3/schema.json"): + with open(location, "w") as f: + json.dump(cls.model_json_schema(), f, indent=6) From d477033f28dd5925f7d486397ef27503f6a614f8 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 8 Jan 2024 15:06:59 +0000 Subject: [PATCH 02/10] add 2.1.3 --- available.json | 1 + 1 file changed, 1 insertion(+) diff --git a/available.json b/available.json index 6f9ae69..951e1ea 100644 --- a/available.json +++ b/available.json @@ -1,6 +1,7 @@ { "HDRUK": [ "2.1.2", + "2.1.3", "2.1.0", "2.0.2" ], From 1be6f854c5d8e2bc815d9e6ad4c229d293928e83 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 8 Jan 2024 15:13:37 +0000 Subject: [PATCH 03/10] didnt register changes --- hdr_schemata/models/HDRUK/v2_1_3/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/hdr_schemata/models/HDRUK/v2_1_3/__init__.py b/hdr_schemata/models/HDRUK/v2_1_3/__init__.py index 920ce39..073749f 100644 --- a/hdr_schemata/models/HDRUK/v2_1_3/__init__.py +++ b/hdr_schemata/models/HDRUK/v2_1_3/__init__.py @@ -1,8 +1,17 @@ from hdr_schemata.models.HDRUK.v2_1_2 import Hdruk212 import json +from typing import Optional +from pydantic import Field +from .Provenance import Provenance class Hdruk213(Hdruk212): + provenance: Optional[Provenance] = Field( + None, + description="Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", + title="Provenance", + ) + 
@classmethod def save_schema(cls, location="./2.1.3/schema.json"): with open(location, "w") as f: From 22ce473bd54ce0c1b82c240bd2e51a3d5e3743b0 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 8 Jan 2024 15:32:11 +0000 Subject: [PATCH 04/10] changes --- hdr_schemata/models/HDRUK/2.1.3/schema.json | 245 +------------------ hdr_schemata/models/HDRUK/v2_1_3/Temporal.py | 11 +- 2 files changed, 13 insertions(+), 243 deletions(-) diff --git a/hdr_schemata/models/HDRUK/2.1.3/schema.json b/hdr_schemata/models/HDRUK/2.1.3/schema.json index 9718b60..e3617db 100644 --- a/hdr_schemata/models/HDRUK/2.1.3/schema.json +++ b/hdr_schemata/models/HDRUK/2.1.3/schema.json @@ -620,13 +620,6 @@ ], "title": "EmailAddress" }, - "EndDateEnum": { - "enum": [ - "CONTINUOUS", - null - ], - "title": "EndDateEnum" - }, "EnrichmentAndLinkage": { "additionalProperties": false, "properties": { @@ -1131,6 +1124,7 @@ "type": "string" }, "Organisation": { + "additionalProperties": false, "properties": { "identifier": { "anyOf": [ @@ -1217,125 +1211,6 @@ "default": null, "description": "Please indicate if the organisation is an Alliance Member or a Hub.", "title": "Organisation Membership" - }, - "accessRights": { - "anyOf": [ - { - "$ref": "#/$defs/Url" - }, - { - "items": { - "anyOf": [ - { - "$ref": "#/$defs/Url" - }, - { - "type": "null" - } - ] - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The URL of a webpage where the data access request process and/or guidance is provided. If there is more than one access process i.e. industry vs academic please provide both.", - "title": "Organisation Default Access Rights" - }, - "deliveryLeadTime": { - "anyOf": [ - { - "$ref": "#/$defs/DeliveryLeadTime" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Please provide an indication of the typical processing times based on the types of requests typically received. 
Note: This value will be used as default access request duration for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", - "title": "Access Request Duration" - }, - "accessService": { - "anyOf": [ - { - "$ref": "#/$defs/LongDescription" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Please provide a brief description of the data access services that are available including: environment that is currently available to researchers;additional consultancy and services;any indication of costs associated. If no environment is currently available, please indicate the current plans and timelines when and how data will be made available to researchers Note: This value will be used as default access environment for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", - "examples": [ - "https://cnfl.extge.co.uk/display/GERE/Research+Environment+User+Guide" - ], - "title": "Organisation Access Service" - }, - "accessRequestCost": { - "anyOf": [ - { - "$ref": "#/$defs/ShortDescription" - }, - { - "items": { - "anyOf": [ - { - "$ref": "#/$defs/Url" - }, - { - "type": "null" - } - ] - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Please provide link(s) to a webpage or a short description detailing the commercial model for processing data access requests for the organisation (if available) Definition: Indication of commercial model or cost (in GBP) for processing each data access request by the data custodian.", - "title": "Organisation Access Request Cost" - }, - "dataUseLimitation": { - "anyOf": [ - { - "$ref": "#/$defs/CommaSeparatedValues" - }, - { - "items": { - "$ref": "#/$defs/DataUseLimitation" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Please provide an indication of consent permissions for datasets 
and/or materials, and relates to the purposes for which datasets and/or material might be removed, stored or used. Notes: where there are existing data-sharing arrangements such as the HDR UK HUB data sharing agreement or the NIHR HIC data sharing agreement this should be indicated within access rights. This value will be used as terms for all datasets submitted by the organisation. However, there will be the opportunity to overwrite this value for each dataset.", - "title": "Data Use Limitation" - }, - "dataUseRequirements": { - "anyOf": [ - { - "$ref": "#/$defs/CommaSeparatedValues" - }, - { - "items": { - "$ref": "#/$defs/DataUseRequirements" - }, - "type": "array" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Please indicate fit here are any additional conditions set for use if any, multiple requirements may be provided. Please ensure that these restrictions are documented in access rights information.", - "title": "Data Use Requirements" } }, "required": [ @@ -1409,26 +1284,6 @@ "title": "Origin", "type": "object" }, - "Periodicity": { - "enum": [ - "STATIC", - "IRREGULAR", - "CONTINUOUS", - "BIENNIAL", - "ANNUAL", - "BIANNUAL", - "QUARTERLY", - "BIMONTHLY", - "MONTHLY", - "BIWEEKLY", - "WEEKLY", - "SEMIWEEKLY", - "DAILY", - "OTHER", - null - ], - "title": "Periodicity" - }, "Provenance": { "additionalProperties": false, "properties": { @@ -1723,106 +1578,16 @@ "type": "object" }, "Temporal": { - "additionalProperties": false, "properties": { - "accrualPeriodicity": { - "allOf": [ - { - "$ref": "#/$defs/Periodicity" - } - ], - "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. 
If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/" - }, - "distributionReleaseDate": { - "anyOf": [ - { - "format": "date", - "type": "string" - }, - { - "format": "date-time", - "type": "string" - }, - { - "type": "null" - } - ], - "default": null, - "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", - "title": "Release Date" - }, - "startDate": { - "anyOf": [ - { - "format": "date", - "type": "string" - }, - { - "format": "date-time", - "type": "string" - }, - { - "type": "null" - } - ], - "description": "The start of the time period that the dataset provides coverage for. 
If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", - "title": "Start Date" - }, - "endDate": { - "anyOf": [ - { - "format": "date", - "type": "string" - }, - { - "format": "date-time", - "type": "string" - }, - { - "$ref": "#/$defs/EndDateEnum" - }, - { - "type": "null" - } - ], - "default": null, - "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", - "title": "End Date" - }, - "timeLag": { - "allOf": [ - { - "$ref": "#/$defs/TimeLag" - } - ], - "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", - "title": "Time Lag" + "test": { + "default": "blah", + "title": "Test", + "type": "string" } }, - "required": [ - "accrualPeriodicity", - "startDate", - "timeLag" - ], "title": "Temporal", "type": "object" }, - "TimeLag": { - "enum": [ - "LESS 1 WEEK", - "1-2 WEEKS", - "2-4 WEEKS", - "1-2 MONTHS", - "2-6 MONTHS", - "MORE 6 MONTHS", - "VARIABLE", - "NO TIMELAG", - "NOT APPLICABLE", - "OTHER", - null - ], - "title": "TimeLag" - }, "Url": { "anyOf": [ { diff --git a/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py index 1d36499..65f184e 100644 --- a/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py +++ b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py @@ -1,11 +1,16 @@ from pydantic import Field from hdr_schemata.definitions.HDRUK import Periodicity -from hdr_schemata.models.HDRUK.base import Temporal as TemporalBase +from hdr_schemata.models.HDRUK.base import Temporal as BaseTemporal +from hdr_schemata.models.HDRUK.base import Coverage as 
BaseCoverage +from pydantic import BaseModel, Field +print(BaseTemporal) +print(BaseCoverage) -class Temporal(TemporalBase): + +class Temporal(BaseTemporal): publishingFrequency: Periodicity = Field( ..., - description="Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", title="Publishing Frequency", + description="Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", ) From c5a48b7b656f77aa800eb67e89dbfea74a8f3913 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 8 Jan 2024 15:37:32 +0000 Subject: [PATCH 05/10] fix code --- hdr_schemata/models/HDRUK/v2_1_3/Temporal.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py index 65f184e..9abc6a5 100644 --- a/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py +++ b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py @@ -1,12 +1,8 @@ from pydantic import Field from hdr_schemata.definitions.HDRUK import Periodicity -from hdr_schemata.models.HDRUK.base import Temporal as BaseTemporal -from hdr_schemata.models.HDRUK.base import Coverage as BaseCoverage +from hdr_schemata.models.HDRUK.base.Temporal import Temporal as BaseTemporal from pydantic import BaseModel, Field -print(BaseTemporal) -print(BaseCoverage) - class Temporal(BaseTemporal): publishingFrequency: Periodicity = Field( From f5432d09031ae4a283d978317b4088098624b05c Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 8 Jan 2024 15:37:48 +0000 Subject: [PATCH 06/10] add new schema --- hdr_schemata/models/HDRUK/2.1.3/schema.json | 135 +++++++++++++++++++- 1 file changed, 131 insertions(+), 4 deletions(-) diff --git a/hdr_schemata/models/HDRUK/2.1.3/schema.json b/hdr_schemata/models/HDRUK/2.1.3/schema.json index e3617db..1d629d0 100644 --- a/hdr_schemata/models/HDRUK/2.1.3/schema.json +++ b/hdr_schemata/models/HDRUK/2.1.3/schema.json @@ -620,6 +620,13 @@ ], "title": "EmailAddress" }, + "EndDateEnum": { + "enum": [ + "CONTINUOUS", + null + ], + "title": "EndDateEnum" + }, "EnrichmentAndLinkage": { "additionalProperties": false, "properties": { @@ -1284,6 +1291,26 @@ "title": "Origin", "type": "object" }, + "Periodicity": { + "enum": [ + "STATIC", + "IRREGULAR", + "CONTINUOUS", + "BIENNIAL", + "ANNUAL", + "BIANNUAL", + 
"QUARTERLY", + "BIMONTHLY", + "MONTHLY", + "BIWEEKLY", + "WEEKLY", + "SEMIWEEKLY", + "DAILY", + "OTHER", + null + ], + "title": "Periodicity" + }, "Provenance": { "additionalProperties": false, "properties": { @@ -1578,16 +1605,116 @@ "type": "object" }, "Temporal": { + "additionalProperties": false, "properties": { - "test": { - "default": "blah", - "title": "Test", - "type": "string" + "accrualPeriodicity": { + "allOf": [ + { + "$ref": "#/$defs/Periodicity" + } + ], + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/" + }, + "distributionReleaseDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. 
if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "title": "Release Date" + }, + "startDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "title": "Start Date" + }, + "endDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "$ref": "#/$defs/EndDateEnum" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "title": "End Date" + }, + "timeLag": { + "allOf": [ + { + "$ref": "#/$defs/TimeLag" + } + ], + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "title": "Time Lag" + }, + "publishingFrequency": { + "allOf": [ + { + "$ref": "#/$defs/Periodicity" + } + ], + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. 
If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "title": "Publishing Frequency" } }, + "required": [ + "accrualPeriodicity", + "startDate", + "timeLag", + "publishingFrequency" + ], "title": "Temporal", "type": "object" }, + "TimeLag": { + "enum": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NO TIMELAG", + "NOT APPLICABLE", + "OTHER", + null + ], + "title": "TimeLag" + }, "Url": { "anyOf": [ { From bdbc0b18f21877f199bd7c5d7cda048cbbb8e9f4 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 8 Jan 2024 15:39:09 +0000 Subject: [PATCH 07/10] update --- hdr_schemata/models/HDRUK/latest/dev/schema.json | 14 ++++++++++++-- hdr_schemata/models/HDRUK/test.py | 1 + 2 files changed, 13 insertions(+), 2 deletions(-) diff --git a/hdr_schemata/models/HDRUK/latest/dev/schema.json b/hdr_schemata/models/HDRUK/latest/dev/schema.json index 798be21..1d629d0 100644 --- a/hdr_schemata/models/HDRUK/latest/dev/schema.json +++ b/hdr_schemata/models/HDRUK/latest/dev/schema.json @@ -1679,12 +1679,22 @@ ], "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", "title": "Time Lag" + }, + "publishingFrequency": { + "allOf": [ + { + "$ref": "#/$defs/Periodicity" + } + ], + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. 
When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "title": "Publishing Frequency" } }, "required": [ "accrualPeriodicity", "startDate", - "timeLag" + "timeLag", + "publishingFrequency" ], "title": "Temporal", "type": "object" @@ -2009,6 +2019,6 @@ "accessibility", "observations" ], - "title": "Hdruk212", + "title": "Hdruk213", "type": "object" } \ No newline at end of file diff --git a/hdr_schemata/models/HDRUK/test.py b/hdr_schemata/models/HDRUK/test.py index d4c81b1..67d69b0 100644 --- a/hdr_schemata/models/HDRUK/test.py +++ b/hdr_schemata/models/HDRUK/test.py @@ -5,3 +5,4 @@ v2_1_2.Hdruk212.save_schema() v2_1_3.Hdruk213.save_schema() +v2_1_3.Hdruk213.save_schema("latest/dev/schema.json") From 5da5f0b1e5a827c8c9dffcde84f2ac05967f27df Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Mon, 8 Jan 2024 15:44:35 +0000 Subject: [PATCH 08/10] include removal of accrualPeriodicity --- hdr_schemata/models/HDRUK/2.1.3/schema.json | 9 --------- .../models/HDRUK/{test.py => create_json_schema.py} | 0 hdr_schemata/models/HDRUK/latest/dev/schema.json | 9 --------- hdr_schemata/models/HDRUK/v2_1_3/Temporal.py | 4 ++++ 4 files changed, 4 insertions(+), 18 deletions(-) rename hdr_schemata/models/HDRUK/{test.py => create_json_schema.py} (100%) diff --git a/hdr_schemata/models/HDRUK/2.1.3/schema.json b/hdr_schemata/models/HDRUK/2.1.3/schema.json index 1d629d0..23a49a1 100644 --- 
a/hdr_schemata/models/HDRUK/2.1.3/schema.json +++ b/hdr_schemata/models/HDRUK/2.1.3/schema.json @@ -1607,14 +1607,6 @@ "Temporal": { "additionalProperties": false, "properties": { - "accrualPeriodicity": { - "allOf": [ - { - "$ref": "#/$defs/Periodicity" - } - ], - "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/" - }, "distributionReleaseDate": { "anyOf": [ { @@ -1691,7 +1683,6 @@ } }, "required": [ - "accrualPeriodicity", "startDate", "timeLag", "publishingFrequency" diff --git a/hdr_schemata/models/HDRUK/test.py b/hdr_schemata/models/HDRUK/create_json_schema.py similarity index 100% rename from hdr_schemata/models/HDRUK/test.py rename to hdr_schemata/models/HDRUK/create_json_schema.py diff --git a/hdr_schemata/models/HDRUK/latest/dev/schema.json b/hdr_schemata/models/HDRUK/latest/dev/schema.json index 1d629d0..23a49a1 100644 --- a/hdr_schemata/models/HDRUK/latest/dev/schema.json +++ b/hdr_schemata/models/HDRUK/latest/dev/schema.json @@ -1607,14 +1607,6 @@ "Temporal": { "additionalProperties": false, "properties": { - "accrualPeriodicity": { - "allOf": [ - { - "$ref": "#/$defs/Periodicity" - } - ], - "description": "Please indicate the frequency of distribution release. 
If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/" - }, "distributionReleaseDate": { "anyOf": [ { @@ -1691,7 +1683,6 @@ } }, "required": [ - "accrualPeriodicity", "startDate", "timeLag", "publishingFrequency" diff --git a/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py index 9abc6a5..1b0aee5 100644 --- a/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py +++ b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py @@ -2,6 +2,7 @@ from hdr_schemata.definitions.HDRUK import Periodicity from hdr_schemata.models.HDRUK.base.Temporal import Temporal as BaseTemporal from pydantic import BaseModel, Field +from hdr_schemata.models import remove_fields_from_cls class Temporal(BaseTemporal): @@ -10,3 +11,6 @@ class Temporal(BaseTemporal): title="Publishing Frequency", description="Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. 
If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", ) + + +remove_fields_from_cls(Temporal, ["accrualPeriodicity"]) From 4575f4a7935be3593c62e28ea80ed6a6ad74444d Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Tue, 9 Jan 2024 11:47:24 +0000 Subject: [PATCH 09/10] rename base --> 2.1.2 --- hdr_schemata/models/HDRUK/v2_1_2.py | 8 -------- hdr_schemata/models/HDRUK/{base => v2_1_2}/Access.py | 0 .../models/HDRUK/{base => v2_1_2}/Accessibility.py | 0 hdr_schemata/models/HDRUK/{base => v2_1_2}/Coverage.py | 0 .../models/HDRUK/{base => v2_1_2}/DataClass.py | 0 .../models/HDRUK/{base => v2_1_2}/DataElement.py | 0 .../models/HDRUK/{base => v2_1_2}/Documentation.py | 0 .../HDRUK/{base => v2_1_2}/EnrichmentAndLinkage.py | 0 .../HDRUK/{base => v2_1_2}/FormatAndStandards.py | 0 .../models/HDRUK/{base => v2_1_2}/Observations.py | 0 .../models/HDRUK/{base => v2_1_2}/Organisation.py | 0 hdr_schemata/models/HDRUK/{base => v2_1_2}/Origin.py | 0 .../models/HDRUK/{base => v2_1_2}/Provenance.py | 0 hdr_schemata/models/HDRUK/{base => v2_1_2}/Revision.py | 0 hdr_schemata/models/HDRUK/{base => v2_1_2}/Summary.py | 0 hdr_schemata/models/HDRUK/{base => v2_1_2}/Temporal.py | 0 hdr_schemata/models/HDRUK/{base => v2_1_2}/Usage.py | 0 hdr_schemata/models/HDRUK/{base => v2_1_2}/__init__.py | 10 ++++++++-- hdr_schemata/models/HDRUK/v2_1_3/Provenance.py | 2 +- hdr_schemata/models/HDRUK/v2_1_3/Temporal.py | 2 +- 20 files changed, 10 insertions(+), 12 deletions(-) delete mode 100644 hdr_schemata/models/HDRUK/v2_1_2.py rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Access.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Accessibility.py (100%) rename 
hdr_schemata/models/HDRUK/{base => v2_1_2}/Coverage.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/DataClass.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/DataElement.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Documentation.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/EnrichmentAndLinkage.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/FormatAndStandards.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Observations.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Organisation.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Origin.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Provenance.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Revision.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Summary.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Temporal.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/Usage.py (100%) rename hdr_schemata/models/HDRUK/{base => v2_1_2}/__init__.py (95%) diff --git a/hdr_schemata/models/HDRUK/v2_1_2.py b/hdr_schemata/models/HDRUK/v2_1_2.py deleted file mode 100644 index 9c83e84..0000000 --- a/hdr_schemata/models/HDRUK/v2_1_2.py +++ /dev/null @@ -1,8 +0,0 @@ -from hdr_schemata.models.HDRUK.base import HdrukBaseModel -import json - -class Hdruk212(HdrukBaseModel): - @classmethod - def save_schema(cls,location='./2.1.2/schema.json'): - with open(location,'w') as f: - json.dump(cls.model_json_schema(),f,indent=6) diff --git a/hdr_schemata/models/HDRUK/base/Access.py b/hdr_schemata/models/HDRUK/v2_1_2/Access.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Access.py rename to hdr_schemata/models/HDRUK/v2_1_2/Access.py diff --git a/hdr_schemata/models/HDRUK/base/Accessibility.py b/hdr_schemata/models/HDRUK/v2_1_2/Accessibility.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Accessibility.py rename to 
hdr_schemata/models/HDRUK/v2_1_2/Accessibility.py diff --git a/hdr_schemata/models/HDRUK/base/Coverage.py b/hdr_schemata/models/HDRUK/v2_1_2/Coverage.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Coverage.py rename to hdr_schemata/models/HDRUK/v2_1_2/Coverage.py diff --git a/hdr_schemata/models/HDRUK/base/DataClass.py b/hdr_schemata/models/HDRUK/v2_1_2/DataClass.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/DataClass.py rename to hdr_schemata/models/HDRUK/v2_1_2/DataClass.py diff --git a/hdr_schemata/models/HDRUK/base/DataElement.py b/hdr_schemata/models/HDRUK/v2_1_2/DataElement.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/DataElement.py rename to hdr_schemata/models/HDRUK/v2_1_2/DataElement.py diff --git a/hdr_schemata/models/HDRUK/base/Documentation.py b/hdr_schemata/models/HDRUK/v2_1_2/Documentation.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Documentation.py rename to hdr_schemata/models/HDRUK/v2_1_2/Documentation.py diff --git a/hdr_schemata/models/HDRUK/base/EnrichmentAndLinkage.py b/hdr_schemata/models/HDRUK/v2_1_2/EnrichmentAndLinkage.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/EnrichmentAndLinkage.py rename to hdr_schemata/models/HDRUK/v2_1_2/EnrichmentAndLinkage.py diff --git a/hdr_schemata/models/HDRUK/base/FormatAndStandards.py b/hdr_schemata/models/HDRUK/v2_1_2/FormatAndStandards.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/FormatAndStandards.py rename to hdr_schemata/models/HDRUK/v2_1_2/FormatAndStandards.py diff --git a/hdr_schemata/models/HDRUK/base/Observations.py b/hdr_schemata/models/HDRUK/v2_1_2/Observations.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Observations.py rename to hdr_schemata/models/HDRUK/v2_1_2/Observations.py diff --git a/hdr_schemata/models/HDRUK/base/Organisation.py b/hdr_schemata/models/HDRUK/v2_1_2/Organisation.py similarity index 100% rename from 
hdr_schemata/models/HDRUK/base/Organisation.py rename to hdr_schemata/models/HDRUK/v2_1_2/Organisation.py diff --git a/hdr_schemata/models/HDRUK/base/Origin.py b/hdr_schemata/models/HDRUK/v2_1_2/Origin.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Origin.py rename to hdr_schemata/models/HDRUK/v2_1_2/Origin.py diff --git a/hdr_schemata/models/HDRUK/base/Provenance.py b/hdr_schemata/models/HDRUK/v2_1_2/Provenance.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Provenance.py rename to hdr_schemata/models/HDRUK/v2_1_2/Provenance.py diff --git a/hdr_schemata/models/HDRUK/base/Revision.py b/hdr_schemata/models/HDRUK/v2_1_2/Revision.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Revision.py rename to hdr_schemata/models/HDRUK/v2_1_2/Revision.py diff --git a/hdr_schemata/models/HDRUK/base/Summary.py b/hdr_schemata/models/HDRUK/v2_1_2/Summary.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Summary.py rename to hdr_schemata/models/HDRUK/v2_1_2/Summary.py diff --git a/hdr_schemata/models/HDRUK/base/Temporal.py b/hdr_schemata/models/HDRUK/v2_1_2/Temporal.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Temporal.py rename to hdr_schemata/models/HDRUK/v2_1_2/Temporal.py diff --git a/hdr_schemata/models/HDRUK/base/Usage.py b/hdr_schemata/models/HDRUK/v2_1_2/Usage.py similarity index 100% rename from hdr_schemata/models/HDRUK/base/Usage.py rename to hdr_schemata/models/HDRUK/v2_1_2/Usage.py diff --git a/hdr_schemata/models/HDRUK/base/__init__.py b/hdr_schemata/models/HDRUK/v2_1_2/__init__.py similarity index 95% rename from hdr_schemata/models/HDRUK/base/__init__.py rename to hdr_schemata/models/HDRUK/v2_1_2/__init__.py index df80854..44ffbc5 100644 --- a/hdr_schemata/models/HDRUK/base/__init__.py +++ b/hdr_schemata/models/HDRUK/v2_1_2/__init__.py @@ -1,7 +1,7 @@ +import json from datetime import date, datetime from enum import Enum from typing import List, Optional, Union - from 
pydantic import AnyUrl, BaseModel, EmailStr, Field, constr from hdr_schemata.definitions.HDRUK import * @@ -17,7 +17,8 @@ from .DataClass import DataClass from .DataElement import DataElement -class HdrukBaseModel(BaseModel): + +class Hdruk212(BaseModel): class Config: extra = 'forbid' @@ -91,3 +92,8 @@ class Config: description='Descriptions of all tables and data elements that can be included in the dataset', title='Structural Metadata', ) + + @classmethod + def save_schema(cls,location='./2.1.2/schema.json'): + with open(location,'w') as f: + json.dump(cls.model_json_schema(),f,indent=6) diff --git a/hdr_schemata/models/HDRUK/v2_1_3/Provenance.py b/hdr_schemata/models/HDRUK/v2_1_3/Provenance.py index d2b2705..a295187 100644 --- a/hdr_schemata/models/HDRUK/v2_1_3/Provenance.py +++ b/hdr_schemata/models/HDRUK/v2_1_3/Provenance.py @@ -1,4 +1,4 @@ -from hdr_schemata.models.HDRUK.base import Provenance as BaseProvenance +from hdr_schemata.models.HDRUK.v2_1_2 import Provenance as BaseProvenance from .Temporal import Temporal diff --git a/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py index 1b0aee5..958c5b4 100644 --- a/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py +++ b/hdr_schemata/models/HDRUK/v2_1_3/Temporal.py @@ -1,6 +1,6 @@ from pydantic import Field from hdr_schemata.definitions.HDRUK import Periodicity -from hdr_schemata.models.HDRUK.base.Temporal import Temporal as BaseTemporal +from hdr_schemata.models.HDRUK.v2_1_2.Temporal import Temporal as BaseTemporal from pydantic import BaseModel, Field from hdr_schemata.models import remove_fields_from_cls From 7dc1918b41957f97d3e71d40da0d12a751b94512 Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Tue, 9 Jan 2024 11:48:56 +0000 Subject: [PATCH 10/10] update printing available stuff --- hdr_schemata/utils/print_structure.py | 1 + 1 file changed, 1 insertion(+) diff --git a/hdr_schemata/utils/print_structure.py b/hdr_schemata/utils/print_structure.py index 
186445c..f6985e4 100644 --- a/hdr_schemata/utils/print_structure.py +++ b/hdr_schemata/utils/print_structure.py @@ -4,6 +4,7 @@ structure = {} for schema in glob.glob('hdr_schemata/models/**/schema.json',recursive=True): items = schema.split('/') + if len(items) != 5: continue model = items[2] version = items[3] if model not in structure: