diff --git a/available.json b/available.json index 8e1a78f..de1c37a 100644 --- a/available.json +++ b/available.json @@ -4,11 +4,13 @@ "2.1.3", "2.1.0", "2.0.2", + "2.2.1", "2.2.0" ], "GWDM": [ "1.0", - "1.1" + "1.1", + "1.2" ], "SchemaOrg": [ "default", diff --git a/docs/GWDM/1.2.md b/docs/GWDM/1.2.md new file mode 100644 index 0000000..731b476 --- /dev/null +++ b/docs/GWDM/1.2.md @@ -0,0 +1,1335 @@ + +## required + +required metadata needed for the GWDM + + + + + + +### gatewayId + +Need a field in Mauro that captures the datasetID to link to gateway database - or can we just use the one created in Mauro? + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:--------| +| Gatewayid | False | False | True | ['str'] | + + + + +### gatewayPid + +Need a field in Mauro that captures the dataset pid to link to gateway database + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:--------| +| Gatewaypid | False | False | True | ['str'] | + + + + +### issued + +Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different? + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------| +| Issued | False | False | True | ['datetime'] | + + + + +### modified + +Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different? 
+ +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-------------| +| Modified | False | False | True | ['datetime'] | + + + + +### revisions + +None + + + + + + +#### version + +Version number used for previous version of this dataset + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------| +| revision version | False | False | True | ['str'] | + + + + +#### url + +Some url with a reference to the record of a previous version of this dataset + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------| +| revision url | False | False | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +### version + +Dataset metadata version + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:--------| +| Dataset Version | False | False | True | ['str'] | + +Examples: + + * 1.1.0 + + +## summary + +Summary of metadata describing key pieces of information. 
+ + + + + + +### title + +The main title of the dataset + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------| +| Title | False | False | True | ["TwoHundredFiftyFiveCharacters[{'maxLength': 255, 'minLength': 2, 'type': 'string'}]"] | + + + + +### shortTitle + +A shorter descriptive title of the dataset + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------| +| Shorttitle | False | True | True | ["ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### doiName + +DOI associated to this dataset + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------| +| Doiname | False | True | True | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### abstract + +Longer abstract detailing the dataset. + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------| +| Abstract | False | False | True | ["LongAbstractText[{'anyOf': [{'maxLength': 5000, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +### keywords + +Comma separated key words associated to this dataset. 
+ +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### controlledKeywords + +Keywords that have been filtered and limited + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Controlled Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### contactPoint + +email of a person who can be the main contact point of this dataset + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:---------------------| +| Contact Point | False | True | True | ['EmailStr', 'null'] | + + + + +### datasetType + +What type of dataset is this? 
+ +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Dataset type | False | True | True | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### description + +Longer description of the dataset in detail + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Description | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### publisher + +Link to details about the publisher of this dataset + + + + + + +#### name + +The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/ + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------| +| Name | False | True | True | ['Name[{}]', 'null'] | + + + + +#### gatewayId + +The link to an ID somewhere in the gateway where more information on the publisher can be retrieved. 
+ +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:----------------| +| Publisher gateway id | False | True | False | ['str', 'null'] | + + + + +#### rorId + +The Research Organization Registry (ROR) for the organisation, if applicable + +| title | is_list | is_optional | required | type | +|:------------------------------------------|:----------|:--------------|:-----------|:----------------| +| Research Organization Registry Identifier | False | True | False | ['str', 'null'] | + + + + +### populationSize + +Summary population size of the cohort + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------| +| Population size | False | True | False | ['int', 'null'] | + + + + +### datasetSubType + +What is the subtype for this dataset? + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Dataset sub type | False | True | False | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +## coverage + +Observational, Spatial and Temporal coverage + + + + + + +### spatial + +List of countries where the data was taken from + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Spatial | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### pathway + +Long description of the clinical/diagnostic/treatment pathway if applicable. 
This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Pathway | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### followup + +What is the typical time span that a patient appears in the dataset (follow up period) + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------| +| Followup | False | True | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | + + + + +### typicalAgeRange + +Age range in whole years of participants in the dataset. Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). 
+ +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Typical Age Range | False | True | False | ["AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### gender + +Male, Female, Other + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Gender | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### biologicalsamples + +Blood, Saliva, Urine, Other + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Biological Samples | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### psychological + +Mental health, Cognitive function + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Psychological | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### physical + +Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive + +| title | is_list | is_optional | required | type | 
+|:---------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Physical | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### anthropometric + +Height, Weight, Waist circumference, Hip circumference, Blood pressure + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Anthropometric | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### lifestyle + +Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Lifestyle | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### socioeconomic + +Occupation, Family circumstances, Housing, Education, Ethnic group, Marital status, Social support + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Socio-economic | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +## provenance + +Provenance information + + + + + + +### origin + +None + + + + + + +#### purpose + +Indicates the purpose(s) that the dataset was collected. 
+ +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Purpose | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### source + +Indicates the source of the data extraction + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Source | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### collectionSituation + +Indicate the setting(s) where data was collected. Multiple settings may be provided + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Setting | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### temporal + +None + + + + + + +#### startDate + +The start of the time period that the dataset provides coverage for + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-----------------------------| +| Start Date | False | True | True | ['date', 'datetime', 'null'] | + + + + +#### endDate + +The end of the time period that the dataset provides coverage for + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------| +| End Date | False | True | False | ['date', 'datetime', 'null'] | + + + + +#### timeLag + +Typical time-lag between an event and the data for that event 
appearing in the dataset + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | False | False | True | ["TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]"] | + + + + +#### accrualPeriodicity + +frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Periodicity | False | False | True | ["Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]"] | + + + + +#### distributionReleaseDate + +Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-----------------------------| +| Release Date | False | True | False | ['date', 'datetime', 'null'] | + + + + +## accessibility + +Accessibility information. 
+ + + + + + +### usage + +This section includes information about how the data can be used and how it is currently being used + + + + + + +#### dataUseLimitation + +Any restrictions to its usage + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Limitation | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataUseRequirement + +Any requirements needed for data usage + +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Requirements | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### resourceCreator + +Who has created this resource + + + + + + +##### name + +The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/ + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------| +| Name | False | True | True | ['Name[{}]', 'null'] | + + + + +##### gatewayId + +The link to an ID somewhere in the gateway where more information on the publisher can be retrieved. 
+ +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:----------------| +| Publisher gateway id | False | True | False | ['str', 'null'] | + + + + +##### rorId + +The Research Organization Registry (ROR) for the organisation, if applicable + +| title | is_list | is_optional | required | type | +|:------------------------------------------|:----------|:--------------|:-----------|:----------------| +| Research Organization Registry Identifier | False | True | False | ['str', 'null'] | + + + + +### access + +This section includes information about data access + + + + + + +#### accessRights + +Optional link(s) or a description of where the license associated to accessing this dataset + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Access Rights | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### accessService + + + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Service | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### accessRequestCost + + + +| title | is_list | is_optional | required | type | +|:---------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Organisation Access Request Cost | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 
'null'}]}]", 'null'] | + + + + +#### deliveryLeadTime + +An arbitrary guess at the time to gain access to the dataset... + +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------| +| Access Request Duration | False | True | False | ["DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", 'null'] | + + + + +#### jurisdiction + +Comma separated country codes of where the data jurisdiction is. + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Jurisdiction | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataController + +Name of the data controller + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataProcessor + +Name of the data processors + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Processor | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### 
accessServiceCategory + +Where access to data comes from: TRE/SED, direct access, open access, varies based on project. + +| title | is_list | is_optional | required | type | +|:--------------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Access/governance requirements | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### formatAndStandards + +Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset. + + + + + + +#### vocabularyEncodingSchemes + +Code value of the ontology vocabulary encoding + +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Controlled Vocabulary | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### conformsTo + +What the vocabulary conforms to. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Conforms To | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### languages + +Language code(s) of the language of the dataset metadata and underlying data is made available. 
+ +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Language Code(s) | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### formats + +Format(s) the dataset can be made available in + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Dataset Format | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +## linkage + +Linkage and enrichment. + + + + + + +### isGeneratedUsing + +?? + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is Generated Using | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### associatedMedia + +Any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Associated Media | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### dataUses + +?? 
+ +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Uses | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### isReferenceIn + +The keystone paper associated with the dataset. Also include a list of known citations, if available; these should be links to existing resources where the dataset has been used or referenced. + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is Reference in | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### tools + +URL of any analysis tools or models that have been created for this dataset and are available for further use + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Tools | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### datasetLinkage + +Dataset Linkage copied over from + + + + + + +#### isDerivedFrom + +Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Derivations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### isPartOf + +If the dataset is part of a group or family + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is PartOf | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### isMemberOf + +Dataset is a member of XXX(?) + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Is MemberOf | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### linkedDatasets + +Links to other datasets. + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Linked Datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### investigations + +Please provide the keystone paper associated with the dataset. 
+ +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Investigations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### syntheticDataWebLink + +Links to locations of information and/or raw downloads of synthetic data associated with this dataset + +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Synthetic Data Web Links | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +## observations + +Observations + + + + + + +### observedNode + +Please select one of the following statistical populations for your observation + +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------| +| Statistical Population | False | False | True | ["StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']"] | + +Examples: + + * PERSONS + + +### measuredValue + +Please provide the population size associated with the population type the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset. 
+ +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:--------| +| Measured Value | False | False | True | ['int'] | + + + + +### disambiguatingDescription + +If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type. + +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Disambiguating Description | False | True | False | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### observationDate + +Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations. + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:---------------------| +| Observation Date | False | False | True | ['date', 'datetime'] | + + + + +### measuredProperty + +Initially this will be defaulted to "COUNT" + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------------------| +| Measured Property | False | False | True | ['MeasuredProperty[{}]'] | + + + + +## structuralMetadata + +Descriptions of all tables and data elements that can be included in the dataset + + + + + + +### name + +The name of a table in a dataset. 
+ +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:----------------| +| Table Name | False | True | True | ['str', 'null'] | + + + + +### description + +A description of a table in a dataset. + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------| +| Table Description | False | True | False | ['str', 'null'] | + + + + +### columns + +A list of columns contained within a table in a dataset. + + + + + + +#### name + +The name of a column in a table. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------| +| Column Name | False | False | True | ['Name[{}]'] | + + + + +#### dataType + +The data type of values in the column + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:--------| +| Data Type | False | False | True | ['str'] | + + + + +#### description + +A description of a column in a table. + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------| +| Column Description | False | True | False | ['str', 'null'] | + + + + +#### sensitive + +A True or False value, indicating if the field is sensitive or not + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:---------| +| Sensitive | False | False | True | ['bool'] | + + + + +#### values + +Data values contained within the column + + + + + + +##### name + +Unique value in a column . + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-------------| +| Value Name | False | False | True | ['Name[{}]'] | + + + + +##### description + +A description of a unique value in a column. 
+ +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------| +| Value Description | False | True | False | ['str', 'null'] | + + + + +##### frequency + +The frequency of occurrence of a value in a column + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------| +| Value Frequency | False | True | False | ['int', 'null'] | + + + + +## tissuesSampleCollection + +Metadata collection for Tissue Samples datasets + + + + + + +### id + +ID of the tissue sample collection + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| ID | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### dataCategories + +Data categories related to the tissue sample collection + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Categories | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### materialType + +Material type of the tissue sample collection + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Material Type | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### accessConditions + +Access conditions for the tissue sample collection + +| title | is_list | 
is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Access Conditions | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### collectionType + +Type of the tissue sample collection + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Collection Type | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### disease + +Disease associated with the tissue sample collection + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Disease | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### storageTemperature + +Storage temperature of the tissue sample collection + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Storage Temperature | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### sampleAgeRange + +Age range of the tissue sample collection + +| title | is_list | is_optional | required | type | 
+|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Sample Age Range | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### tissueSampleMetadata + +Metadata related to the tissue sample + + + + + + +#### id + +ID of the tissue sample metadata + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------| +| Metadata ID | False | True | False | ['str', 'null'] | + + + + +#### sampleDonor + +Information about the sample donor + + + + + + +##### id + +ID of the sample donor + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:----------------| +| Donor ID | False | True | False | ['str', 'null'] | + + + + +##### sex + +Sex of the sample donor + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:----------------| +| Donor Sex | False | True | False | ['str', 'null'] | + + + + +##### birthDate + +Date of birth of the sample donor + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:-----------------------------| +| Donor birth date | False | True | False | ['date', 'datetime', 'null'] | + + + + +##### dataCategories + +Data categories related to the sample donor + +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Donor Data Categories | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### sampleType + +Type of the tissue sample + +| title | is_list 
| is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Sample Type | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### storageTemperature + +Storage temperature of the tissue sample + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------| +| Storage Temperature | False | True | False | ['str', 'null'] | + + + + +#### creationDate + +Date when the tissue sample metadata was created + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------| +| Creation Date | False | True | False | ['date', 'datetime', 'null'] | + + + + +#### anatomicalSiteOntologyCode + +Ontology code for the anatomical site + +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Anatomical Site Ontology Code | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### anatomicalSiteOntologyDescription + +Ontology description for the anatomical site + +| title | is_list | is_optional | required | type | +|:-------------------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Anatomical Site Ontology Description | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### anatomicalSiteFreeText + +Free text describing the 
anatomical site + +| title | is_list | is_optional | required | type | +|:--------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Anatomical Site Free Text | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### sampleContentDiagnosis + +Diagnosis related to the sample content + +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Sample Content Diagnosis | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### useRestrictions + +Restrictions on the use of the tissue sample + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Use Restrictions | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + diff --git a/docs/GWDM/1.2.structure.json b/docs/GWDM/1.2.structure.json new file mode 100644 index 0000000..ab03246 --- /dev/null +++ b/docs/GWDM/1.2.structure.json @@ -0,0 +1,1719 @@ +[ + { + "name": "required", + "required": true, + "title": "Required", + "description": "required metadata needed for the GWDM", + "examples": null, + "type": [ + "Required" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "gatewayId", + "required": true, + "title": "Gatewayid", + "description": "Need a field in Mauro that captures the datasetID to link to gateway database - or can we just use the one created in Mauro?", + 
"examples": null, + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "gatewayPid", + "required": true, + "title": "Gatewaypid", + "description": "Need a field in Mauro that captures the dataset pid to link to gateway database", + "examples": null, + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "issued", + "required": true, + "title": "Issued", + "description": "Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different?", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "modified", + "required": true, + "title": "Modified", + "description": "Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different?", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "revisions", + "required": true, + "title": "Revisions", + "description": null, + "examples": null, + "type": [ + "Revision" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "version", + "required": true, + "title": "revision version", + "description": "Version number used for previous version of this dataset", + "examples": null, + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "url", + "required": true, + "title": "revision url", + "description": "Some url with a reference to the record of a previous version of this dataset", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + } + ] + }, + { + "name": "version", + "required": true, + "title": "Dataset Version", + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "type": [ + "str" + ], 
+ "is_list": false, + "is_optional": false + } + ] + }, + { + "name": "summary", + "required": true, + "title": "Summary", + "description": "Summary of metadata describing key pieces of information.", + "examples": null, + "type": [ + "Summary" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "title", + "required": true, + "title": "Title", + "description": "The main title of the dataset", + "examples": null, + "type": [ + "TwoHundredFiftyFiveCharacters[{'maxLength': 255, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "shortTitle", + "required": true, + "title": "Shorttitle", + "description": "A shorter descriptive title of the dataset", + "examples": null, + "type": [ + "ShortTitle[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "doiName", + "required": true, + "title": "Doiname", + "description": "DOI associated to this dataset", + "examples": null, + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "abstract", + "required": true, + "title": "Abstract", + "description": "Longer abstract detailing the dataset.", + "examples": null, + "type": [ + "LongAbstractText[{'anyOf': [{'maxLength': 5000, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "keywords", + "required": true, + "title": "Keywords", + "description": "Comma separated key words associated to this dataset.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + 
"name": "controlledKeywords", + "required": true, + "title": "Controlled Keywords", + "description": "Keywords that have been filtered and limited", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "contactPoint", + "required": true, + "title": "Contact Point", + "description": "email of a person who can be the main contact point of this dataset", + "examples": null, + "type": [ + "EmailStr", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "datasetType", + "required": true, + "title": "Dataset type", + "description": "What type of dataset is this?", + "examples": null, + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "description", + "required": true, + "title": "Description", + "description": "Longer description of the dataset in detail", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "publisher", + "required": true, + "title": "Publisher", + "description": "Link to details about the publisher of this dataset", + "examples": null, + "type": [ + "Organisation", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Name", + "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. 
In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/", + "examples": null, + "type": [ + "Name[{}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "gatewayId", + "required": false, + "title": "Publisher gateway id", + "description": "The link to an ID somewhere in the gateway where more information on the publisher can be retrieved.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "rorId", + "required": false, + "title": "Research Organization Registry Identifier", + "description": "The Research Organization Registry (ROR) for the organisation, if applicable", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "populationSize", + "required": false, + "title": "Population size", + "description": "Summary population size of the cohort", + "examples": null, + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "datasetSubType", + "required": false, + "title": "Dataset sub type", + "description": "What is the subtype for this dataset?", + "examples": null, + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "coverage", + "required": false, + "title": "Coverage", + "description": "Observational, Spatial and Temporal coverage", + "examples": null, + "type": [ + "Coverage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "spatial", + "required": false, + "title": "Spatial", + "description": "List of countries where the data was taken from", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + 
"is_optional": true, + "subItems": [] + }, + { + "name": "pathway", + "required": false, + "title": "Pathway", + "description": "Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "followup", + "required": false, + "title": "Followup", + "description": "What is the typical time span that a patient appears in the dataset (follow up period)", + "examples": null, + "type": [ + "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "typicalAgeRange", + "required": false, + "title": "Typical Age Range", + "description": "Age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "examples": null, + "type": [ + "AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "gender", + "required": false, + "title": "Gender", + "description": "Male, Female, Other", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "biologicalsamples", + "required": false, + "title": "Biological Samples", + "description": "Blood, Saliva, Urine, Other", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "psychological", + "required": false, + "title": "Psychological", + "description": "Mental health, Cognitive function", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "physical", + "required": false, + "title": "Physical", + "description": "Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "anthropometric", + "required": false, + "title": "Anthropometric", + "description": "Height, Weight, Waist circumference, Hip circumference, Blood pressure", + "examples": 
null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "lifestyle", + "required": false, + "title": "Lifestyle", + "description": "Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "socioeconomic", + "required": false, + "title": "Socio-economic", + "description": "Occupation, Family circumstances, Housing, Education, Ethnic group, Marital status, Social support", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "provenance", + "required": false, + "title": "Provenance", + "description": "Provenance information", + "examples": null, + "type": [ + "Provenance", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "origin", + "required": false, + "title": null, + "description": null, + "examples": null, + "type": [ + "Origin", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "purpose", + "required": false, + "title": "Purpose", + "description": "Indicates the purpose(s) that the dataset was collected.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "source", + "required": false, + "title": "Source", + "description": "Indicates the source of the data extraction", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': 
'([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "collectionSituation", + "required": false, + "title": "Setting", + "description": "Indicate the setting(s) where data was collected. Multiple settings may be provided", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "temporal", + "required": true, + "title": null, + "description": null, + "examples": null, + "type": [ + "Temporal" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "startDate", + "required": true, + "title": "Start Date", + "description": "The start of the time period that the dataset provides coverage for", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "endDate", + "required": false, + "title": "End Date", + "description": "The end of the time period that the dataset provides coverage for", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "timeLag", + "required": true, + "title": "Time Lag", + "description": "Typical time-lag between an event and the data for that event appearing in the dataset", + "examples": null, + "type": [ + "TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "accrualPeriodicity", + "required": true, + "title": "Periodicity", + "description": "frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. 
When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.", + "examples": null, + "type": [ + "Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "distributionReleaseDate", + "required": false, + "title": "Release Date", + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity.", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + } + ] + }, + { + "name": "accessibility", + "required": false, + "title": "Accessibility", + "description": "Accessibility information.", + "examples": null, + "type": [ + "Accessibility" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "usage", + "required": false, + "title": "Usage", + "description": "This section includes information about how the data can be used and how it is currently being used", + "examples": null, + "type": [ + "Usage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "dataUseLimitation", + "required": true, + "title": "Data Use Limitation", + "description": "Any restrictions to its usage", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataUseRequirement", + "required": true, + "title": "Data Use Requirements", + "description": "Any requirements needed for data usage", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": 
true, + "subItems": [] + }, + { + "name": "resourceCreator", + "required": false, + "title": "Resource Creator", + "description": "Who has created this resource", + "examples": null, + "type": [ + "Organisation", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Name", + "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/", + "examples": null, + "type": [ + "Name[{}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "gatewayId", + "required": false, + "title": "Publisher gateway id", + "description": "The link to an ID somewhere in the gateway where more information on the publisher can be retrieved.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "rorId", + "required": false, + "title": "Research Organization Registry Identifier", + "description": "The Research Organization Registry (ROR) for the organisation, if applicable", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + } + ] + }, + { + "name": "access", + "required": true, + "title": "Access", + "description": "This section includes information about data access", + "examples": null, + "type": [ + "Access" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "accessRights", + "required": true, + "title": "Access Rights", + "description": "Optional link(s) or a description of where the license associated to accessing this dataset", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": 
"accessService", + "required": false, + "title": "Access Service", + "description": "", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessRequestCost", + "required": false, + "title": "Organisation Access Request Cost", + "description": "", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "deliveryLeadTime", + "required": false, + "title": "Access Request Duration", + "description": "An arbitrary guess at the time to gain access to the dataset...", + "examples": null, + "type": [ + "DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "jurisdiction", + "required": true, + "title": "Jurisdiction", + "description": "Comma separated country codes of where the data jurisdiction is.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataController", + "required": true, + "title": "Data Controller", + "description": "Name of the data controller", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataProcessor", + "required": false, + "title": "Data Processor", + "description": "Name of the data processors", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 
'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessServiceCategory", + "required": false, + "title": "Access/governance requirements", + "description": "Where access to data comes from: TRE/SED, direct access, open access, varies based on project.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "formatAndStandards", + "required": false, + "title": "Format and Standards", + "description": "Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset.", + "examples": null, + "type": [ + "FormatAndStandards", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "vocabularyEncodingSchemes", + "required": true, + "title": "Controlled Vocabulary", + "description": "Code value of the ontology vocabulary encoding", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "conformsTo", + "required": true, + "title": "Conforms To", + "description": "What the vocabulary conforms to.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "languages", + "required": true, + "title": "Language Code(s)", + "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, 
{'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "formats", + "required": true, + "title": "Dataset Format", + "description": "Format(s) the dataset can be made available in", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + } + ] + }, + { + "name": "linkage", + "required": false, + "title": "Linkage", + "description": "Linkage and enrichment.", + "examples": null, + "type": [ + "Linkage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "isGeneratedUsing", + "required": false, + "title": "Is Generated Using", + "description": "??", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "associatedMedia", + "required": false, + "title": "Associated Media", + "description": "Any media associated with the Gateway Organisation using a valid URI for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataUses", + "required": false, + "title": "Data Uses", + "description": "??", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isReferenceIn", + "required": false, + "title": "Is Reference in", + "description": "The keystone paper associated with the dataset. Also include a list of known citations, if available; these should be links to existing resources where the dataset has been used or referenced.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "tools", + "required": false, + "title": "Tools", + "description": "URL of any analysis tools or models that have been created for this dataset and are available for further use", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "datasetLinkage", + "required": false, + "title": "Dataset Linkage", + "description": "Dataset Linkage copied over from", + "examples": null, + "type": [ + "DatasetLinkage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "isDerivedFrom", + "required": false, + "title": "Derivations", + "description": "Indicate if derived datasets or 
predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isPartOf", + "required": false, + "title": "Is PartOf", + "description": "If the dataset is part of a group or family", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isMemberOf", + "required": false, + "title": "Is MemberOf", + "description": "Dataset is a member of XXX(?)", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "linkedDatasets", + "required": false, + "title": "Linked Datasets", + "description": "Links to other datasets.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "investigations", + "required": false, + "title": "Investigations", + "description": "Please provide the keystone paper associated with the dataset.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "syntheticDataWebLink", + "required": false, + "title": "Synthetic Data Web Links", + "description": "Links to locations of information and or raw downloads of synthetic data associated with this 
dataset", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "observations", + "required": false, + "title": "Observations", + "description": "Observations", + "examples": null, + "type": [ + "Observation" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "observedNode", + "required": true, + "title": "Statistical Population", + "description": "Please select one of the following statistical populations for your observation", + "examples": [ + "PERSONS" + ], + "type": [ + "StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredValue", + "required": true, + "title": "Measured Value", + "description": "Please provide the population size associated with the population type in the dataset, i.e. 1000 people in a study, or 87 images (MRI) of Knee. Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "disambiguatingDescription", + "required": false, + "title": "Disambiguating Description", + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "examples": null, + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "observationDate", + "required": true, + "title": "Observation Date", + "description": "Please provide the date that the observation was made. 
Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations.", + "examples": null, + "type": [ + "date", + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredProperty", + "required": true, + "title": "Measured Property", + "description": "Initially this will be defaulted to \"COUNT\"", + "examples": null, + "type": [ + "MeasuredProperty[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + } + ] + }, + { + "name": "structuralMetadata", + "required": false, + "title": "Structural Metadata", + "description": "Descriptions of all tables and data elements that can be included in the dataset", + "examples": null, + "type": [ + "DataTable" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Table Name", + "description": "The name of a table in a dataset.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "description", + "required": false, + "title": "Table Description", + "description": "A description of a table in a dataset.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "columns", + "required": true, + "title": "Data Columns", + "description": "A list of columns contained within a table in a dataset.", + "examples": null, + "type": [ + "DataColumn" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Column Name", + "description": "The name of a column in a table.", + "examples": 
null, + "type": [ + "Name[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "dataType", + "required": true, + "title": "Data Type", + "description": "The data type of values in the column", + "examples": null, + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "description", + "required": false, + "title": "Column Description", + "description": "A description of a column in a table.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "sensitive", + "required": true, + "title": "Sensitive", + "description": "A True or False value, indicating if the field is sensitive or not", + "examples": null, + "type": [ + "bool" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "values", + "required": false, + "title": "Values", + "description": "Data values contained within the column", + "examples": null, + "type": [ + "DataValue" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Value Name", + "description": "Unique value in a column.", + "examples": null, + "type": [ + "Name[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "description", + "required": false, + "title": "Value Description", + "description": "A description of a unique value in a column.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "frequency", + "required": false, + "title": "Value Frequency", + "description": "The frequency of occurrence of a value in a column", + "examples": null, + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + } + ] + } + ] + }, + { + "name": "tissuesSampleCollection", + "required": false, + "title": "Tissues Sample Collection", + "description": "Metadata collection for Tissue Samples datasets", + "examples": null, + 
"type": [ + "TissuesSampleCollection" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "id", + "required": false, + "title": "ID", + "description": "ID of the tissue sample collection", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataCategories", + "required": false, + "title": "Data Categories", + "description": "Data categories related to the tissue sample collection", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "materialType", + "required": false, + "title": "Material Type", + "description": "Material type of the tissue sample collection", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessConditions", + "required": false, + "title": "Access Conditions", + "description": "Access conditions for the tissue sample collection", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "collectionType", + "required": false, + "title": "Collection Type", + "description": "Type of the tissue sample collection", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "disease", + "required": false, + "title": "Disease", + "description": "Disease associated with the tissue sample 
collection", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "storageTemperature", + "required": false, + "title": "Storage Temperature", + "description": "Storage temperature of the tissue sample collection", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "sampleAgeRange", + "required": false, + "title": "Sample Age Range", + "description": "Age range of the tissue sample collection", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "tissueSampleMetadata", + "required": false, + "title": "Tissue Sample Metadata", + "description": "Metadata related to the tissue sample", + "examples": null, + "type": [ + "TissueSampleMetadata", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "id", + "required": false, + "title": "Metadata ID", + "description": "ID of the tissue sample metadata", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "sampleDonor", + "required": false, + "title": "Sample Donor", + "description": "Information about the sample donor", + "examples": null, + "type": [ + "SampleDonor", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "id", + "required": false, + "title": "Donor ID", + "description": "ID of the sample donor", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "sex", + "required": false, + "title": "Donor Sex", + 
"description": "Sex of the sample donor", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "birthDate", + "required": false, + "title": "Donor birth date", + "description": "Date of birth of the sample donor", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "dataCategories", + "required": false, + "title": "Donor Data Categories", + "description": "Data categories related to the sample donor", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "sampleType", + "required": false, + "title": "Sample Type", + "description": "Type of the tissue sample", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "storageTemperature", + "required": false, + "title": "Storage Temperature", + "description": "Storage temperature of the tissue sample", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "creationDate", + "required": false, + "title": "Creation Date", + "description": "Date when the tissue sample metadata was created", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "anatomicalSiteOntologyCode", + "required": false, + "title": "Anatomical Site Ontology Code", + "description": "Ontology code for the anatomical site", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + 
"name": "anatomicalSiteOntologyDescription", + "required": false, + "title": "Anatomical Site Ontology Description", + "description": "Ontology description for the anatomical site", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "anatomicalSiteFreeText", + "required": false, + "title": "Anatomical Site Free Text", + "description": "Free text describing the anatomical site", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "sampleContentDiagnosis", + "required": false, + "title": "Sample Content Diagnosis", + "description": "Diagnosis related to the sample content", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "useRestrictions", + "required": false, + "title": "Use Restrictions", + "description": "Restrictions on the use of the tissue sample", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + } + ] + } +] \ No newline at end of file diff --git a/docs/HDRUK/2.2.1.md b/docs/HDRUK/2.2.1.md new file mode 100644 index 0000000..1976fc0 --- /dev/null +++ b/docs/HDRUK/2.2.1.md @@ -0,0 +1,1073 @@ + +## identifier + +System dataset identifier + +| title | is_list | is_optional | required | type | 
+|:-------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Dataset identifier | False | True | True | ["Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * ['226fb3f1-4471-400a-8c39-2b66d46a39b6', 'https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6'] + + +## version + +Dataset metadata version + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| Dataset Version | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | + +Examples: + + * 1.1.0 + + +## revisions + +Revisions of Dataset metadata + + + + + + +### version + +Semantic Version + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------| +| | False | False | True | ["Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]"] | + + + + +### url + +URL endpoint to obtain the version + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| | False | True | True | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +## issued + +Dataset 
Metadata Creation Date + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-------------| +| Creation Date | False | False | True | ['datetime'] | + + + + +## modified + +Dataset Metadata Modification Date + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------| +| Modification Date | False | False | True | ['datetime'] | + + + + +## summary + +Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP. + + + + + + +### title + +Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| Title | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | + +Examples: + + * ['North West London COVID-19 Patient Level Situation Report'] + + +### abstract + +Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. 
The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Dataset Abstract | False | True | True | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web® including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice. + + +### publisher + +This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank. + + + + + + +#### identifier + +Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. 
If your organisation does not have a Grid.ac identifier please use the “suggest an institute” function here: https://www.grid.ac/institutes# + +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Organisation Identifier | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### name + +Name of the organisation + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------| +| Organisation Name | False | False | True | ["OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]"] | + + + + +#### logo + +Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg. + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------| +| Organisation Logo | False | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### description + +Please provide a URL that describes the organisation. 
+ +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Organisation Description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### contactPoint + +Organisation contact point(s) + +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Organisation Contact Point | False | True | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.EmailAddress.EmailAddress]]', 'null'] | + + + + +#### memberOf + +Please indicate if the organisation is an Alliance Member or a Hub. + +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-----------------------------------------------------| +| Organisation Membership | False | True | False | ["MemberOf['HUB','ALLIANCE','OTHER','NCS']", 'null'] | + + + + +### contactPoint + +Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose. 
+ +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------| +| Contact Point | False | True | True | ["EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * SAILDatabank@swansea.ac.uk + + +### keywords + +Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users. + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Keywords | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]', 'null'] | + + + + +### alternateIdentifiers + +Alternate dataset identifiers or local identifiers + +| title | is_list | is_optional | required | type | +|:------------------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Alternate dataset identifiers | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]', 'null'] | + + + + +### 
doiName + +All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI. + +| title | is_list | is_optional | required | type | +|:--------------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------| +| Digital Object Identifier | False | True | False | ["Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * 10.3399/bjgp17X692645 + + +### datasetType + +Placeholder for dataset type + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Datasetype | False | True | True | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * [''] + + +### datasetSubType + +Placeholder for dataset sub-type + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------| +| Datasetype | False | True | True | ["DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + +Examples: + + * [''] + + +### populationSize + +Summary population size of the cohort + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:----------------| +| Population size | False | True | True | ['int', 'null'] | + + + + +## documentation + +Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, 
profiles or dashboards. Organisations are required to confirm that they have permission to distribute any additional media. + + + + + + +### description + +A free-text description of the record. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------| +| Description | False | True | False | ["Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### associatedMedia + +Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted: .jpg, .png or .svg, .pdf, .xlsx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.
+ +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Associated Media | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | + +Examples: + + * PDF Document that describes study protocol + + +### isPartOf + +Please complete only if the dataset is part of a group or family + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Group | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters, hdr_schemata.definitions.HDRUK.IsPartOfEnum.IsPartOfEnum]]', 'null'] | + +Examples: + + * Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS). 
+ + +## coverage + +Observational, Spatial and Temporal coverage + + + + + + +### spatial + +List of countries where the data was taken from + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Spatial | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### pathway + +Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway. + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Pathway | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### followup + +What is the typical time span that a patient appears in the dataset (follow up period) + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------| +| Followup | False | True | False | ["Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", 'null'] | + + + + +### typicalAgeRange + +Age range in whole years of participants in the dataset. Please provide range in the following format '[min age] – [max age]' where both the minimum and maximum are whole numbers (integers). 
+ +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Typical Age Range | False | True | False | ["AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### gender + +Male, Female, Other + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------| +| Gender | True | True | False | ["GenderType['Male','Female','Other']"] | + + + + +### biologicalsamples + +Blood, Saliva, Urine, Other + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:-----------------------------------------------------------| +| Biological Samples | True | True | False | ["BiologicalSampleType['Blood','Other','Urine','Saliva']"] | + + + + +### psychological + +Mental health, Cognitive function + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:------------------------------------------------------------| +| Psychological | True | True | False | ["PsychologicalType['Cognitive Function','Mental Health']"] | + + + + +### physical + +Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------| +| Physical | True | True | False | ["PhysicalType['Respiratory','Vision','Hearing','Musculoskeletal','Cardiovascular','Reproductive']"] | + + + + +### anthropometric + +Height, Weight, 
Waist circumference, Hip circumference, Blood pressure + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------| +| Anthropometric | True | True | False | ["AnthropometricType['Blood Pressure','Hip Circumference','Height','Waist Circumference','Weight']"] | + + + + +### lifestyle + +Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------| +| Lifestyle | True | True | False | ["LifestylesType['Smoking','Dietary Habits','Physical Activity','Alcohol']"] | + + + + +### socioeconomic + +Occupation, Family circumstances, Housing, Education, Ethnic group, Marital status, Social support + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------| +| Socio-economic | True | True | False | ["SocioEconomicType['Finances','Family Circumstances','Housing','Education','Marital Status','Occupation','Ethnic Group','Social Support']"] | + + + + +## provenance + +Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness. + + + + + + +### origin + +None + + + + + + +#### purpose + +Please indicate the purpose(s) for which the dataset was collected.
+ +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Purpose | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Purpose.Purpose]', 'null'] | + + + + +#### source + +Please indicate the source of the data extraction + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Source | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Source.Source]', 'null'] | + + + + +#### collectionSituation + +Please indicate the setting(s) where data was collected. Multiple settings may be provided + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Setting | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[hdr_schemata.definitions.HDRUK.Setting.Setting]', 'null'] | + + + + +### temporal + +None + + + + + + +#### distributionReleaseDate + +Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank.
Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020. + +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:-----------------------------| +| Release Date | False | True | False | ['date', 'datetime', 'null'] | + + + + +#### startDate + +The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information. + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:-----------------------------| +| Start Date | False | True | True | ['date', 'datetime', 'null'] | + + + + +#### endDate + +The end of the time period that the dataset provides coverage for. If the dataset is “Continuous” and has no known end date, please state continuous. If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information. 
+ +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------| +| End Date | False | True | False | ['date', 'datetime', "EndDateEnum['CONTINUOUS',null]", 'null'] | + + + + +#### timeLag + +Please indicate the typical time-lag between an event and the data for that event appearing in the dataset + +| title | is_list | is_optional | required | type | +|:---------|:----------|:--------------|:-----------|:---------------------------------------------------------------------------------------------------------------------------------------------------| +| Time Lag | False | False | True | ["TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]"] | + + + + +#### publishingFrequency + +Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or “on-demand” please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/ + +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Publishing Frequency | False | False | True | ["Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]"] | + + + + +## accessibility + +Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets. + + + + + + +### usage + +This section includes information about how the data can be used and how it is currently being used + + + + + + +#### dataUseLimitation + +Any restrictions to its usage + +| title | is_list | is_optional | required | type | +|:--------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Limitation | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataUseRequirement + +Any requirements needed for data usage + +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Data Use Requirements | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### resourceCreator + +Who has created this resource + + + + + + +##### name + +The organisation responsible for running or supporting the data access 
request process, as well as publishing and maintaining the metadata. In most cases this will be the same as the HDR UK Organisation (Hub or Alliance Member). + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:---------------------| +| Name | False | True | True | ['Name[{}]', 'null'] | + + + + +##### gatewayId + +The link to an ID somewhere in the gateway where more information on the publisher can be retrieved. + +| title | is_list | is_optional | required | type | +|:---------------------|:----------|:--------------|:-----------|:----------------| +| Publisher gateway id | False | True | False | ['str', 'null'] | + + + + +##### rorId + +The Research Organization Registry (ROR) for the organisation, if applicable + +| title | is_list | is_optional | required | type | +|:------------------------------------------|:----------|:--------------|:-----------|:----------------| +| Research Organization Registry Identifier | False | True | False | ['str', 'null'] | + + + + +### access + +This section includes information about data access + + + + + + +#### accessRights + +Optional link(s) to, or a description of, the license associated with accessing this dataset + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Access Rights | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### accessService + + + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Access Service | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type':
'null'}]}]", 'null'] | + + + + +#### accessRequestCost + + + +| title | is_list | is_optional | required | type | +|:---------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Organisation Access Request Cost | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### deliveryLeadTime + +An arbitrary guess at the time to gain access to the dataset... + +| title | is_list | is_optional | required | type | +|:------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------| +| Access Request Duration | False | True | False | ["DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", 'null'] | + + + + +#### jurisdiction + +Comma separated country codes of where the data jurisdiction is. 
+ +| title | is_list | is_optional | required | type | +|:-------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Jurisdiction | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataController + +Name of the data controller + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Controller | False | True | True | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### dataProcessor + +Name of the data processors + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------| +| Data Processor | False | True | False | ["LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### accessServiceCategory + +Where access to the data comes from: TRE/SDE, direct access, open access, varies based on project. + +| title | is_list | is_optional | required | type | +|:--------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------| +| Access/governance requirementss | True | True | False | ["AccessService['TRE/SDE','Direct access','Open access','Varies based on project']"] | + + + + +### formatAndStandards + +Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset.
+ + + + + + +#### vocabularyEncodingSchemes + +Code value of the ontology vocabulary encoding + +| title | is_list | is_optional | required | type | +|:----------------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Controlled Vocabulary | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### conformsTo + +What the vocabulary conforms to. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Conforms To | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### languages + +Language code(s) of the language of the dataset metadata and underlying data is made available. 
+ +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Language Code(s) | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +#### formats + +Format(s) the dataset can be made available in + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------| +| Dataset Format | False | True | True | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +## enrichmentAndLinkage + +This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers. + + + + + + +### qualifiedRelation + +If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. Note: If all the datasets from Gateway organisation can be linked please indicate “ALL” and the onboarding portal will automate linkage across the datasets submitted. 
+ +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Linked Datasets | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]]', 'null'] | + + + + +### derivation + +Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Derivations | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.AbstractText.AbstractText]]', 'null'] | + + + + +### tools + +Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. 
Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/ + +| title | is_list | is_optional | required | type | +|:--------|:----------|:--------------|:-----------|:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Tools | False | True | False | ["CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", 'typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]', 'null'] | + + + + +### syntheticDataWebLink + +Links to locations of information and or raw downloads of synthetic data associated with this dataset + +| title | is_list | is_optional | required | type | +|:-------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------| +| Synthetic Data Web Links | True | True | False | ["Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]"] | + + + + +## observations + +Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. 
Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: “2017” + + + + + + +### observedNode + +Please select one of the following statistical populations for your observation + +| title | is_list | is_optional | required | type | +|:-----------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------| +| Statistical Population | False | False | True | ["StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']"] | + +Examples: + + * PERSONS + + +### measuredValue + +Please provide the population size associated with the population type in the dataset, i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset. + +| title | is_list | is_optional | required | type | +|:---------------|:----------|:--------------|:-----------|:--------| +| Measured Value | False | False | True | ['int'] | + + + + +### disambiguatingDescription + +If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.
+ +| title | is_list | is_optional | required | type | +|:---------------------------|:----------|:--------------|:-----------|:--------------------------------------------------------------------------------------------------------------| +| Disambiguating Description | False | True | False | ["AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### observationDate + +Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations. + +| title | is_list | is_optional | required | type | +|:-----------------|:----------|:--------------|:-----------|:---------------------| +| Observation Date | False | False | True | ['date', 'datetime'] | + + + + +### measuredProperty + +Initially this will be defaulted to "COUNT" + +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:-------------------------| +| Measured Property | False | False | True | ['MeasuredProperty[{}]'] | + + + + +## structuralMetadata + +Descriptions of all tables and data elements that can be included in the dataset + + + + + + +### name + +The name of a table in a dataset. + +| title | is_list | is_optional | required | type | +|:-----------|:----------|:--------------|:-----------|:----------------| +| Table Name | False | True | True | ['str', 'null'] | + + + + +### description + +A description of a table in a dataset. 
+ +| title | is_list | is_optional | required | type | +|:------------------|:----------|:--------------|:-----------|:----------------| +| Table Description | False | True | False | ['str', 'null'] | + + + + +### elements + +A list of data elements contained within a table in a dataset. + + + + + + +#### name + +The name of a column in a table. + +| title | is_list | is_optional | required | type | +|:------------|:----------|:--------------|:-----------|:-------------| +| Column Name | False | False | True | ['Name[{}]'] | + + + + +#### dataType + +The data type of values in the column + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:--------| +| Data Type | False | False | True | ['str'] | + + + + +#### description + +A description of a column in a table. + +| title | is_list | is_optional | required | type | +|:-------------------|:----------|:--------------|:-----------|:----------------| +| Column Description | False | True | False | ['str', 'null'] | + + + + +#### sensitive + +A True or False value, indicating if the field is sensitive or not + +| title | is_list | is_optional | required | type | +|:----------|:----------|:--------------|:-----------|:---------| +| Sensitive | False | False | True | ['bool'] | + + + + +## tissuesSampleCollection + +Metadata collection for Tissue Samples datasets + + + + + + +### dataCategories + +The type of data that is associated with the samples in the study. 
Can be several values MIABIS-2.0-13 + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data Categories | True | True | False | ["TissueDataCategoriesEnum['Biological samples','Survey data','Imaging data','Medical records','National registries','Genealogical records','Physiological/Biochemical measurements','Other']"] | + + + + +### materialType + +The biospecimen saved from a biological entity for propagation e.g. testing, diagnostics, treatment or research purposes. Can be several values MIABIS-2.0-14 + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Material Type | True | True | False | ["MaterialTypeCategories['Blood','DNA','Faeces','Immortalized Cell Lines','Isolated Pathogen','Other','Plasma','RNA','Saliva','Serum','Tissue (Frozen)','Tissue (FFPE)','Urine']"] | + + + + +### tissueSampleMetadata + +Metadata related to the tissue sample + + + + + + +#### creationDate + +Date when the tissue sample metadata was created + +| title | is_list | is_optional | required | type | +|:--------------|:----------|:--------------|:-----------|:-----------------------------| +| Creation Date | False | True | False | ['date', 'datetime', 'null'] | + + + + +#### AnatomicalSiteOntologyCode + +Ontology code for the anatomical site, this code must match an ICD-0-3 format + +| title | is_list | is_optional | required | type | 
+|:------------------------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------| +| Anatomical Site Ontology Code | False | True | False | ["ICD_0_3[{'anyOf': [{'pattern': '^[C\\\\d]{3}\\\\.\\\\d{4}\\\\/\\\\d{1,4}$', 'type': 'string'}, {'type': 'null'}]}]", 'null'] | + + + + +### collectionType + +The type of the sample collection. Can be several values [MIABIS-2.0-16](https://github.com/BBMRI-ERIC/miabis/blob/master/Structured-data-and-lists.md#collection-type) + +| title | is_list | is_optional | required | type | +|:----------------|:----------|:--------------|:-----------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Collection Type | False | True | False | ["TissueCollectionTypeEnum['Case-control','Cohort','Cross-sectional','Longitudinal','Twin-study','Quality control','Population-based','Disease specific','Birth cohort','Other']", 'null'] | + + + diff --git a/docs/HDRUK/2.2.1.structure.json b/docs/HDRUK/2.2.1.structure.json new file mode 100644 index 0000000..c9ef77f --- /dev/null +++ b/docs/HDRUK/2.2.1.structure.json @@ -0,0 +1,1387 @@ +[ + { + "name": "identifier", + "required": true, + "title": "Dataset identifier", + "description": "System dataset identifier", + "examples": [ + [ + "226fb3f1-4471-400a-8c39-2b66d46a39b6", + "https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" + ] + ], + "type": [ + "Uuidv4[{'maxLength': 36, 'minLength': 36, 'pattern': '^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$', 'type': 'string'}]", + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "version", + "required": 
true, + "title": "Dataset Version", + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "revisions", + "required": true, + "title": "Dataset Revisions", + "description": "Revisions of Dataset metadata", + "examples": null, + "type": [ + "Revision" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "version", + "required": true, + "title": null, + "description": "Semantic Version", + "examples": null, + "type": [ + "Semver[{'pattern': '^([0-9]+)\\\\.([0-9]+)\\\\.([0-9]+)$', 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "url", + "required": true, + "title": null, + "description": "URL endpoint to obtain the version", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "issued", + "required": true, + "title": "Creation Date", + "description": "Dataset Metadata Creation Date", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "modified", + "required": true, + "title": "Modification Date", + "description": "Dataset Metadata Modification Date", + "examples": null, + "type": [ + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "summary", + "required": true, + "title": "Summary", + "description": "Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP.", + "examples": null, + "type": [ + "Summary" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "title", + "required": true, + "title": "Title", + "description": "Title of the dataset limited to 150 
characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers.", + "examples": [ + [ + "North West London COVID-19 Patient Level Situation Report" + ] + ], + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "abstract", + "required": true, + "title": "Dataset Abstract", + "description": "Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible", + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." + ], + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "publisher", + "required": true, + "title": "Dataset publisher", + "description": "This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. 
Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank.", + "examples": null, + "type": [ + "Organisation" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "identifier", + "required": false, + "title": "Organisation Identifier", + "description": "Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) for your organisation. If your organisation does not have a Grid.ac identifier please use the \u201csuggest an institute\u201d function here: https://www.grid.ac/institutes#", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "name", + "required": true, + "title": "Organisation Name", + "description": "Name of the organisation", + "examples": null, + "type": [ + "OneHundredFiftyCharacters[{'maxLength': 150, 'minLength': 2, 'type': 'string'}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "logo", + "required": false, + "title": "Organisation Logo", + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. 
The following formats will be accepted .jpg, .png or .svg.", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "description", + "required": false, + "title": "Organisation Description", + "description": "Please provide a URL that describes the organisation.", + "examples": null, + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "contactPoint", + "required": true, + "title": "Organisation Contact Point", + "description": "Organisation contact point(s)", + "examples": null, + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.EmailAddress.EmailAddress]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "memberOf", + "required": false, + "title": "Organisation Membership", + "description": "Please indicate if the organisation is an Alliance Member or a Hub.", + "examples": null, + "type": [ + "MemberOf['HUB','ALLIANCE','OTHER','NCS']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "contactPoint", + "required": true, + "title": "Contact Point", + "description": "Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. 
Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose.", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "type": [ + "EmailAddress[{'anyOf': [{'format': 'email', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "keywords", + "required": true, + "title": "Keywords", + "description": "Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "alternateIdentifiers", + "required": false, + "title": "Alternate dataset identifiers", + "description": "Alternate dataset identifiers or local identifiers", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.ShortDescription.ShortDescription]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "doiName", + "required": false, + "title": "Digital Object Identifier", + "description": "All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. 
If a DOI is available, please provide the DOI.", + "examples": [ + "10.3399/bjgp17X692645" + ], + "type": [ + "Doi[{'anyOf': [{'pattern': '^10.\\\\d{4,9}/[-._;()/:a-zA-Z0-9]+$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "datasetType", + "required": true, + "title": "Datasetype", + "description": "Placeholder for dataset type", + "examples": [ + [ + "" + ] + ], + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "datasetSubType", + "required": true, + "title": "Datasetype", + "description": "Placeholder for dataset sub-type", + "examples": [ + [ + "" + ] + ], + "type": [ + "DatasetType[{'anyOf': [{'maxLength': 100, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "populationSize", + "required": true, + "title": "Population size", + "description": "Summary population size of the cohort", + "examples": null, + "type": [ + "int", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + }, + { + "name": "documentation", + "required": false, + "title": "Documentation", + "description": "Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. 
Organisations are required to confirm that they have permission to distribute any additional media.", + "examples": null, + "type": [ + "Documentation", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "description", + "required": false, + "title": "Description", + "description": "A free-text description of the record.", + "examples": null, + "type": [ + "Description[{'anyOf': [{'maxLength': 10000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "associatedMedia", + "required": false, + "title": "Associated Media", + "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png, .svg, .pdf, .xlsx or .docx. Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "examples": [ + "PDF Document that describes study protocol" + ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "isPartOf", + "required": false, + "title": "Group", + "description": "Please complete only if the dataset is part of a group or family", + "examples": [ + "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)." 
+ ], + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters, hdr_schemata.definitions.HDRUK.IsPartOfEnum.IsPartOfEnum]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "coverage", + "required": false, + "title": "Coverage", + "description": "Observational, Spatial and Temporal coverage", + "examples": null, + "type": [ + "Coverage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "spatial", + "required": false, + "title": "Spatial", + "description": "List of countries where the data was taken from", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "pathway", + "required": false, + "title": "Pathway", + "description": "Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. 
primary and secondary care), or an integrated care record covering the whole patient pathway.", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "followup", + "required": false, + "title": "Followup", + "description": "What is the typical time span that a patient appears in the dataset (follow up period)", + "examples": null, + "type": [ + "Followup['0 - 6 MONTHS','6 - 12 MONTHS','1 - 10 YEARS','> 10 YEARS','UNKNOWN','CONTINUOUS','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "typicalAgeRange", + "required": false, + "title": "Typical Age Range", + "description": "Age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "examples": null, + "type": [ + "AgeRange[{'anyOf': [{'pattern': 'Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "gender", + "required": false, + "title": "Gender", + "description": "Male, Female, Other", + "examples": null, + "type": [ + "GenderType['Male','Female','Other']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "biologicalsamples", + "required": false, + "title": "Biological Samples", + "description": "Blood, Saliva, Urine, Other", + "examples": null, + "type": [ + "BiologicalSampleType['Blood','Other','Urine','Saliva']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "psychological", + "required": false, + "title": "Psychological", + "description": "Mental health, Cognitive function", + "examples": null, + "type": [ + "PsychologicalType['Cognitive 
Function','Mental Health']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "physical", + "required": false, + "title": "Physical", + "description": "Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive", + "examples": null, + "type": [ + "PhysicalType['Respiratory','Vision','Hearing','Musculoskeletal','Cardiovascular','Reproductive']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "anthropometric", + "required": false, + "title": "Anthropometric", + "description": "Height, Weight, Waist circumference, Hip circumference, Blood pressure", + "examples": null, + "type": [ + "AnthropometricType['Blood Pressure','Hip Circumference','Height','Waist Circumference','Weight']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "lifestyle", + "required": false, + "title": "Lifestyle", + "description": "Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", + "examples": null, + "type": [ + "LifestylesType['Smoking','Dietary Habits','Physical Activity','Alcohol']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "socioeconomic", + "required": false, + "title": "Socio-economic", + "description": "Occupation, Family circumstances, Housing, Education, Ethnic group, Marital status, Social support", + "examples": null, + "type": [ + "SocioEconomicType['Finances','Family Circumstances','Housing','Education','Marital Status','Occupation','Ethnic Group','Social Support']" + ], + "is_list": true, + "is_optional": true + } + ] + }, + { + "name": "provenance", + "required": false, + "title": "Provenance", + "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", + "examples": null, + "type": [ + "Provenance", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "origin", + "required": false, + "title": null, + 
"description": null, + "examples": null, + "type": [ + "Origin", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "purpose", + "required": false, + "title": "Purpose", + "description": "Please indicate the purpose(s) for which the dataset was collected.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Purpose.Purpose]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "source", + "required": false, + "title": "Source", + "description": "Please indicate the source of the data extraction", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Source.Source]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "collectionSituation", + "required": false, + "title": "Setting", + "description": "Please indicate the setting(s) where data was collected. Multiple settings may be provided", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[hdr_schemata.definitions.HDRUK.Setting.Setting]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "temporal", + "required": true, + "title": null, + "description": null, + "examples": null, + "type": [ + "Temporal" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "distributionReleaseDate", + "required": false, + "title": "Release Date", + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. 
Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "startDate", + "required": true, + "title": "Start Date", + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "endDate", + "required": false, + "title": "End Date", + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. 
If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "examples": null, + "type": [ + "date", + "datetime", + "EndDateEnum['CONTINUOUS',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "timeLag", + "required": true, + "title": "Time Lag", + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "examples": null, + "type": [ + "TimeLag['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NO TIMELAG','NOT APPLICABLE','OTHER',null]" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "publishingFrequency", + "required": true, + "title": "Publishing Frequency", + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicated when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "examples": null, + "type": [ + "Periodicity['STATIC','IRREGULAR','CONTINUOUS','BIENNIAL','ANNUAL','BIANNUAL','QUARTERLY','BIMONTHLY','MONTHLY','BIWEEKLY','WEEKLY','SEMIWEEKLY','DAILY','OTHER',null]" + ], + "is_list": false, + "is_optional": false + } + ] + } + ] + }, + { + "name": "accessibility", + "required": true, + "title": "Accessibility", + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", + "examples": null, + "type": [ + "Accessibility" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "usage", + "required": false, + "title": "Usage", + "description": "This section includes information about how the data can be used and how it is currently being used", + "examples": null, + "type": [ + "Usage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "dataUseLimitation", + "required": true, + "title": "Data Use Limitation", + "description": "Any restrictions to its usage", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataUseRequirement", + "required": true, + "title": "Data Use Requirements", + "description": "Any requirements needed for data usage", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "resourceCreator", + "required": false, + "title": "Resource Creator", + "description": "Who has created this resource", + "examples": null, + "type": [ + "Organisation", + "null" + ], + "is_list": false, + "is_optional": true, + 
"subItems": [ + { + "name": "name", + "required": true, + "title": "Name", + "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/", + "examples": null, + "type": [ + "Name[{}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "gatewayId", + "required": false, + "title": "Publisher gateway id", + "description": "The link to an ID somewhere in the gateway where more information on the publisher can be retrieved.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "rorId", + "required": false, + "title": "Research Organization Registry Identifier", + "description": "The Research Organization Registry (ROR) for the organisation, if applicable", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + } + ] + }, + { + "name": "access", + "required": true, + "title": "Access", + "description": "This section includes information about data access", + "examples": null, + "type": [ + "Access" + ], + "is_list": false, + "is_optional": false, + "subItems": [ + { + "name": "accessRights", + "required": true, + "title": "Access Rights", + "description": "Optional link(s) or a description of where the license associated to accessing this dataset", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessService", + "required": false, + "title": "Access Service", + "description": "", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + 
"is_optional": true, + "subItems": [] + }, + { + "name": "accessRequestCost", + "required": false, + "title": "Organisation Access Request Cost", + "description": "", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "deliveryLeadTime", + "required": false, + "title": "Access Request Duration", + "description": "An arbitrary guess at the time to gain access to the dataset...", + "examples": null, + "type": [ + "DeliveryLeadTime['LESS 1 WEEK','1-2 WEEKS','2-4 WEEKS','1-2 MONTHS','2-6 MONTHS','MORE 6 MONTHS','VARIABLE','NOT APPLICABLE','OTHER',null]", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "jurisdiction", + "required": true, + "title": "Jurisdiction", + "description": "Comma separated country codes of where the data jurisdiction is.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataController", + "required": true, + "title": "Data Controller", + "description": "Name of the data controller", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "dataProcessor", + "required": false, + "title": "Data Processor", + "description": "Name of the data processors", + "examples": null, + "type": [ + "LongDescription[{'anyOf': [{'maxLength': 50000, 'minLength': 2, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "accessServiceCategory", + "required": false, + "title": "Access/governance requirementss", + "description": "Where 
access to data comes from: TRE/SDE, direct access, open access, varies based on project.", + "examples": null, + "type": [ + "AccessService['TRE/SDE','Direct access','Open access','Varies based on project']" + ], + "is_list": true, + "is_optional": true + } + ] + }, + { + "name": "formatAndStandards", + "required": false, + "title": "Format and Standards", + "description": "Section includes technical attributes for language vocabularies, sizes etc. and gives researchers facts about and processing the underlying data in the dataset.", + "examples": null, + "type": [ + "FormatAndStandards", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "vocabularyEncodingSchemes", + "required": true, + "title": "Controlled Vocabulary", + "description": "Code value of the ontology vocabulary encoding", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "conformsTo", + "required": true, + "title": "Conforms To", + "description": "What the vocabulary conforms to.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "languages", + "required": true, + "title": "Language Code(s)", + "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "formats", + "required": true, + "title": "Dataset Format", + "description": "Format(s) the dataset can be made available in", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': 
[{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + } + ] + }, + { + "name": "enrichmentAndLinkage", + "required": false, + "title": "Enrichment and Linkage", + "description": "This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", + "examples": null, + "type": [ + "EnrichmentAndLinkage", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "qualifiedRelation", + "required": false, + "title": "Linked Datasets", + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. Note: If all the datasets from Gateway organisation can be linked please indicate \u201cALL\u201d and the onboarding portal will automate linkage across the datasets submitted.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Union[hdr_schemata.definitions.HDRUK.Url.Url, NoneType, hdr_schemata.definitions.HDRUK.OneHundredFiftyCharacters.OneHundredFiftyCharacters]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "derivation", + "required": false, + "title": "Derivations", + "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset.", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.AbstractText.AbstractText]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "tools", + "required": false, + "title": "Tools", + "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", + "examples": null, + "type": [ + "CommaSeparatedValues[{'anyOf': [{'pattern': '([^,]+)', 'type': 'string'}, {'type': 'null'}]}]", + "typing.List[typing.Optional[hdr_schemata.definitions.HDRUK.Url.Url]]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "syntheticDataWebLink", + "required": false, + "title": "Synthetic Data Web Links", + "description": "Links to locations of information and or raw downloads of synthetic data associated with this dataset", + "examples": null, + "type": [ + "Url[{'anyOf': [{'format': 'uri', 'minLength': 1, 'type': 'string'}, {'type': 'null'}]}]" + ], + "is_list": true, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "observations", + "required": true, + "title": "Observations", + "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. 
Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d", + "examples": null, + "type": [ + "Observation" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "observedNode", + "required": true, + "title": "Statistical Population", + "description": "Please select one of the following statistical populations for you observation", + "examples": [ + "PERSONS" + ], + "type": [ + "StatisticalPopulationConstrained['PERSONS','EVENTS','FINDINGS']" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredValue", + "required": true, + "title": "Measured Value", + "description": "Please provide the population size associated with the population type the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "examples": null, + "type": [ + "int" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "disambiguatingDescription", + "required": false, + "title": "Disambiguating Description", + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "examples": null, + "type": [ + "AbstractText[{'anyOf': [{'maxLength': 500, 'minLength': 5, 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + }, + { + "name": "observationDate", + "required": true, + "title": "Observation Date", + "description": "Please provide the date that the observation was made. 
Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations.", + "examples": null, + "type": [ + "date", + "datetime" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "measuredProperty", + "required": true, + "title": "Measured Property", + "description": "Initially this will be defaulted to \"COUNT\"", + "examples": null, + "type": [ + "MeasuredProperty[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + } + ] + }, + { + "name": "structuralMetadata", + "required": false, + "title": "Structural Metadata", + "description": "Descriptions of all tables and data elements that can be included in the dataset", + "examples": null, + "type": [ + "DataClass" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Table Name", + "description": "The name of a table in a dataset.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "description", + "required": false, + "title": "Table Description", + "description": "A description of a table in a dataset.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "elements", + "required": true, + "title": "Data Elements", + "description": "A list of data elements contained within a table in a dataset.", + "examples": null, + "type": [ + "DataElement" + ], + "is_list": true, + "is_optional": false, + "subItems": [ + { + "name": "name", + "required": true, + "title": "Column Name", + "description": "The name of a column in a table.", + 
"examples": null, + "type": [ + "Name[{}]" + ], + "is_list": false, + "is_optional": false, + "subItems": [] + }, + { + "name": "dataType", + "required": true, + "title": "Data Type", + "description": "The data type of values in the column", + "examples": null, + "type": [ + "str" + ], + "is_list": false, + "is_optional": false + }, + { + "name": "description", + "required": false, + "title": "Column Description", + "description": "A description of a column in a table.", + "examples": null, + "type": [ + "str", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "sensitive", + "required": true, + "title": "Sensitive", + "description": "A True or False value, indicating if the field is sensitive or not", + "examples": null, + "type": [ + "bool" + ], + "is_list": false, + "is_optional": false + } + ] + } + ] + }, + { + "name": "tissuesSampleCollection", + "required": false, + "title": "Tissues Sample Collection", + "description": "Metadata collection for Tissue Samples datasets", + "examples": null, + "type": [ + "TissuesSampleCollection" + ], + "is_list": true, + "is_optional": true, + "subItems": [ + { + "name": "dataCategories", + "required": false, + "title": "Data Categories", + "description": "The type of data that is associated with the samples in the study. Can be several values MIABIS-2.0-13", + "examples": null, + "type": [ + "TissueDataCategoriesEnum['Biological samples','Survey data','Imaging data','Medical records','National registries','Genealogical records','Physiological/Biochemical measurements','Other']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "materialType", + "required": false, + "title": "Material Type", + "description": "The biospecimen saved from a biological entity for propagation e.g. testing, diagnostics, treatment or research purposes. 
Can be several values MIABIS-2.0-14", + "examples": null, + "type": [ + "MaterialTypeCategories['Blood','DNA','Faeces','Immortalized Cell Lines','Isolated Pathogen','Other','Plasma','RNA','Saliva','Serum','Tissue (Frozen)','Tissue (FFPE)','Urine']" + ], + "is_list": true, + "is_optional": true + }, + { + "name": "tissueSampleMetadata", + "required": false, + "title": "Tissue Sample Metadata", + "description": "Metadata related to the tissue sample", + "examples": null, + "type": [ + "TissueSampleMetadata", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [ + { + "name": "creationDate", + "required": false, + "title": "Creation Date", + "description": "Date when the tissue sample metadata was created", + "examples": null, + "type": [ + "date", + "datetime", + "null" + ], + "is_list": false, + "is_optional": true + }, + { + "name": "AnatomicalSiteOntologyCode", + "required": false, + "title": "Anatomical Site Ontology Code", + "description": "Ontology code for the anatomical site, this code must match an ICD-0-3 format", + "examples": null, + "type": [ + "ICD_0_3[{'anyOf': [{'pattern': '^[C\\\\d]{3}\\\\.\\\\d{4}\\\\/\\\\d{1,4}$', 'type': 'string'}, {'type': 'null'}]}]", + "null" + ], + "is_list": false, + "is_optional": true, + "subItems": [] + } + ] + }, + { + "name": "collectionType", + "required": false, + "title": "Collection Type", + "description": "The type of the sample collection. 
Can be several values [MIABIS-2.0-16](https://github.com/BBMRI-ERIC/miabis/blob/master/Structured-data-and-lists.md#collection-type)", + "examples": null, + "type": [ + "TissueCollectionTypeEnum['Case-control','Cohort','Cross-sectional','Longitudinal','Twin-study','Quality control','Population-based','Disease specific','Birth cohort','Other']", + "null" + ], + "is_list": false, + "is_optional": true + } + ] + } +] \ No newline at end of file diff --git a/hdr_schemata/definitions/HDRUK/AccessService.py b/hdr_schemata/definitions/HDRUK/AccessService.py new file mode 100644 index 0000000..8807888 --- /dev/null +++ b/hdr_schemata/definitions/HDRUK/AccessService.py @@ -0,0 +1,8 @@ +from enum import Enum + + +class AccessService(Enum): + TRE_SDE = "TRE/SDE" + DIRECT_ACCESS = "Direct access" + OPEN_ACCESS = "Open access" + VARIED = "Varies based on project" diff --git a/hdr_schemata/definitions/HDRUK/__init__.py b/hdr_schemata/definitions/HDRUK/__init__.py index 34fed8d..3ffec8f 100644 --- a/hdr_schemata/definitions/HDRUK/__init__.py +++ b/hdr_schemata/definitions/HDRUK/__init__.py @@ -1,4 +1,5 @@ from .AbstractText import AbstractText +from .AccessService import AccessService from .AgeRange import AgeRange from .CommaSeparatedIntegers import CommaSeparatedIntegers from .CommaSeparatedValues import CommaSeparatedValues diff --git a/hdr_schemata/models/GWDM/1.2/schema.json b/hdr_schemata/models/GWDM/1.2/schema.json new file mode 100644 index 0000000..2c4e95d --- /dev/null +++ b/hdr_schemata/models/GWDM/1.2/schema.json @@ -0,0 +1,1949 @@ +{ + "$defs": { + "AbstractText": { + "anyOf": [ + { + "maxLength": 500, + "minLength": 5, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "AbstractText" + }, + "Access": { + "additionalProperties": false, + "properties": { + "accessRights": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Optional link(s) or a description of where the license associated to 
accessing this dataset", + "example": "https://raw.githubusercontent.com/HDRUK/papers/master/LICENSE", + "title": "Access Rights" + }, + "accessService": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "", + "example": "The SAIL Databank is powered by the UK Secure e-Research Platform (UKSeRP). Following approval through safeguard processes, access to project-specific data within the secure environment is permitted using two-factor authentication.", + "title": "Access Service" + }, + "accessRequestCost": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "", + "example": "Data provision is free from SAIL. Overall project costing depends on the number of people that require access to the SAIL Gateway, the activities that SAIL needs to complete (e.g. loading non-standard datasets), data refreshes, analytical work required, disclosure control process, and special case technological requirements.", + "title": "Organisation Access Request Cost" + }, + "deliveryLeadTime": { + "anyOf": [ + { + "$ref": "#/$defs/DeliveryLeadTime" + }, + { + "type": "null" + } + ], + "default": null, + "description": "An arbitrary guess at the time to gain access to the dataset...", + "example": "2-6 MONTHS", + "title": "Access Request Duration" + }, + "jurisdiction": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Comma separated country codes of where the data jurisdiction is.", + "example": "GB-WLS,GB-GBN,GB-SCT", + "title": "Jurisdiction" + }, + "dataController": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "description": "Name of the data controller", + "example": "SAIL Databank", + "title": "Data Controller" + }, + "dataProcessor": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + 
"default": null, + "description": "Name of the data processors", + "example": "SAIL Databank", + "title": "Data Processor" + }, + "accessServiceCategory": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Where access to data come from: TRE/SED, direct access, open acccess, varies based on project.", + "example": "", + "title": "Access/governance requirementss" + } + }, + "required": [ + "accessRights", + "jurisdiction", + "dataController" + ], + "title": "Access", + "type": "object" + }, + "Accessibility": { + "additionalProperties": false, + "properties": { + "usage": { + "anyOf": [ + { + "$ref": "#/$defs/Usage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "This section includes information about how the data can be used and how it is currently being used", + "title": "Usage" + }, + "access": { + "allOf": [ + { + "$ref": "#/$defs/Access" + } + ], + "description": "This section includes information about data access" + }, + "formatAndStandards": { + "anyOf": [ + { + "$ref": "#/$defs/FormatAndStandards" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Section includes technical attributes for language vocabularies, sizes etc. 
and gives researchers facts about and processing the underlying data in the dataset.", + "title": "Format and Standards" + } + }, + "required": [ + "access" + ], + "title": "Accessibility", + "type": "object" + }, + "AgeRange": { + "anyOf": [ + { + "pattern": "Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "AgeRange" + }, + "CommaSeparatedValues": { + "anyOf": [ + { + "pattern": "([^,]+)", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "CommaSeparatedValues" + }, + "Coverage": { + "additionalProperties": false, + "properties": { + "spatial": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "List of countries where the data was taken from", + "example": "United Kingdom,Wales,England", + "title": "Spatial" + }, + "pathway": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", + "example": "The lookup contains references to link data held elsewhere on:\n\u2022 individuals appearing as defendants in criminal cases dealt with by the magistrates' or Crown Court in England and Wales (including Youth Courts). \n\u2022 individuals supervised by the probation service in England and Wales\n\u2022 individuals serving custodial sentences in England & Wales who appear within records from the prison data source, p-NOMIS. Young Offenders are included if resident at prisons or Young Offender Institutes (YOIs) that use p-NOMIS, however, this excludes the majority of Secure Schools and Secure Training Centres. 
\"\n\n\"The linking dataset includes a person ID and link to record in other data first datasets for: \n\u2022 Disposals in the magistrates\u2019 court from 1 January 2011 to 31 December 2020\n\u2022 Disposals in the Crown Court from 1 January 2013 to 31 December 2020\n\u2022 Custodial sentences of offenders in custody from January 2011 to September 2021 (including sentences begun before 2011) \n\u2022 Offender probation records from January 2014 to December 2020.", + "title": "Pathway" + }, + "followup": { + "anyOf": [ + { + "$ref": "#/$defs/Followup" + }, + { + "type": "null" + } + ], + "default": null, + "description": "What is the typical time span that a patient appears in the dataset (follow up period)", + "example": "CONTINUOUS", + "title": "Followup" + }, + "typicalAgeRange": { + "anyOf": [ + { + "$ref": "#/$defs/AgeRange" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Age range in whole years of participants in the dataset. Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "example": "1-150", + "title": "Typical Age Range" + }, + "gender": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Male, Female, Other", + "title": "Gender" + }, + "biologicalsamples": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Blood, Saliva, Urine, Other", + "title": "Biological Samples" + }, + "psychological": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Mental health, Cognitive function", + "title": "Psychological" + }, + "physical": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Cardiovascular, Respiratory, Musculoskeletal, 
Hearing and Vision, Reproductive", + "title": "Physical" + }, + "anthropometric": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Height, Weight, Waist circumference, Hip circumference, Blood pressure", + "title": "Anthropometric" + }, + "lifestyle": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", + "title": "Lifestyle" + }, + "socioeconomic": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Occupation, Family circumstances, Housing, Education, Ethnic group, Marital status, Social support", + "title": "Socio-economic" + } + }, + "title": "Coverage", + "type": "object" + }, + "DataColumn": { + "properties": { + "name": { + "allOf": [ + { + "$ref": "#/$defs/Name" + } + ], + "description": "The name of a column in a table.", + "title": "Column Name" + }, + "dataType": { + "description": "The data type of values in the column", + "title": "Data Type", + "type": "string" + }, + "description": { + "anyOf": [ + { + "maxLength": 20000, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of a column in a table.", + "title": "Column Description" + }, + "sensitive": { + "description": "A True or False value, indicating if the field is sensitive or not", + "title": "Sensitive", + "type": "boolean" + }, + "values": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/DataValue" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Data values contained within the column", + "title": "Values" + } + }, + "required": [ + "name", + "dataType", + "sensitive" + ], + "title": "DataColumn", + "type": "object" + }, + "DataTable": { + 
"additionalProperties": false, + "properties": { + "name": { + "anyOf": [ + { + "maxLength": 500, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The name of a table in a dataset.", + "title": "Table Name" + }, + "description": { + "anyOf": [ + { + "maxLength": 20000, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of a table in a dataset.", + "title": "Table Description" + }, + "columns": { + "description": "A list of columns contained within a table in a dataset.", + "items": { + "$ref": "#/$defs/DataColumn" + }, + "title": "Data Columns", + "type": "array" + } + }, + "required": [ + "name", + "columns" + ], + "title": "DataTable", + "type": "object" + }, + "DataValue": { + "properties": { + "name": { + "allOf": [ + { + "$ref": "#/$defs/Name" + } + ], + "description": "Unique value in a column .", + "title": "Value Name" + }, + "description": { + "anyOf": [ + { + "maxLength": 20000, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of a unique value in a column.", + "title": "Value Description" + }, + "frequency": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The frequency of occurrance of a value in a column", + "title": "Value Frequency" + } + }, + "required": [ + "name" + ], + "title": "DataValue", + "type": "object" + }, + "DatasetLinkage": { + "additionalProperties": false, + "properties": { + "isDerivedFrom": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. 
Single or multiple dimensions can be provided as a derived extract alongside the dataset", + "example": "Data will be minimised as appropriate relative to the data access application", + "title": "Derivations" + }, + "isPartOf": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "If the dataset is part of a group or family", + "example": "UKCRC Tissue Directory and Coordination Centre", + "title": "Is PartOf" + }, + "isMemberOf": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Dataset is a member of XXX(?)", + "title": "Is MemberOf" + }, + "linkedDatasets": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Links to other datasets.", + "example": "Yes. To any SAIL dataset & reference data.,ALL", + "title": "Linked Datasets" + } + }, + "title": "DatasetLinkage", + "type": "object" + }, + "DatasetType": { + "anyOf": [ + { + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "DatasetType" + }, + "DeliveryLeadTime": { + "enum": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NOT APPLICABLE", + "OTHER", + null + ], + "title": "DeliveryLeadTime" + }, + "Doi": { + "anyOf": [ + { + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Doi" + }, + "Followup": { + "enum": [ + "0 - 6 MONTHS", + "6 - 12 MONTHS", + "1 - 10 YEARS", + "> 10 YEARS", + "UNKNOWN", + "CONTINUOUS", + "OTHER", + null + ], + "title": "Followup" + }, + "FormatAndStandards": { + "additionalProperties": false, + "properties": { + "vocabularyEncodingSchemes": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Code value of the 
ontology vocabulary encoding", + "example": "OPCS4,NHS NATIONAL CODES,ICD10,OTHER", + "title": "Controlled Vocabulary" + }, + "conformsTo": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "What the vocabulary conforms to.", + "example": "LOCAL,NHS DATA DICTIONARY", + "title": "Conforms To" + }, + "languages": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", + "example": "en", + "title": "Language Code(s)" + }, + "formats": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Format(s) the dataset can be made available in", + "example": "CSV,JSON,SQL database table", + "title": "Dataset Format" + } + }, + "required": [ + "vocabularyEncodingSchemes", + "conformsTo", + "languages", + "formats" + ], + "title": "FormatAndStandards", + "type": "object" + }, + "Linkage": { + "additionalProperties": false, + "properties": { + "isGeneratedUsing": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "??", + "title": "Is Generated Using" + }, + "associatedMedia": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Any media associated with the Gateway Organisation using a valid URI for the content. 
This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question", + "example": "https://popdatasci.swan.ac.uk/centres-of-excellence/sail/,https://www.youtube.com/watch?v=ZK9-Jw3uVkw,https://saildatabank.com/,https://saildatabank.com/about-us/", + "title": "Associated Media" + }, + "dataUses": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "??", + "title": "Data Uses" + }, + "isReferenceIn": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Rhe keystone paper associated with the dataset. Also include a list of known citations, if available and should be links to existing resources where the dataset has been used or referenced.", + "title": "Is Reference in" + }, + "tools": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "URL of any analysis tools or models that have been created for this dataset and are available for further use", + "example": "https://conceptlibrary.saildatabank.com/", + "title": "Tools" + }, + "datasetLinkage": { + "anyOf": [ + { + "$ref": "#/$defs/DatasetLinkage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Dataset Linkage copied over from", + "title": "Dataset Linkage" + }, + "investigations": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide the keystone paper associated with the dataset.", + "example": "https://digital.nhs.uk/services/data-access-request-service-dars/register-of-approved-data-releases", + "title": "Investigations" + }, + "syntheticDataWebLink": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + 
"default": null, + "description": "Links to locations of information and or raw downloads of synthetic data associated with this dataset", + "example": "", + "title": "Synthetic Data Web Links" + } + }, + "title": "Linkage", + "type": "object" + }, + "LongAbstractText": { + "anyOf": [ + { + "maxLength": 5000, + "minLength": 5, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "LongAbstractText" + }, + "LongDescription": { + "anyOf": [ + { + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "LongDescription" + }, + "MeasuredProperty": { + "title": "MeasuredProperty" + }, + "Name": { + "title": "Name" + }, + "Observation": { + "additionalProperties": false, + "properties": { + "observedNode": { + "allOf": [ + { + "$ref": "#/$defs/StatisticalPopulationConstrained" + } + ], + "description": "Please select one of the following statistical populations for you observation", + "examples": [ + "PERSONS" + ], + "title": "Statistical Population" + }, + "measuredValue": { + "description": "Please provide the population size associated with the population type the dataset i.e. 1000 people in a study, or 87 images (MRI) of Knee Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "title": "Measured Value", + "type": "integer" + }, + "disambiguatingDescription": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ], + "default": null, + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "title": "Disambiguating Description" + }, + "observationDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + } + ], + "description": "Please provide the date that the observation was made. 
Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. Users can add multiple observations.", + "title": "Observation Date" + }, + "measuredProperty": { + "allOf": [ + { + "$ref": "#/$defs/MeasuredProperty" + } + ], + "description": "Initially this will be defaulted to \"COUNT\"", + "title": "Measured Property" + } + }, + "required": [ + "observedNode", + "measuredValue", + "observationDate", + "measuredProperty" + ], + "title": "Observation", + "type": "object" + }, + "Organisation": { + "properties": { + "name": { + "anyOf": [ + { + "$ref": "#/$defs/Name" + }, + { + "type": "null" + } + ], + "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. 
In most cases this will be the same as the HDR UK Organisation (Hub or Alliance Member).", + "example": "SAIL", + "title": "Name" + }, + "gatewayId": { + "anyOf": [ + { + "maxLength": 50, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The link to an ID somewhere in the gateway where more information on the publisher can be retrieved.", + "title": "Publisher gateway id" + }, + "rorId": { + "anyOf": [ + { + "maxLength": 9, + "minLength": 9, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The Research Organization Registry (ROR) for the organisation, if applicable", + "title": "Research Organization Registry Identifier" + } + }, + "required": [ + "name" + ], + "title": "Organisation", + "type": "object" + }, + "Origin": { + "additionalProperties": false, + "properties": { + "purpose": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Indicates the purpose(s) for which the dataset was collected.", + "example": "ADMINISTRATIVE,STATUTORY", + "title": "Purpose" + }, + "source": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Indicates the source of the data extraction", + "example": "PAPER BASED,ELECTRONIC SURVEY", + "title": "Source" + }, + "collectionSituation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Indicate the setting(s) where data was collected. 
Multiple settings may be provided", + "example": "IN-PATIENTS,PRIMARY CARE", + "title": "Setting" + } + }, + "title": "Origin", + "type": "object" + }, + "Periodicity": { + "enum": [ + "STATIC", + "IRREGULAR", + "CONTINUOUS", + "BIENNIAL", + "ANNUAL", + "BIANNUAL", + "QUARTERLY", + "BIMONTHLY", + "MONTHLY", + "BIWEEKLY", + "WEEKLY", + "SEMIWEEKLY", + "DAILY", + "OTHER", + null + ], + "title": "Periodicity" + }, + "Provenance": { + "additionalProperties": false, + "properties": { + "origin": { + "anyOf": [ + { + "$ref": "#/$defs/Origin" + }, + { + "type": "null" + } + ], + "default": null + }, + "temporal": { + "$ref": "#/$defs/Temporal" + } + }, + "required": [ + "temporal" + ], + "title": "Provenance", + "type": "object" + }, + "Required": { + "properties": { + "gatewayId": { + "description": "Need a field in Mauro that captures the datasetID to link to gateway database - or can we just use the one created in Mauro?", + "maxLength": 50, + "minLength": 1, + "title": "Gatewayid", + "type": "string" + }, + "gatewayPid": { + "description": "Need a field in Mauro that captures the dataset pid to link to gateway database", + "maxLength": 50, + "minLength": 2, + "title": "Gatewaypid", + "type": "string" + }, + "issued": { + "description": "Aren't issued and modified always the same because of versioning? Is that fine to duplicate because datasets in dcat might look different?", + "format": "date-time", + "title": "Issued", + "type": "string" + }, + "modified": { + "description": "Aren't issued and modified always the same because of versioning? 
Is that fine to duplicate because datasets in dcat might look different?", + "format": "date-time", + "title": "Modified", + "type": "string" + }, + "revisions": { + "items": { + "$ref": "#/$defs/Revision" + }, + "title": "Revisions", + "type": "array" + }, + "version": { + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "pattern": "^\\d+\\.\\d+\\.\\d+$", + "title": "Dataset Version", + "type": "string" + } + }, + "required": [ + "gatewayId", + "gatewayPid", + "issued", + "modified", + "revisions", + "version" + ], + "title": "Required", + "type": "object" + }, + "Revision": { + "properties": { + "version": { + "description": "Version number used for previous version of this dataset", + "example": "6.0.0", + "maxLength": 100, + "minLength": 2, + "title": "revision version", + "type": "string" + }, + "url": { + "allOf": [ + { + "$ref": "#/$defs/Url" + } + ], + "description": "Some url with a reference to the record of a previous version of this dataset", + "example": "https://api.service.nhs.uk/health-research-data-catalogue/datasetrevisions/841f7da2-b018-41f6-b4ae-2e0aadab6561", + "title": "revision url" + } + }, + "required": [ + "version", + "url" + ], + "title": "Revision", + "type": "object" + }, + "SampleDonor": { + "properties": { + "id": { + "anyOf": [ + { + "maxLength": 50, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "ID of the sample donor", + "title": "Donor ID" + }, + "sex": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Sex of the sample donor", + "title": "Donor Sex" + }, + "birthDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Date of birth of the sample donor", + "title": "Donor birth date" + }, + "dataCategories": { + "anyOf": [ + { + "$ref": 
"#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Data categories related to the sample donor", + "title": "Donor Data Categories" + } + }, + "title": "SampleDonor", + "type": "object" + }, + "ShortTitle": { + "anyOf": [ + { + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "ShortTitle" + }, + "StatisticalPopulationConstrained": { + "enum": [ + "PERSONS", + "EVENTS", + "FINDINGS" + ], + "title": "StatisticalPopulationConstrained", + "type": "string" + }, + "Summary": { + "properties": { + "title": { + "allOf": [ + { + "$ref": "#/$defs/TwoHundredFiftyFiveCharacters" + } + ], + "description": "The main title of the dataset", + "example": "Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations", + "title": "Title" + }, + "shortTitle": { + "anyOf": [ + { + "$ref": "#/$defs/ShortTitle" + }, + { + "type": "null" + } + ], + "description": "A shorter descriptive title of the dataset", + "example": "ONS 2011 Census Wales (CENW)", + "title": "Shorttitle" + }, + "doiName": { + "anyOf": [ + { + "$ref": "#/$defs/Doi" + }, + { + "type": "null" + } + ], + "description": "DOI associated to this dataset", + "example": "10.1093/ije/dyx196", + "title": "Doiname" + }, + "abstract": { + "allOf": [ + { + "$ref": "#/$defs/LongAbstractText" + } + ], + "description": "Longer abstract detailing the dataset.", + "example": "COVID-19 Key Worker Testing Results data is required by NHS Digital to support COVID-19 requests for linkage, analysis and dissemination to other organisations who require the data in a timely manner.", + "title": "Abstract" + }, + "keywords": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Comma separated key words associated to this dataset.", + "example": "Preprints,Papers,HDR UK", + "title": "Keywords" + }, + "controlledKeywords": { + "anyOf": [ + { + 
"$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Keywords that have been filtered and limited", + "title": "Controlled Keywords" + }, + "contactPoint": { + "anyOf": [ + { + "format": "email", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "email of a person who can be the main contact point of this dataset", + "example": "susheel.varma@hdruk.ac.uk", + "title": "Contact Point" + }, + "datasetType": { + "anyOf": [ + { + "$ref": "#/$defs/DatasetType" + }, + { + "type": "null" + } + ], + "description": "What type of dataset is this?", + "title": "Dataset type" + }, + "description": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "description": "Longer description of the dataset in detail", + "example": "Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations\n\nThis will include:\n- Papers\n- COVID-19 Papers\n- COVID-19 Preprint", + "title": "Description" + }, + "publisher": { + "anyOf": [ + { + "$ref": "#/$defs/Organisation" + }, + { + "type": "null" + } + ], + "description": "Link to details about the publisher of this dataset", + "title": "Publisher" + }, + "populationSize": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Summary population size of the cohort", + "title": "Population size" + }, + "datasetSubType": { + "anyOf": [ + { + "$ref": "#/$defs/DatasetType" + }, + { + "type": "null" + } + ], + "default": null, + "description": "What us the subtype for this dataset?", + "title": "Dataset sub type" + } + }, + "required": [ + "title", + "shortTitle", + "doiName", + "abstract", + "keywords", + "controlledKeywords", + "contactPoint", + "datasetType", + "description", + "publisher" + ], + "title": "Summary", + "type": "object" + }, + "Temporal": { + "additionalProperties": false, + "properties": { + "startDate": { + "anyOf": [ + { + "format": "date", 
+ "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The start of the time period that the dataset provides coverage for", + "example": "12/03/2020", + "title": "Start Date" + }, + "endDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The end of the time period that the dataset provides coverage for", + "example": "12/03/2020", + "title": "End Date" + }, + "timeLag": { + "allOf": [ + { + "$ref": "#/$defs/TimeLag" + } + ], + "description": "Typical time-lag between an event and the data for that event appearing in the dataset", + "example": "LESS 1 WEEK", + "title": "Time Lag" + }, + "accrualPeriodicity": { + "allOf": [ + { + "$ref": "#/$defs/Periodicity" + } + ], + "description": "frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity.", + "example": "MONTHLY" + }, + "distributionReleaseDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Date of the latest release of the dataset. If this is a regular release i.e. 
quarterly, or this is a static dataset please complete this alongside Periodicity.", + "title": "Release Date" + } + }, + "required": [ + "startDate", + "timeLag", + "accrualPeriodicity" + ], + "title": "Temporal", + "type": "object" + }, + "TimeLag": { + "enum": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NO TIMELAG", + "NOT APPLICABLE", + "OTHER", + null + ], + "title": "TimeLag" + }, + "TissueSampleMetadata": { + "properties": { + "id": { + "anyOf": [ + { + "maxLength": 50, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "ID of the tissue sample metadata", + "title": "Metadata ID" + }, + "sampleDonor": { + "anyOf": [ + { + "$ref": "#/$defs/SampleDonor" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Information about the sample donor", + "title": "Sample Donor" + }, + "sampleType": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Type of the tissue sample", + "title": "Sample Type" + }, + "storageTemperature": { + "anyOf": [ + { + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Storage temperature of the tissue sample", + "title": "Storage Temperature" + }, + "creationDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Date when the tissue sample metadata was created", + "title": "Creation Date" + }, + "anatomicalSiteOntologyCode": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Ontology code for the anatomical site", + "title": "Anatomical Site Ontology Code" + }, + "anatomicalSiteOntologyDescription": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + 
{ + "type": "null" + } + ], + "default": null, + "description": "Ontology description for the anatomical site", + "title": "Anatomical Site Ontology Description" + }, + "anatomicalSiteFreeText": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Free text describing the anatomical site", + "title": "Anatomical Site Free Text" + }, + "sampleContentDiagnosis": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Diagnosis related to the sample content", + "title": "Sample Content Diagnosis" + }, + "useRestrictions": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Restrictions on the use of the tissue sample", + "title": "Use Restrictions" + } + }, + "title": "TissueSampleMetadata", + "type": "object" + }, + "TissuesSampleCollection": { + "properties": { + "id": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "ID of the tissue sample collection", + "title": "ID" + }, + "dataCategories": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Data categories related to the tissue sample collection", + "title": "Data Categories" + }, + "materialType": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Material type of the tissue sample collection", + "title": "Material Type" + }, + "accessConditions": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Access conditions for the tissue sample collection", + "title": "Access Conditions" + }, + "collectionType": { + "anyOf": [ + { + "$ref": 
"#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Type of the tissue sample collection", + "title": "Collection Type" + }, + "disease": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Disease associated with the tissue sample collection", + "title": "Disease" + }, + "storageTemperature": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Storage temperature of the tissue sample collection", + "title": "Storage Temperature" + }, + "sampleAgeRange": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Age range of the tissue sample collection", + "title": "Sample Age Range" + }, + "tissueSampleMetadata": { + "anyOf": [ + { + "$ref": "#/$defs/TissueSampleMetadata" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Metadata related to the tissue sample", + "title": "Tissue Sample Metadata" + } + }, + "title": "TissuesSampleCollection", + "type": "object" + }, + "TwoHundredFiftyFiveCharacters": { + "maxLength": 255, + "minLength": 2, + "title": "TwoHundredFiftyFiveCharacters", + "type": "string" + }, + "Url": { + "anyOf": [ + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Url" + }, + "Usage": { + "additionalProperties": false, + "properties": { + "dataUseLimitation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Any restrictions to its usage", + "example": "GENERAL RESEARCH USE,PROJECT SPECIFIC RESTRICTIONS", + "title": "Data Use Limitation" + }, + "dataUseRequirement": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Any requirements needed for data usage", + "example": 
"PROJECT SPECIFIC RESTRICTIONS,TIME LIMIT ON USE,USER SPECIFIC RESTRICTION", + "title": "Data Use Requirements" + }, + "resourceCreator": { + "anyOf": [ + { + "$ref": "#/$defs/Organisation" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Who has created this resource", + "title": "Resource Creator" + } + }, + "required": [ + "dataUseLimitation", + "dataUseRequirement" + ], + "title": "Usage", + "type": "object" + } + }, + "additionalProperties": false, + "properties": { + "required": { + "allOf": [ + { + "$ref": "#/$defs/Required" + } + ], + "description": "required metadata needed for the GWDM" + }, + "summary": { + "allOf": [ + { + "$ref": "#/$defs/Summary" + } + ], + "description": "Summary of metadata describing key pieces of information." + }, + "coverage": { + "anyOf": [ + { + "$ref": "#/$defs/Coverage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Observational, Spatial and Temporal coverage", + "title": "Coverage" + }, + "provenance": { + "anyOf": [ + { + "$ref": "#/$defs/Provenance" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Provenance information", + "title": "Provenance" + }, + "accessibility": { + "allOf": [ + { + "$ref": "#/$defs/Accessibility" + } + ], + "default": null, + "description": "Accessibility information.", + "title": "Accessibility" + }, + "linkage": { + "anyOf": [ + { + "$ref": "#/$defs/Linkage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Linkage and enrichment.", + "title": "Linkage" + }, + "observations": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Observation" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Obsservations", + "title": "Observations" + }, + "structuralMetadata": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/DataTable" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Descriptions of all tables and data elements 
that can be included in the dataset", + "title": "Structural Metadata" + }, + "tissuesSampleCollection": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/TissuesSampleCollection" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Metadata collection for Tissue Samples datasets", + "title": "Tissues Sample Collection" + } + }, + "required": [ + "required", + "summary" + ], + "title": "Gwdm12", + "type": "object" +} \ No newline at end of file diff --git a/hdr_schemata/models/GWDM/create_json_schema.py b/hdr_schemata/models/GWDM/create_json_schema.py index b8c2254..8a5f0e0 100644 --- a/hdr_schemata/models/GWDM/create_json_schema.py +++ b/hdr_schemata/models/GWDM/create_json_schema.py @@ -1,7 +1,7 @@ -from pydantic import ValidationError from v1_0 import Gwdm10 from v1_1 import Gwdm11 -import json +from v1_2 import Gwdm12 Gwdm10.save_schema("1.0/schema.json") Gwdm11.save_schema("1.1/schema.json") +Gwdm12.save_schema("1.2/schema.json") diff --git a/hdr_schemata/models/GWDM/v1_0/__init__.py b/hdr_schemata/models/GWDM/v1_0/__init__.py index 26758fc..946a55d 100644 --- a/hdr_schemata/models/GWDM/v1_0/__init__.py +++ b/hdr_schemata/models/GWDM/v1_0/__init__.py @@ -15,62 +15,61 @@ from .Linkage import Linkage from .Observations import Observation from .DataTable import DataTable + from .Usage import Usage +from .Access import Access + class Gwdm10(BaseModel): class Config: - extra = 'forbid' + extra = "forbid" required: Required = Field( - ..., - description='required metadata needed for the GWDM', - title='Required' + ..., description="required metadata needed for the GWDM", title="Required" ) summary: Summary = Field( ..., - description='Summary of metadata describing key pieces of information.', - title='Summary', + description="Summary of metadata describing key pieces of information.", + title="Summary", ) - + coverage: Optional[Coverage] = Field( None, - description='Spatial and Temporal coverage', - title='Coverage', + 
description="Spatial and Temporal coverage", + title="Coverage", ) provenance: Optional[Provenance] = Field( None, - description='Provenance information', - title='Provenance', + description="Provenance information", + title="Provenance", ) - + accessibility: Accessibility = Field( None, - description='Accessibility information.', - title='Accessibility', + description="Accessibility information.", + title="Accessibility", ) - + linkage: Optional[Linkage] = Field( None, - description='Linkage and enrichment.', - title='Linkage', + description="Linkage and enrichment.", + title="Linkage", ) - + observations: Optional[List[Observation]] = Field( None, - description='Obsservations', - title='Observations', + description="Obsservations", + title="Observations", ) structuralMetadata: Optional[List[DataTable]] = Field( None, - description='Descriptions of all tables and data elements that can be included in the dataset', - title='Structural Metadata', + description="Descriptions of all tables and data elements that can be included in the dataset", + title="Structural Metadata", ) - @classmethod - def save_schema(cls,location='./1.0/schema.json'): - with open(location,'w') as f: - json.dump(cls.model_json_schema(),f,indent=6) - + def save_schema(cls, location="./1.0/schema.json"): + with open(location, "w") as f: + json.dump(cls.model_json_schema(), f, indent=6) diff --git a/hdr_schemata/models/GWDM/v1_1/__init__.py b/hdr_schemata/models/GWDM/v1_1/__init__.py index 4ba9a34..42e3d7b 100644 --- a/hdr_schemata/models/GWDM/v1_1/__init__.py +++ b/hdr_schemata/models/GWDM/v1_1/__init__.py @@ -1,10 +1,12 @@ from hdr_schemata.models.GWDM import Gwdm10 + +from hdr_schemata.models.GWDM.v1_0 import * from .Coverage import Coverage from .Accessibility import Accessibility from .Required import Required from .Summary import Summary from .TissuesSampleCollection import TissuesSampleCollection -from typing import Optional,List +from typing import Optional, List from pydantic import 
Field diff --git a/hdr_schemata/models/GWDM/v1_2/Access.py b/hdr_schemata/models/GWDM/v1_2/Access.py new file mode 100644 index 0000000..4bbc6cd --- /dev/null +++ b/hdr_schemata/models/GWDM/v1_2/Access.py @@ -0,0 +1,14 @@ +from typing import Optional +from pydantic import Field +from hdr_schemata.definitions.HDRUK import CommaSeparatedValues + +from hdr_schemata.models.GWDM.v1_1 import Access as BaseAccess + + +class Access(BaseAccess): + accessServiceCategory: Optional[CommaSeparatedValues] = Field( + None, + description="Where access to data come from: TRE/SED, direct access, open acccess, varies based on project.", + example="", + title="Access/governance requirementss", + ) diff --git a/hdr_schemata/models/GWDM/v1_2/Accessibility.py b/hdr_schemata/models/GWDM/v1_2/Accessibility.py new file mode 100644 index 0000000..5f1cdab --- /dev/null +++ b/hdr_schemata/models/GWDM/v1_2/Accessibility.py @@ -0,0 +1,13 @@ +from pydantic import Field +from hdr_schemata.definitions.HDRUK import * + +from hdr_schemata.models.GWDM.v1_1 import Accessibility as BaseAccessibility +from .Access import Access + + +class Accessibility(BaseAccessibility): + access: Access = Field( + ..., + description="This section includes information about data access", + title="Access", + ) diff --git a/hdr_schemata/models/GWDM/v1_2/Linkage.py b/hdr_schemata/models/GWDM/v1_2/Linkage.py new file mode 100644 index 0000000..f95c90e --- /dev/null +++ b/hdr_schemata/models/GWDM/v1_2/Linkage.py @@ -0,0 +1,13 @@ +from typing import Optional +from hdr_schemata.models.GWDM.v1_1 import Linkage as BaseLinkage +from pydantic import Field +from hdr_schemata.definitions.HDRUK import CommaSeparatedValues + + +class Linkage(BaseLinkage): + syntheticDataWebLink: Optional[CommaSeparatedValues] = Field( + None, + description="Links to locations of information and or raw downloads of synthetic data associated with this dataset", + example="", + title="Synthetic Data Web Links", + ) diff --git 
a/hdr_schemata/models/GWDM/v1_2/__init__.py b/hdr_schemata/models/GWDM/v1_2/__init__.py new file mode 100644 index 0000000..5165ce7 --- /dev/null +++ b/hdr_schemata/models/GWDM/v1_2/__init__.py @@ -0,0 +1,22 @@ +from hdr_schemata.models.GWDM import Gwdm11 +from hdr_schemata.models.GWDM.v1_1 import * + +from .Accessibility import Accessibility +from .Linkage import Linkage + +from typing import Optional, List +from pydantic import Field + + +class Gwdm12(Gwdm11): + linkage: Optional[Linkage] = Field( + None, + description="Linkage and enrichment.", + title="Linkage", + ) + + accessibility: Accessibility = Field( + None, + description="Accessibility information.", + title="Accessibility", + ) diff --git a/hdr_schemata/models/HDRUK/2.2.1/schema.json b/hdr_schemata/models/HDRUK/2.2.1/schema.json new file mode 100644 index 0000000..b527cab --- /dev/null +++ b/hdr_schemata/models/HDRUK/2.2.1/schema.json @@ -0,0 +1,2055 @@ +{ + "$defs": { + "AbstractText": { + "anyOf": [ + { + "maxLength": 500, + "minLength": 5, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "AbstractText" + }, + "Access": { + "additionalProperties": false, + "properties": { + "accessRights": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Optional link(s) or a description of where the license associated to accessing this dataset", + "example": "https://raw.githubusercontent.com/HDRUK/papers/master/LICENSE", + "title": "Access Rights" + }, + "accessService": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "", + "example": "The SAIL Databank is powered by the UK Secure e-Research Platform (UKSeRP). 
Following approval through safeguard processes, access to project-specific data within the secure environment is permitted using two-factor authentication.", + "title": "Access Service" + }, + "accessRequestCost": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "", + "example": "Data provision is free from SAIL. Overall project costing depends on the number of people that require access to the SAIL Gateway, the activities that SAIL needs to complete (e.g. loading non-standard datasets), data refreshes, analytical work required, disclosure control process, and special case technological requirements.", + "title": "Organisation Access Request Cost" + }, + "deliveryLeadTime": { + "anyOf": [ + { + "$ref": "#/$defs/DeliveryLeadTime" + }, + { + "type": "null" + } + ], + "default": null, + "description": "An arbitrary guess at the time to gain access to the dataset...", + "example": "2-6 MONTHS", + "title": "Access Request Duration" + }, + "jurisdiction": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Comma separated country codes of where the data jurisdiction is.", + "example": "GB-WLS,GB-GBN,GB-SCT", + "title": "Jurisdiction" + }, + "dataController": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "description": "Name of the data controller", + "example": "SAIL Databank", + "title": "Data Controller" + }, + "dataProcessor": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Name of the data processors", + "example": "SAIL Databank", + "title": "Data Processor" + }, + "accessServiceCategory": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/AccessService" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Where access to data comes from: TRE/SDE, direct access, open 
access, varies based on project.", + "example": "", + "title": "Access/governance requirements" + } + }, + "required": [ + "accessRights", + "jurisdiction", + "dataController" + ], + "title": "Access", + "type": "object" + }, + "AccessService": { + "enum": [ + "TRE/SDE", + "Direct access", + "Open access", + "Varies based on project" + ], + "title": "AccessService", + "type": "string" + }, + "Accessibility": { + "additionalProperties": false, + "properties": { + "usage": { + "anyOf": [ + { + "$ref": "#/$defs/Usage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "This section includes information about how the data can be used and how it is currently being used", + "title": "Usage" + }, + "access": { + "allOf": [ + { + "$ref": "#/$defs/Access" + } + ], + "description": "This section includes information about data access" + }, + "formatAndStandards": { + "anyOf": [ + { + "$ref": "#/$defs/FormatAndStandards" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Section includes technical attributes for language vocabularies, sizes etc. 
and gives researchers facts about and processing the underlying data in the dataset.", + "title": "Format and Standards" + } + }, + "required": [ + "access" + ], + "title": "Accessibility", + "type": "object" + }, + "AgeRange": { + "anyOf": [ + { + "pattern": "Not Known|(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])-(150|1[0-4][0-9]|[0-9]|[1-8][0-9]|9[0-9])", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "AgeRange" + }, + "AnthropometricType": { + "enum": [ + "Blood Pressure", + "Hip Circumference", + "Height", + "Waist Circumference", + "Weight" + ], + "title": "AnthropometricType", + "type": "string" + }, + "BiologicalSampleType": { + "enum": [ + "Blood", + "Other", + "Urine", + "Saliva" + ], + "title": "BiologicalSampleType", + "type": "string" + }, + "CommaSeparatedValues": { + "anyOf": [ + { + "pattern": "([^,]+)", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "CommaSeparatedValues" + }, + "Coverage": { + "additionalProperties": false, + "properties": { + "spatial": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "default": null, + "description": "List of countries where the data was taken from", + "example": "United Kingdom,Wales,England", + "title": "Spatial" + }, + "pathway": { + "anyOf": [ + { + "$ref": "#/$defs/LongDescription" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Long description of the clinical/diagnostic/treatment pathway if applicable. This could include if the dataset is from a single speciality or area, a single tier of care, linked across two tier (e.g. primary and secondary care), or an integrated care record covering the whole patient pathway.", + "example": "The lookup contains references to link data held elsewhere on:\n\u2022 individuals appearing as defendants in criminal cases dealt with by the magistrates' or Crown Court in England and Wales (including Youth Courts). 
\n\u2022 individuals supervised by the probation service in England and Wales\n\u2022 individuals serving custodial sentences in England & Wales who appear within records from the prison data source, p-NOMIS. Young Offenders are included if resident at prisons or Young Offender Institutes (YOIs) that use p-NOMIS, however, this excludes the majority of Secure Schools and Secure Training Centres. \"\n\n\"The linking dataset includes a person ID and link to record in other data first datasets for: \n\u2022 Disposals in the magistrates\u2019 court from 1 January 2011 to 31 December 2020\n\u2022 Disposals in the Crown Court from 1 January 2013 to 31 December 2020\n\u2022 Custodial sentences of offenders in custody from January 2011 to September 2021 (including sentences begun before 2011) \n\u2022 Offender probation records from January 2014 to December 2020.", + "title": "Pathway" + }, + "followup": { + "anyOf": [ + { + "$ref": "#/$defs/Followup" + }, + { + "type": "null" + } + ], + "default": null, + "description": "What is the typical time span that a patient appears in the dataset (follow up period)", + "example": "CONTINUOUS", + "title": "Followup" + }, + "typicalAgeRange": { + "anyOf": [ + { + "$ref": "#/$defs/AgeRange" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Age range in whole years of participants in the dataset. 
Please provide range in the following format '[min age] \u2013 [max age]' where both the minimum and maximum are whole numbers (integers).", + "example": "1-150", + "title": "Typical Age Range" + }, + "gender": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/GenderType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Male, Female, Other", + "title": "Gender" + }, + "biologicalsamples": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/BiologicalSampleType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Blood, Saliva, Urine, Other", + "title": "Biological Samples" + }, + "psychological": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/PsychologicalType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Mental health, Cognitive function", + "title": "Psychological" + }, + "physical": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/PhysicalType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Cardiovascular, Respiratory, Musculoskeletal, Hearing and Vision, Reproductive", + "title": "Physical" + }, + "anthropometric": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/AnthropometricType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Height, Weight, Waist circumference, Hip circumference, Blood pressure", + "title": "Anthropometric" + }, + "lifestyle": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/LifestylesType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Cohort lifestyle habits: Smoking, Physical activity, Dietary habits, Alcohol", + "title": "Lifestyle" + }, + "socioeconomic": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/SocioEconomicType" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Occupation, Family 
circumstances, Housing, Education, Ethnic group, Marital status, Social support", + "title": "Socio-economic" + } + }, + "title": "Coverage", + "type": "object" + }, + "DataClass": { + "additionalProperties": false, + "properties": { + "name": { + "anyOf": [ + { + "maxLength": 500, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The name of a table in a dataset.", + "title": "Table Name" + }, + "description": { + "anyOf": [ + { + "maxLength": 20000, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of a table in a dataset.", + "title": "Table Description" + }, + "elements": { + "description": "A list of data elements contained within a table in a dataset.", + "items": { + "$ref": "#/$defs/DataElement" + }, + "title": "Data Elements", + "type": "array" + } + }, + "required": [ + "name", + "elements" + ], + "title": "DataClass", + "type": "object" + }, + "DataElement": { + "additionalProperties": true, + "properties": { + "name": { + "allOf": [ + { + "$ref": "#/$defs/Name" + } + ], + "description": "The name of a column in a table.", + "title": "Column Name" + }, + "dataType": { + "description": "The data type of values in the column", + "title": "Data Type", + "type": "string" + }, + "description": { + "anyOf": [ + { + "maxLength": 20000, + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A description of a column in a table.", + "title": "Column Description" + }, + "sensitive": { + "description": "A True or False value, indicating if the field is sensitive or not", + "title": "Sensitive", + "type": "boolean" + } + }, + "required": [ + "name", + "dataType", + "sensitive" + ], + "title": "DataElement", + "type": "object" + }, + "DatasetType": { + "anyOf": [ + { + "maxLength": 100, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "DatasetType" + }, + 
"DeliveryLeadTime": { + "enum": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NOT APPLICABLE", + "OTHER", + null + ], + "title": "DeliveryLeadTime" + }, + "Description": { + "anyOf": [ + { + "maxLength": 10000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Description" + }, + "Documentation": { + "additionalProperties": false, + "properties": { + "description": { + "anyOf": [ + { + "$ref": "#/$defs/Description" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A free-text description of the record.", + "title": "Description" + }, + "associatedMedia": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide any media associated with the Gateway Organisation using a valid URI for the content. This is an opportunity to provide additional context that could be useful for researchers wanting to understand more about the dataset and its relevance to their research question. The following formats will be accepted .jpg, .png or .svg, .pdf, .xslx or .docx. 
Note: media asset can be hosted by the organisation or uploaded using the onboarding portal.", + "examples": [ + "PDF Document that describes study protocol" + ], + "title": "Associated Media" + }, + "isPartOf": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + { + "$ref": "#/$defs/IsPartOfEnum" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": "NOT APPLICABLE", + "description": "Please complete only if the dataset is part of a group or family", + "examples": [ + "Hospital Episodes Statistics datasets (A&E, APC, OP, AC MSDS)." + ], + "title": "Group" + } + }, + "title": "Documentation", + "type": "object" + }, + "Doi": { + "anyOf": [ + { + "pattern": "^10.\\d{4,9}/[-._;()/:a-zA-Z0-9]+$", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Doi" + }, + "EmailAddress": { + "anyOf": [ + { + "format": "email", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "EmailAddress" + }, + "EndDateEnum": { + "enum": [ + "CONTINUOUS", + null + ], + "title": "EndDateEnum" + }, + "EnrichmentAndLinkage": { + "additionalProperties": false, + "properties": { + "qualifiedRelation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "If applicable, please provide the DOI of other datasets that have previously been linked to this dataset and their availability. If no DOI is available, please provide the title of the datasets that can be linked, where possible using the same title of a dataset previously onboarded to the HOP. 
Note: If all the datasets from Gateway organisation can be linked please indicate \u201cALL\u201d and the onboarding portal will automate linkage across the datasets submitted.", + "title": "Linked Datasets" + }, + "derivation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Indicate if derived datasets or predefined extracts are available and the type of derivation available. Notes. Single or multiple dimensions can be provided as a derived extract alongside the dataset.", + "title": "Derivations" + }, + "tools": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide the URL of any analysis tools or models that have been created for this dataset and are available for further use. Multiple tools may be provided. 
Note: We encourage users to adopt a model along the lines of https://www.ga4gh.org/news/tool-registry-service-api-enabling-an-interoperable-library-of-genomics-analysis-tools/", + "title": "Tools" + }, + "syntheticDataWebLink": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/Url" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Links to locations of information and or raw downloads of synthetic data associated with this dataset", + "example": "", + "title": "Synthetic Data Web Links" + } + }, + "title": "EnrichmentAndLinkage", + "type": "object" + }, + "Followup": { + "enum": [ + "0 - 6 MONTHS", + "6 - 12 MONTHS", + "1 - 10 YEARS", + "> 10 YEARS", + "UNKNOWN", + "CONTINUOUS", + "OTHER", + null + ], + "title": "Followup" + }, + "FormatAndStandards": { + "additionalProperties": false, + "properties": { + "vocabularyEncodingSchemes": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Code value of the ontology vocabulary encoding", + "example": "OPCS4,NHS NATIONAL CODES,ICD10,OTHER", + "title": "Controlled Vocabulary" + }, + "conformsTo": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "What the vocabulary conforms to.", + "example": "LOCAL,NHS DATA DICTIONARY", + "title": "Conforms To" + }, + "languages": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Language code(s) of the language of the dataset metadata and underlying data is made available.", + "example": "en", + "title": "Language Code(s)" + }, + "formats": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Format(s) the dataset can be made available in", + "example": "CSV,JSON,SQL database table", + "title": "Dataset Format" + } + }, + "required": [ + "vocabularyEncodingSchemes", + "conformsTo", + "languages", + 
"formats" + ], + "title": "FormatAndStandards", + "type": "object" + }, + "GenderType": { + "enum": [ + "Male", + "Female", + "Other" + ], + "title": "GenderType", + "type": "string" + }, + "ICD_0_3": { + "anyOf": [ + { + "pattern": "^[C\\d]{3}\\.\\d{4}\\/\\d{1,4}$", + "type": "string" + }, + { + "type": "null" + } + ], + "title": "ICD_0_3" + }, + "IsPartOfEnum": { + "const": "NOT APPLICABLE", + "title": "IsPartOfEnum" + }, + "LifestylesType": { + "enum": [ + "Smoking", + "Dietary Habits", + "Physical Activity", + "Alcohol" + ], + "title": "LifestylesType", + "type": "string" + }, + "LongDescription": { + "anyOf": [ + { + "maxLength": 50000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "LongDescription" + }, + "MaterialTypeCategories": { + "enum": [ + "Blood", + "DNA", + "Faeces", + "Immortalized Cell Lines", + "Isolated Pathogen", + "Other", + "Plasma", + "RNA", + "Saliva", + "Serum", + "Tissue (Frozen)", + "Tissue (FFPE)", + "Urine" + ], + "title": "MaterialTypeCategories", + "type": "string" + }, + "MeasuredProperty": { + "title": "MeasuredProperty" + }, + "MemberOf": { + "enum": [ + "HUB", + "ALLIANCE", + "OTHER", + "NCS" + ], + "title": "MemberOf", + "type": "string" + }, + "Name": { + "title": "Name" + }, + "Observation": { + "additionalProperties": false, + "properties": { + "observedNode": { + "allOf": [ + { + "$ref": "#/$defs/StatisticalPopulationConstrained" + } + ], + "description": "Please select one of the following statistical populations for you observation", + "examples": [ + "PERSONS" + ], + "title": "Statistical Population" + }, + "measuredValue": { + "description": "Please provide the population size associated with the population type the dataset i.e. 
1000 people in a study, or 87 images (MRI) of Knee. Usage Note: Used with Statistical Population, which specifies the type of the population in the dataset.", + "title": "Measured Value", + "type": "integer" + }, + "disambiguatingDescription": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ], + "default": null, + "description": "If SNOMED CT term does not provide sufficient detail, please provide a description that disambiguates the population type.", + "title": "Disambiguating Description" + }, + "observationDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + } + ], + "description": "Please provide the date that the observation was made. Some datasets may be continuously updated and the number of records will change regularly, so the observation date provides users with the date that the analysis or query was run to generate the particular observation. Multiple observations can be made i.e. an observation of cumulative COVID positive cases by specimen on the 1/1/2021 could be 2M. On the 8/1/2021 a new observation could be 2.1M. 
Users can add multiple observations.", + "title": "Observation Date" + }, + "measuredProperty": { + "allOf": [ + { + "$ref": "#/$defs/MeasuredProperty" + } + ], + "description": "Initially this will be defaulted to \"COUNT\"", + "title": "Measured Property" + } + }, + "required": [ + "observedNode", + "measuredValue", + "observationDate", + "measuredProperty" + ], + "title": "Observation", + "type": "object" + }, + "OneHundredFiftyCharacters": { + "maxLength": 150, + "minLength": 2, + "title": "OneHundredFiftyCharacters", + "type": "string" + }, + "Origin": { + "additionalProperties": false, + "properties": { + "purpose": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Purpose" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate the purpose(s) for which the dataset was collected.", + "title": "Purpose" + }, + "source": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Source" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate the source of the data extraction", + "title": "Source" + }, + "collectionSituation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/Setting" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate the setting(s) where data was collected. 
Multiple settings may be provided", + "title": "Setting" + } + }, + "title": "Origin", + "type": "object" + }, + "Periodicity": { + "enum": [ + "STATIC", + "IRREGULAR", + "CONTINUOUS", + "BIENNIAL", + "ANNUAL", + "BIANNUAL", + "QUARTERLY", + "BIMONTHLY", + "MONTHLY", + "BIWEEKLY", + "WEEKLY", + "SEMIWEEKLY", + "DAILY", + "OTHER", + null + ], + "title": "Periodicity" + }, + "PhysicalType": { + "enum": [ + "Respiratory", + "Vision", + "Hearing", + "Musculoskeletal", + "Cardiovascular", + "Reproductive" + ], + "title": "PhysicalType", + "type": "string" + }, + "Provenance": { + "additionalProperties": false, + "properties": { + "origin": { + "anyOf": [ + { + "$ref": "#/$defs/Origin" + }, + { + "type": "null" + } + ], + "default": null + }, + "temporal": { + "$ref": "#/$defs/Temporal" + } + }, + "required": [ + "temporal" + ], + "title": "Provenance", + "type": "object" + }, + "PsychologicalType": { + "enum": [ + "Cognitive Function", + "Mental Health" + ], + "title": "PsychologicalType", + "type": "string" + }, + "Purpose": { + "enum": [ + "STUDY", + "DISEASE REGISTRY", + "TRIAL", + "CARE", + "AUDIT", + "ADMINISTRATIVE", + "FINANCIAL", + "STATUTORY", + "OTHER", + null + ], + "title": "Purpose" + }, + "Revision": { + "additionalProperties": false, + "properties": { + "version": { + "allOf": [ + { + "$ref": "#/$defs/Semver" + } + ], + "description": "Semantic Version" + }, + "url": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "description": "URL endpoint to obtain the version" + } + }, + "required": [ + "version", + "url" + ], + "title": "Revision", + "type": "object" + }, + "Semver": { + "pattern": "^([0-9]+)\\.([0-9]+)\\.([0-9]+)$", + "title": "Semver", + "type": "string" + }, + "Setting": { + "enum": [ + "CLINIC", + "PRIMARY CARE", + "ACCIDENT AND EMERGENCY", + "OUTPATIENTS", + "IN-PATIENTS", + "SERVICES", + "COMMUNITY", + "HOME", + "PRIVATE", + "PHARMACY", + "SOCIAL CARE", + "LOCAL AUTHORITY", + "NATIONAL GOVERNMENT", + "OTHER" + 
], + "title": "Setting", + "type": "string" + }, + "ShortDescription": { + "anyOf": [ + { + "maxLength": 1000, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "ShortDescription" + }, + "SocioEconomicType": { + "enum": [ + "Finances", + "Family Circumstances", + "Housing", + "Education", + "Marital Status", + "Occupation", + "Ethnic Group", + "Social Support" + ], + "title": "SocioEconomicType", + "type": "string" + }, + "Source": { + "enum": [ + "EPR", + "ELECTRONIC SURVEY", + "LIMS", + "OTHER INFORMATION SYSTEM", + "PAPER BASED", + "FREETEXT NLP", + "MACHINE GENERATED", + "OTHER" + ], + "title": "Source", + "type": "string" + }, + "StatisticalPopulationConstrained": { + "enum": [ + "PERSONS", + "EVENTS", + "FINDINGS" + ], + "title": "StatisticalPopulationConstrained", + "type": "string" + }, + "Summary": { + "additionalProperties": false, + "properties": { + "title": { + "allOf": [ + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + } + ], + "description": "Title of the dataset limited to 150 characters. It should provide a short description of the dataset and be unique across the gateway. If your title is not unique, please add a prefix with your organisation name or identifier to differentiate it from other datasets within the Gateway. Please avoid acronyms wherever possible. Good titles should summarise the content of the dataset and if relevant, the region the dataset covers.", + "examples": [ + [ + "North West London COVID-19 Patient Level Situation Report" + ] + ], + "title": "Title" + }, + "abstract": { + "anyOf": [ + { + "$ref": "#/$defs/AbstractText" + }, + { + "type": "null" + } + ], + "description": "Provide a clear and brief descriptive signpost for researchers who are searching for data that may be relevant to their research. The abstract should allow the reader to determine the scope of the data collection and accurately summarise its content. 
The optimal length is one paragraph (limited to 255 characters) and effective abstracts should avoid long sentences and abbreviations where possible", + "examples": [ + "CPRD Aurum contains primary care data contributed by General Practitioner (GP) practices using EMIS Web\u00ae including patient registration information and all care events that GPs have chosen to record as part of their usual medical practice." + ], + "title": "Dataset Abstract" + }, + "publisher": { + "allOf": [ + { + "$ref": "#/$defs/hdr_schemata__models__HDRUK__v2_1_2__Organisation__Organisation" + } + ], + "description": "This is the organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member). However, in some cases this will be different i.e. Tissue Directory are an HDR UK Gateway organisation but coordinate activities across a number of data publishers i.e. Cambridge Blood and Stem Cell Biobank.", + "title": "Dataset publisher" + }, + "contactPoint": { + "anyOf": [ + { + "$ref": "#/$defs/EmailAddress" + }, + { + "type": "null" + } + ], + "description": "Please provide a valid email address that can be used to coordinate data access requests with the publisher. Organisations are expected to provide a dedicated email address associated with the data access request process. Notes- An employee's email address can only be provided on a temporary basis and if one is provided an explicit consent must be obtained for this purpose.", + "examples": [ + "SAILDatabank@swansea.ac.uk" + ], + "title": "Contact Point" + }, + "keywords": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "$ref": "#/$defs/OneHundredFiftyCharacters" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Please provide relevant and specific keywords that can improve the SEO of your dataset as a comma separated list. 
Notes: Onboarding portal will suggest keywords based on title, abstract and description. We are compiling a standardised list of keywords and synonyms across datasets to make filtering easier for users.", + "title": "Keywords" + }, + "alternateIdentifiers": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/ShortDescription" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Alternate dataset identifiers or local identifiers", + "title": "Alternate dataset identifiers" + }, + "doiName": { + "anyOf": [ + { + "$ref": "#/$defs/Doi" + }, + { + "type": "null" + } + ], + "default": null, + "description": "All HDR UK registered datasets should either have a Digital Object Identifier (DOI) or be working towards obtaining one. If a DOI is available, please provide the DOI.", + "examples": [ + "10.3399/bjgp17X692645" + ], + "title": "Digital Object Identifier" + }, + "datasetType": { + "anyOf": [ + { + "$ref": "#/$defs/DatasetType" + }, + { + "type": "null" + } + ], + "description": "Placeholder for dataset type", + "examples": [ + [ + "" + ] + ], + "title": "Datasetype" + }, + "datasetSubType": { + "anyOf": [ + { + "$ref": "#/$defs/DatasetType" + }, + { + "type": "null" + } + ], + "description": "Placeholder for dataset sub-type", + "examples": [ + [ + "" + ] + ], + "title": "Datasetype" + }, + "populationSize": { + "anyOf": [ + { + "type": "integer" + }, + { + "type": "null" + } + ], + "description": "Summary population size of the cohort", + "title": "Population size" + } + }, + "required": [ + "title", + "abstract", + "publisher", + "contactPoint", + "keywords", + "datasetType", + "datasetSubType", + "populationSize" + ], + "title": "Summary", + "type": "object" + }, + "Temporal": { + "additionalProperties": false, + "properties": { + "distributionReleaseDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + 
"format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Date of the latest release of the dataset. If this is a regular release i.e. quarterly, or this is a static dataset please complete this alongside Periodicity. If this is Irregular or Continuously released please leave this blank. Notes: Periodicity and release date will be used to determine when the next release is expected. E.g. if the release date is documented as 01/01/2020 and it is now 20/04/2020 and there is a quarterly release schedule, the latest release will be calculated as 01/04/2020.", + "title": "Release Date" + }, + "startDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "description": "The start of the time period that the dataset provides coverage for. If there are multiple cohorts in the dataset with varying start dates, please provide the earliest date and use the description or the media attribute to provide more information.", + "title": "Start Date" + }, + "endDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "$ref": "#/$defs/EndDateEnum" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The end of the time period that the dataset provides coverage for. If the dataset is \u201cContinuous\u201d and has no known end date, please state continuous. 
If there are multiple cohorts in the dataset with varying end dates, please provide the latest date and use the description or the media attribute to provide more information.", + "title": "End Date" + }, + "timeLag": { + "allOf": [ + { + "$ref": "#/$defs/TimeLag" + } + ], + "description": "Please indicate the typical time-lag between an event and the data for that event appearing in the dataset", + "title": "Time Lag" + }, + "publishingFrequency": { + "allOf": [ + { + "$ref": "#/$defs/Periodicity" + } + ], + "description": "Please indicate the frequency of distribution release. If a dataset is distributed regularly please choose a distribution release periodicity from the constrained list and indicate the next release date. When the release date becomes historical, a new release date will be calculated based on the publishing periodicity. If a dataset has been published and will remain static please indicate that it is static and indicate when it was released. If a dataset is released on an irregular basis or \u201con-demand\u201d please indicate that it is Irregular and leave release date as null. If a dataset can be published in real-time or near-real-time please indicate that it is continuous and leave release date as null. 
Notes: see https://www.dublincore.org/specifications/dublin-core/collection-description/frequency/", + "title": "Publishing Frequency" + } + }, + "required": [ + "startDate", + "timeLag", + "publishingFrequency" + ], + "title": "Temporal", + "type": "object" + }, + "TimeLag": { + "enum": [ + "LESS 1 WEEK", + "1-2 WEEKS", + "2-4 WEEKS", + "1-2 MONTHS", + "2-6 MONTHS", + "MORE 6 MONTHS", + "VARIABLE", + "NO TIMELAG", + "NOT APPLICABLE", + "OTHER", + null + ], + "title": "TimeLag" + }, + "TissueCollectionTypeEnum": { + "enum": [ + "Case-control", + "Cohort", + "Cross-sectional", + "Longitudinal", + "Twin-study", + "Quality control", + "Population-based", + "Disease specific", + "Birth cohort", + "Other" + ], + "title": "TissueCollectionTypeEnum", + "type": "string" + }, + "TissueDataCategoriesEnum": { + "enum": [ + "Biological samples", + "Survey data", + "Imaging data", + "Medical records", + "National registries", + "Genealogical records", + "Physiological/Biochemical measurements", + "Other" + ], + "title": "TissueDataCategoriesEnum", + "type": "string" + }, + "TissueSampleMetadata": { + "properties": { + "creationDate": { + "anyOf": [ + { + "format": "date", + "type": "string" + }, + { + "format": "date-time", + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Date when the tissue sample metadata was created", + "title": "Creation Date" + }, + "AnatomicalSiteOntologyCode": { + "anyOf": [ + { + "$ref": "#/$defs/ICD_0_3" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Ontology code for the anatomical site, this code must match an ICD-0-3 format", + "title": "Anatomical Site Ontology Code" + } + }, + "title": "TissueSampleMetadata", + "type": "object" + }, + "TissuesSampleCollection": { + "properties": { + "dataCategories": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/TissueDataCategoriesEnum" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The type of 
data that is associated with the samples in the study. Can be several values MIABIS-2.0-13", + "title": "Data Categories" + }, + "materialType": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/MaterialTypeCategories" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The biospecimen saved from a biological entity for propagation e.g. testing, diagnostics, treatment or research purposes. Can be several values MIABIS-2.0-14", + "title": "Material Type" + }, + "tissueSampleMetadata": { + "anyOf": [ + { + "$ref": "#/$defs/TissueSampleMetadata" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Metadata related to the tissue sample", + "title": "Tissue Sample Metadata" + }, + "collectionType": { + "anyOf": [ + { + "$ref": "#/$defs/TissueCollectionTypeEnum" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The type of the sample collection. Can be several values [MIABIS-2.0-16](https://github.com/BBMRI-ERIC/miabis/blob/master/Structured-data-and-lists.md#collection-type)", + "title": "Collection Type" + } + }, + "title": "TissuesSampleCollection", + "type": "object" + }, + "Url": { + "anyOf": [ + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "title": "Url" + }, + "Usage": { + "additionalProperties": false, + "properties": { + "dataUseLimitation": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Any restrictions to its usage", + "example": "GENERAL RESEARCH USE,PROJECT SPECIFIC RESTRICTIONS", + "title": "Data Use Limitation" + }, + "dataUseRequirement": { + "anyOf": [ + { + "$ref": "#/$defs/CommaSeparatedValues" + }, + { + "type": "null" + } + ], + "description": "Any requirements needed for data usage", + "example": "PROJECT SPECIFIC RESTRICTIONS,TIME LIMIT ON USE,USER SPECIFIC RESTRICTION", + "title": "Data Use Requirements" + }, + "resourceCreator": { + "anyOf": [ + { + 
"$ref": "#/$defs/hdr_schemata__models__GWDM__v1_1__Organisation__Organisation" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Who has created this resource", + "title": "Resource Creator" + } + }, + "required": [ + "dataUseLimitation", + "dataUseRequirement" + ], + "title": "Usage", + "type": "object" + }, + "Uuidv4": { + "maxLength": 36, + "minLength": 36, + "pattern": "^[a-fA-F0-9]{8}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{4}-[a-fA-F0-9]{12}$", + "title": "Uuidv4", + "type": "string" + }, + "hdr_schemata__models__GWDM__v1_1__Organisation__Organisation": { + "properties": { + "name": { + "anyOf": [ + { + "$ref": "#/$defs/Name" + }, + { + "type": "null" + } + ], + "description": "The organisation responsible for running or supporting the data access request process, as well as publishing and maintaining the metadata. In most this will be the same as the HDR UK Organisation (Hub or Alliance Member)/", + "example": "SAIL", + "title": "Name" + }, + "gatewayId": { + "anyOf": [ + { + "maxLength": 50, + "minLength": 2, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The link to an ID somewhere in the gateway where more information on the publisher can be retrieved.", + "title": "Publisher gateway id" + }, + "rorId": { + "anyOf": [ + { + "maxLength": 9, + "minLength": 9, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The Research Organization Registry (ROR) for the organisation, if applicable", + "title": "Research Organization Registry Identifier" + } + }, + "required": [ + "name" + ], + "title": "Organisation", + "type": "object" + }, + "hdr_schemata__models__HDRUK__v2_1_2__Organisation__Organisation": { + "additionalProperties": false, + "properties": { + "identifier": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a Grid.ac identifier (see https://www.grid.ac/institutes) 
for your organisation. If your organisation does not have a Grid.ac identifier please use the \u201csuggest an institute\u201d function here: https://www.grid.ac/institutes#", + "title": "Organisation Identifier" + }, + "name": { + "allOf": [ + { + "$ref": "#/$defs/OneHundredFiftyCharacters" + } + ], + "description": "Name of the organisation", + "title": "Organisation Name" + }, + "logo": { + "anyOf": [ + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a logo associated with the Gateway Organisation using a valid URL. The following formats will be accepted .jpg, .png or .svg.", + "title": "Organisation Logo" + }, + "description": { + "anyOf": [ + { + "$ref": "#/$defs/Description" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please provide a URL that describes the organisation.", + "title": "Organisation Description" + }, + "contactPoint": { + "anyOf": [ + { + "$ref": "#/$defs/EmailAddress" + }, + { + "items": { + "anyOf": [ + { + "$ref": "#/$defs/EmailAddress" + }, + { + "type": "null" + } + ] + }, + "type": "array" + }, + { + "type": "null" + } + ], + "description": "Organisation contact point(s)", + "title": "Organisation Contact Point" + }, + "memberOf": { + "anyOf": [ + { + "$ref": "#/$defs/MemberOf" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Please indicate if the organisation is an Alliance Member or a Hub.", + "title": "Organisation Membership" + } + }, + "required": [ + "name", + "contactPoint" + ], + "title": "Organisation", + "type": "object" + } + }, + "additionalProperties": false, + "properties": { + "identifier": { + "anyOf": [ + { + "$ref": "#/$defs/Uuidv4" + }, + { + "$ref": "#/$defs/Url" + }, + { + "type": "null" + } + ], + "description": "System dataset identifier", + "examples": [ + [ + "226fb3f1-4471-400a-8c39-2b66d46a39b6", + "https://web.www.healthdatagateway.org/dataset/226fb3f1-4471-400a-8c39-2b66d46a39b6" + ] + ], + 
"title": "Dataset identifier" + }, + "version": { + "allOf": [ + { + "$ref": "#/$defs/Semver" + } + ], + "description": "Dataset metadata version", + "examples": [ + "1.1.0" + ], + "title": "Dataset Version" + }, + "revisions": { + "description": "Revisions of Dataset metadata", + "items": { + "$ref": "#/$defs/Revision" + }, + "title": "Dataset Revisions", + "type": "array" + }, + "issued": { + "description": "Dataset Metadata Creation Date", + "format": "date-time", + "title": "Creation Date", + "type": "string" + }, + "modified": { + "description": "Dataset Metadata Creation Date", + "format": "date-time", + "title": "Modification Date", + "type": "string" + }, + "summary": { + "allOf": [ + { + "$ref": "#/$defs/Summary" + } + ], + "description": "Summary metadata must be completed by Data Custodians onboarding metadata into the Innovation Gateway MVP." + }, + "documentation": { + "anyOf": [ + { + "$ref": "#/$defs/Documentation" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Documentation can include a rich text description of the dataset or links to media such as documents, images, presentations, videos or links to data dictionaries, profiles or dashboards. 
Organisations are required to confirm that they have permission to distribute any additional media.", + "title": "Documentation" + }, + "coverage": { + "anyOf": [ + { + "$ref": "#/$defs/Coverage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Observational, Spatial and Temporal coverage", + "title": "Coverage" + }, + "provenance": { + "anyOf": [ + { + "$ref": "#/$defs/Provenance" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Provenance information allows researchers to understand data within the context of its origins and can be an indicator of quality, authenticity and timeliness.", + "title": "Provenance" + }, + "accessibility": { + "allOf": [ + { + "$ref": "#/$defs/Accessibility" + } + ], + "description": "Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets." + }, + "enrichmentAndLinkage": { + "anyOf": [ + { + "$ref": "#/$defs/EnrichmentAndLinkage" + }, + { + "type": "null" + } + ], + "default": null, + "description": "This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", + "title": "Enrichment and Linkage" + }, + "observations": { + "description": "Multiple observations about the dataset may be provided and users are expected to provide at least one observation (1..*). We will be supporting the schema.org observation model (https://schema.org/Observation) with default values. Users will be encouraged to provide their own statistical populations as the project progresses. 
Example: <b> Statistical Population 1 </b> type: StatisticalPopulation populationType: Persons numConstraints: 0 <b> Statistical Population 2 </b> type: StatisticalPopulation populationType: Events numConstraints: 0 <b> Statistical Population 3 </b> type: StatisticalPopulation populationType: Findings numConstraints: 0 typeOf: Observation observedNode: <b> Statistical Population 1 </b> measuredProperty: count measuredValue: 32937 observationDate: \u201c2017\u201d", + "items": { + "$ref": "#/$defs/Observation" + }, + "title": "Observations", + "type": "array" + }, + "structuralMetadata": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/DataClass" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Descriptions of all tables and data elements that can be included in the dataset", + "title": "Structural Metadata" + }, + "tissuesSampleCollection": { + "anyOf": [ + { + "items": { + "$ref": "#/$defs/TissuesSampleCollection" + }, + "type": "array" + }, + { + "type": "null" + } + ], + "default": null, + "description": "Metadata collection for Tissue Samples datasets", + "title": "Tissues Sample Collection" + } + }, + "required": [ + "identifier", + "version", + "revisions", + "issued", + "modified", + "summary", + "accessibility", + "observations" + ], + "title": "Hdruk221", + "type": "object" +} \ No newline at end of file diff --git a/hdr_schemata/models/HDRUK/__init__.py b/hdr_schemata/models/HDRUK/__init__.py index 1af2d8d..4bfaaac 100644 --- a/hdr_schemata/models/HDRUK/__init__.py +++ b/hdr_schemata/models/HDRUK/__init__.py @@ -1,3 +1,4 @@ from .v2_1_2 import Hdruk212 from .v2_1_3 import Hdruk213 from .v2_2_0 import Hdruk220 +from .v2_2_1 import Hdruk221 diff --git a/hdr_schemata/models/HDRUK/create_json_schema.py b/hdr_schemata/models/HDRUK/create_json_schema.py index 08f0b1a..d5233d6 100644 --- a/hdr_schemata/models/HDRUK/create_json_schema.py +++ b/hdr_schemata/models/HDRUK/create_json_schema.py @@ -1,9 +1,9 @@ -from 
pydantic import ValidationError import v2_1_2 import v2_1_3 import v2_2_0 -import json +import v2_2_1 v2_1_2.Hdruk212.save_schema() v2_1_3.Hdruk213.save_schema() v2_2_0.Hdruk220.save_schema() +v2_2_1.Hdruk221.save_schema() diff --git a/hdr_schemata/models/HDRUK/v2_1_3/__init__.py b/hdr_schemata/models/HDRUK/v2_1_3/__init__.py index 073749f..5b652f3 100644 --- a/hdr_schemata/models/HDRUK/v2_1_3/__init__.py +++ b/hdr_schemata/models/HDRUK/v2_1_3/__init__.py @@ -2,6 +2,8 @@ import json from typing import Optional from pydantic import Field + +from hdr_schemata.models.HDRUK.v2_1_2 import * from .Provenance import Provenance diff --git a/hdr_schemata/models/HDRUK/v2_2_0/__init__.py b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py index 4e2321e..9d6975c 100644 --- a/hdr_schemata/models/HDRUK/v2_2_0/__init__.py +++ b/hdr_schemata/models/HDRUK/v2_2_0/__init__.py @@ -3,6 +3,7 @@ from typing import Optional, List from pydantic import Field +from hdr_schemata.models.HDRUK.v2_1_3 import * from .TissuesSampleCollection import TissuesSampleCollection from .Summary import Summary from .Coverage import Coverage diff --git a/hdr_schemata/models/HDRUK/v2_2_1/Access.py b/hdr_schemata/models/HDRUK/v2_2_1/Access.py new file mode 100644 index 0000000..0265bb4 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_2_1/Access.py @@ -0,0 +1,14 @@ +from typing import Optional, List +from pydantic import Field +from hdr_schemata.definitions.HDRUK import AccessService + +from hdr_schemata.models.GWDM.v1_1 import Access as BaseAccess + + +class Access(BaseAccess): + accessServiceCategory: Optional[List[AccessService]] = Field( + None, + description="Where access to data come from: TRE/SED, direct access, open acccess, varies based on project.", + example="", + title="Access/governance requirementss", + ) diff --git a/hdr_schemata/models/HDRUK/v2_2_1/Accessibility.py b/hdr_schemata/models/HDRUK/v2_2_1/Accessibility.py new file mode 100644 index 0000000..5f1cdab --- /dev/null +++ 
b/hdr_schemata/models/HDRUK/v2_2_1/Accessibility.py @@ -0,0 +1,13 @@ +from pydantic import Field +from hdr_schemata.definitions.HDRUK import * + +from hdr_schemata.models.GWDM.v1_1 import Accessibility as BaseAccessibility +from .Access import Access + + +class Accessibility(BaseAccessibility): + access: Access = Field( + ..., + description="This section includes information about data access", + title="Access", + ) diff --git a/hdr_schemata/models/HDRUK/v2_2_1/EnrichmentAndLinkage.py b/hdr_schemata/models/HDRUK/v2_2_1/EnrichmentAndLinkage.py new file mode 100644 index 0000000..9dc6d8d --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_2_1/EnrichmentAndLinkage.py @@ -0,0 +1,16 @@ +from typing import Optional, List +from pydantic import Field +from hdr_schemata.models.HDRUK.v2_2_0 import ( + EnrichmentAndLinkage as BaseEnrichmentAndLinkage, +) + +from hdr_schemata.definitions.HDRUK import Url + + +class EnrichmentAndLinkage(BaseEnrichmentAndLinkage): + syntheticDataWebLink: Optional[List[Url]] = Field( + None, + description="Links to locations of information and or raw downloads of synthetic data associated with this dataset", + example="", + title="Synthetic Data Web Links", + ) diff --git a/hdr_schemata/models/HDRUK/v2_2_1/__init__.py b/hdr_schemata/models/HDRUK/v2_2_1/__init__.py new file mode 100644 index 0000000..0fa2541 --- /dev/null +++ b/hdr_schemata/models/HDRUK/v2_2_1/__init__.py @@ -0,0 +1,27 @@ +from hdr_schemata.models.HDRUK.v2_2_0 import Hdruk220 +import json +from typing import Optional, List +from pydantic import Field + +from .EnrichmentAndLinkage import EnrichmentAndLinkage +from .Accessibility import Accessibility + + +class Hdruk221(Hdruk220): + + enrichmentAndLinkage: Optional[EnrichmentAndLinkage] = Field( + None, + description="This section includes information about related datasets that may have previously been linked, as well as indicating if there is the opportunity to link to other datasets in the future. 
If a dataset has been enriched and/or derivations, scores and existing tools are available this section allows providers to indicate this to researchers.", + title="Enrichment and Linkage", + ) + + accessibility: Accessibility = Field( + ..., + description="Accessibility information allows researchers to understand access, usage, limitations, formats, standards and linkage or interoperability with toolsets.", + title="Accessibility", + ) + + @classmethod + def save_schema(cls, location="./2.2.1/schema.json"): + with open(location, "w") as f: + json.dump(cls.model_json_schema(), f, indent=6) diff --git a/hdr_schemata/utils/create_markdown.py b/hdr_schemata/utils/create_markdown.py index 665711b..caf1fb7 100644 --- a/hdr_schemata/utils/create_markdown.py +++ b/hdr_schemata/utils/create_markdown.py @@ -73,8 +73,6 @@ def get_fields(structure, model: type[BaseModel]): is_list, is_optional, type_names = extract_type_info(_type) - - value = { "name": name, "required": field.is_required(), @@ -87,11 +85,9 @@ def get_fields(structure, model: type[BaseModel]): "is_optional": is_optional, } - while hasattr(t, "__args__"): t = t.__args__[0] - - + if isinstance(t, type) and issubclass(t, BaseModel): subItems = [] get_fields(subItems, t) @@ -154,11 +150,15 @@ def create_markdown(Model, path, name): from hdr_schemata.models.HDRUK import Hdruk212 from hdr_schemata.models.HDRUK import Hdruk213 from hdr_schemata.models.HDRUK import Hdruk220 +from hdr_schemata.models.HDRUK import Hdruk221 from hdr_schemata.models.GWDM.v1_1 import Gwdm10 from hdr_schemata.models.GWDM.v1_1 import Gwdm11 +from hdr_schemata.models.GWDM.v1_2 import Gwdm12 create_markdown(Gwdm10, "./docs/GWDM/", "1.0") create_markdown(Gwdm11, "./docs/GWDM/", "1.1") +create_markdown(Gwdm12, "./docs/GWDM/", "1.2") create_markdown(Hdruk212, "./docs/HDRUK/", "2.1.2") create_markdown(Hdruk213, "./docs/HDRUK/", "2.1.3") create_markdown(Hdruk220, "./docs/HDRUK/", "2.2.0") +create_markdown(Hdruk221, "./docs/HDRUK/", "2.2.1")