Skip to content

Commit

Permalink
Merge pull request #8 from HDRUK/fix/remove_fields_bioschema
Browse files Browse the repository at this point in the history
Fix/remove fields bioschema
  • Loading branch information
calmacx authored Oct 2, 2023
2 parents ff71910 + 3b72631 commit afdb39e
Show file tree
Hide file tree
Showing 7 changed files with 20 additions and 269 deletions.
7 changes: 0 additions & 7 deletions hdr_schemata/examples/SchemaOrg/BioSchema/example.json
Original file line number Diff line number Diff line change
Expand Up @@ -9,18 +9,11 @@
"alternateName": "HDR UK Papers & Preprints",
"description": "Publications that mention HDR-UK (or any variant thereof) in Acknowledgements or Author Affiliations\n\nThis will include:\n- Papers\n- COVID-19 Papers\n- COVID-19 Preprint",
"citation": "10.1093/ije/dyx196",
"funder": {
"@type": "Organization",
"legalName": "HDR UK Science Team",
"name": "HDR UK Science Team"
},
"creator": {
"@type": "Organization",
"legalName": "HDR UK Science Team",
"name": "HDR UK Science Team"
},
"spatialCoverage": "MACHINE GENERATED",
"temporalCoverage": "2020-03-31/2022-04-30",
"isAccessibleForFree": true,
"keywords": "Preprints,Papers,HDR UK",
"license": "https://raw.githubusercontent.com/HDRUK/papers/master/LICENSE"
Expand Down
4 changes: 2 additions & 2 deletions hdr_schemata/models/GWDM/1.0/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -313,15 +313,15 @@
"type": "null"
}
],
"default": null,
"description": "Data values contained within the column",
"title": "Values"
}
},
"required": [
"name",
"dataType",
"sensitive",
"values"
"sensitive"
],
"title": "DataColumn",
"type": "object"
Expand Down
2 changes: 1 addition & 1 deletion hdr_schemata/models/GWDM/base/DataColumn.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ class Config:
)

values: Optional[List[DataValue]] = Field(
...,
None,
description='Data values contained within the column',
title='Values',
)
11 changes: 8 additions & 3 deletions hdr_schemata/models/SchemaOrg/BioSchema/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -51,10 +51,15 @@ class Dataset(BaseDataset):
all_keys = list(Dataset.model_fields.keys())
for field in all_keys:
if not field in __fields_to_keep:
if type(Dataset.__fields_set__) == set:
Dataset.__fields_set__.remove(field)
Dataset.model_fields_set.deleter(field)
Dataset.model_computed_fields.deleter(field)
del Dataset.model_fields[field]

for field in __fields_to_keep:
if not field in Dataset.model_fields.keys():
raise NotImplementedError(f'Field "{field}" has not been implemented!')


Dataset.__pydantic_complete__ = False
del Dataset.__pydantic_core_schema__
Dataset.model_rebuild(force=True)
109 changes: 0 additions & 109 deletions hdr_schemata/models/SchemaOrg/BioSchema/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -152,35 +152,10 @@
"title": "DataDownload",
"type": "object"
},
"GeoCoordinates": {
"properties": {
"latitude": {
"pattern": "^[-]?([0-8]?[0-9]|90)(\\.[0-9]+)?$",
"title": "Latitude",
"type": "string"
},
"longitude": {
"pattern": "^[-]?((1[0-7][0-9])|([0-9]?[0-9]))(\\.[0-9]+)?$",
"title": "Longitude",
"type": "string"
}
},
"required": [
"latitude",
"longitude"
],
"title": "GeoCoordinates",
"type": "object"
},
"Number": {
"title": "Number",
"type": "string"
},
"OpenEndedTimePeriod": {
"pattern": "^\\d{4}-\\d{2}-\\d{2}\\/\\.\\.$",
"title": "OpenEndedTimePeriod",
"type": "string"
},
"Organization": {
"properties": {
"@type": {
Expand Down Expand Up @@ -328,36 +303,9 @@
"title": "Person",
"type": "object"
},
"Place": {
"properties": {
"geo": {
"allOf": [
{
"$ref": "#/$defs/GeoCoordinates"
}
],
"description": "GeoCoordinates of the place"
}
},
"required": [
"geo"
],
"title": "Place",
"type": "object"
},
"SingleDate": {
"format": "date",
"title": "SingleDate",
"type": "string"
},
"Text": {
"title": "Text",
"type": "string"
},
"TimePeriod": {
"pattern": "^\\d{4}-\\d{2}-\\d{2}\\/\\d{4}-\\d{2}-\\d{2}$",
"title": "TimePeriod",
"type": "string"
}
},
"additionalProperties": false,
Expand Down Expand Up @@ -500,19 +448,6 @@
"description": "A citation or reference to another creative work, such as another publication, web page, scholarly article, etc.",
"title": "citation"
},
"funder": {
"anyOf": [
{
"$ref": "#/$defs/Organization"
},
{
"type": "null"
}
],
"default": null,
"description": "A person or organization that supports (sponsors) something through some kind of financial contribution.",
"title": "Funder"
},
"isPartOf": {
"anyOf": [
{
Expand Down Expand Up @@ -687,50 +622,6 @@
"description": "The URL of a reference web page that unambiguously indicates the dataset's identity.",
"title": "Same As (dataset)"
},
"spatialCoverage": {
"anyOf": [
{
"$ref": "#/$defs/Text"
},
{
"$ref": "#/$defs/Place"
},
{
"type": "null"
}
],
"default": null,
"description": "You can provide a single point that describes the spatial aspect of the dataset. Only include this property if the dataset has a spatial dimension. For example, a single point where all the measurements were collected, or the coordinates of a bounding box for an area.",
"examples": [
{
"@type": "Place",
"geo": {
"@type": "GeoShape",
"box": "39.3280 120.1633 40.445 123.7878"
}
}
],
"title": "Spatial Coverage"
},
"temporalCoverage": {
"anyOf": [
{
"$ref": "#/$defs/SingleDate"
},
{
"$ref": "#/$defs/TimePeriod"
},
{
"$ref": "#/$defs/OpenEndedTimePeriod"
},
{
"type": "null"
}
],
"default": null,
"description": "The data in the dataset covers a specific time interval. Only include this property if the dataset has a temporal dimension. Schema.org uses the ISO 8601 standard to describe time intervals and time points. You can describe dates differently depending upon the dataset interval. Indicate open-ended intervals with two decimal points (..).",
"title": "Temporal Coverage"
},
"variableMeasured": {
"anyOf": [
{
Expand Down
13 changes: 9 additions & 4 deletions hdr_schemata/models/SchemaOrg/GoogleRecommended/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -242,14 +242,19 @@ class Dataset(BaseDataset):
# - There is a problem with pydantic v2 where the 'exclude' feature doesn't currently work
# see: https://github.com/pydantic/pydantic/discussions/2686
# - This hack means that I can inherit from the Schema.Org model but then exclude fields
# that are not needed for the BioSchema
# that are not needed for the Google Recommended Schema
all_keys = list(Dataset.model_fields.keys())
for field in all_keys:
if not field in __fields_to_keep:
if type(Dataset.__fields_set__) == set:
Dataset.__fields_set__.remove(field)
Dataset.model_fields_set.deleter(field)
Dataset.model_computed_fields.deleter(field)
del Dataset.model_fields[field]

for field in __fields_to_keep:
if not field in Dataset.model_fields.keys():
raise NotImplementedError(f'Field "{field}" has not been implemented!')


Dataset.__pydantic_complete__ = False
del Dataset.__pydantic_core_schema__
Dataset.model_rebuild(force=True)
143 changes: 0 additions & 143 deletions hdr_schemata/models/SchemaOrg/GoogleRecommended/schema.json
Original file line number Diff line number Diff line change
Expand Up @@ -66,32 +66,6 @@
"title": "CreativeWork",
"type": "object"
},
"DataCatalog": {
"description": "\n This is pretty incomplete, but we might not need it... \n ",
"properties": {
"@type": {
"allOf": [
{
"$ref": "#/$defs/Text"
}
],
"default": "DataCatalog"
},
"name": {
"allOf": [
{
"$ref": "#/$defs/Text"
}
],
"description": "The name of the item."
}
},
"required": [
"name"
],
"title": "DataCatalog",
"type": "object"
},
"DataDownload": {
"properties": {
"@type": {
Expand Down Expand Up @@ -478,37 +452,6 @@
"description": "A downloadable form of this dataset, at a specific location, in a specific format. This property can be repeated if different variations are available. There is no expectation that different downloadable distributions must contain exactly equivalent information (see also DCAT on this point). Different distributions might include or exclude different subsets of the entire dataset, for example",
"title": "distribution"
},
"includedInDataCatalog": {
"anyOf": [
{
"$ref": "#/$defs/DataCatalog"
},
{
"type": "null"
}
],
"default": null,
"description": "A data catalog which contains this dataset. Supersedes includedDataCatalog, catalog. Inverse property: dataset",
"title": "Included In Data Catalog"
},
"isBasedOn": {
"anyOf": [
{
"$ref": "#/$defs/CreativeWork"
},
{
"format": "uri",
"minLength": 1,
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Use to link a Dataset to the Study that it was generated from. A resource that was used in the creation of this resource. This term can be repeated for multiple sources. For example, http://example.com/great-multiplication-intro.html. Supersedes isBasedOnUrl.",
"title": "Is Based On"
},
"name": {
"allOf": [
{
Expand Down Expand Up @@ -882,92 +825,6 @@
"default": null,
"description": "Location of a page describing the dataset.",
"title": "url"
},
"dateCreated": {
"anyOf": [
{
"format": "date",
"type": "string"
},
{
"format": "date-time",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "The date on which the CreativeWork was created or the item was added to a DataFeed.",
"title": "Date Created"
},
"dateModified": {
"anyOf": [
{
"format": "date",
"type": "string"
},
{
"format": "date-time",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "The date on which the CreativeWork was most recently modified or when the item's entry was modified within a DataFeed.",
"title": "Date (last) Modified"
},
"datePublished": {
"anyOf": [
{
"format": "date",
"type": "string"
},
{
"format": "date-time",
"type": "string"
},
{
"type": "null"
}
],
"default": null,
"description": "Date of first broadcast/publication.",
"title": "Date Published"
},
"maintainer": {
"anyOf": [
{
"$ref": "#/$defs/Person"
},
{
"$ref": "#/$defs/hdr_schemata__models__SchemaOrg__base__Organization__Organization"
},
{
"type": "null"
}
],
"default": null,
"description": "A maintainer of a Dataset, software package (SoftwareApplication), or other Project. A maintainer is a Person or Organization that manages contributions to, and/or publication of, some (typically complex) artifact. It is common for distributions of software and data to be based on 'upstream' sources. When maintainer is applied to a specific version of something e.g. a particular version or packaging of a Dataset, it is always possible that the upstream source has a different maintainer. The isBasedOn property can be used to indicate such relationships between datasets to make the different maintenance roles clear. Similarly in the case of software, a package may have dedicated maintainers working on integration into software distributions such as Ubuntu, as well as upstream maintainers of the underlying work.",
"title": "Maintainer"
},
"publisher": {
"anyOf": [
{
"$ref": "#/$defs/Person"
},
{
"$ref": "#/$defs/hdr_schemata__models__SchemaOrg__base__Organization__Organization"
},
{
"type": "null"
}
],
"default": null,
"description": "The publisher of the creative work.",
"title": "Publisher"
}
},
"required": [
Expand Down

0 comments on commit afdb39e

Please sign in to comment.