From 386be97cdd24177c2178774aab74b5bbead9d16c Mon Sep 17 00:00:00 2001 From: Calum Macdonald Date: Thu, 26 Oct 2023 16:30:29 +0100 Subject: [PATCH] adding more for schema.org --- .../models/SchemaOrg/BioSchema/schema.json | 172 +++++++++++++++- .../SchemaOrg/GoogleRecommended/schema.json | 17 +- .../models/SchemaOrg/base/CreativeWork.py | 9 +- .../models/SchemaOrg/base/__init__.py | 3 +- .../models/SchemaOrg/default/schema.json | 189 +++++++++++++++++- .../SchemaOrg/helpers/get_from_schemaorg.py | 16 +- 6 files changed, 396 insertions(+), 10 deletions(-) diff --git a/hdr_schemata/models/SchemaOrg/BioSchema/schema.json b/hdr_schemata/models/SchemaOrg/BioSchema/schema.json index ea4b906..78ef231 100644 --- a/hdr_schemata/models/SchemaOrg/BioSchema/schema.json +++ b/hdr_schemata/models/SchemaOrg/BioSchema/schema.json @@ -128,9 +128,12 @@ "title": "Jurisdiction" }, "typicalAgeRange": { - "allOf": [ + "anyOf": [ { "$ref": "#/$defs/Text" + }, + { + "type": "null" } ], "default": null, @@ -148,6 +151,18 @@ "default": null, "description": "The typical expected age range, e.g. '7-9', '11-'.", "title": "temporalCoverage" + }, + "inLanguage": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The language of the content or performance or used in an action. Please use one of the language codes from the IETF BCP 47 standard. See also availableLanguage. Supersedes language." } }, "required": [ @@ -420,6 +435,152 @@ "title": "Person", "type": "object" }, + "PropertyValue": { + "properties": { + "@type": { + "allOf": [ + { + "$ref": "#/$defs/Text" + } + ], + "default": "PropertyValue" + }, + "maxValue": { + "anyOf": [ + { + "$ref": "#/$defs/Number" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The upper value of some characteristic or property." + }, + "measurementMethod": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A subproperty of measurementTechnique that can be used for specifying specific methods, in particular via MeasurementMethodEnum.", + "title": "Measurementmethod" + }, + "measurementTechnique": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A technique, method or technology used in an Observation, StatisticalVariable or Dataset (or DataDownload, DataCatalog), corresponding to the method used for measuring the corresponding variable(s) (for datasets, described using variableMeasured; for Observation, a StatisticalVariable). Often but not necessarily each variableMeasured will have an explicit representation as (or mapping to) an property such as those defined in Schema.org, or other RDF vocabularies and \"knowledge graphs\". In that case the subproperty of variableMeasured called measuredProperty is applicable.\n \n The measurementTechnique property helps when extra clarification is needed about how a measuredProperty was measured. This is oriented towards scientific and scholarly dataset publication but may have broader applicability; it is not intended as a full representation of measurement, but can often serve as a high level summary for dataset discovery. \n \n For example, if variableMeasured is: molecule concentration, measurementTechnique could be: \"mass spectrometry\" or \"nmr spectroscopy\" or \"colorimetry\" or \"immunofluorescence\". If the variableMeasured is \"depression rating\", the measurementTechnique could be \"Zung Scale\" or \"HAM-D\" or \"Beck Depression Inventory\". \n \n If there are several variableMeasured properties recorded for some given data object, use a PropertyValue for each variableMeasured and attach the corresponding measurementTechnique. The value can also be from an enumeration, organized as a MeasurementMetholdEnumeration.", + "title": "Measurementtechnique" + }, + "minValue": { + "anyOf": [ + { + "$ref": "#/$defs/Number" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The lower value of some characteristic or property." + }, + "propertyID": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A commonly used identifier for the characteristic represented by the property, e.g. a manufacturer or a standard code for a property. propertyID can be\n (1) a prefixed string, mainly meant to be used with standards for product properties; (2) a site-specific, non-prefixed string (e.g. the primary key of the property or the vendor-specific ID of the property), or (3)\n a URL indicating the type of the property, either pointing to an external vocabulary, or a Web resource that describes the property (e.g. a glossary entry).\n Standards bodies should promote a standard prefix for the identifiers of properties from their standards.", + "title": "Propertyid" + }, + "unitCode": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The unit of measurement given using the UN/CEFACT Common Code (3 characters) or a URL. Other codes than the UN/CEFACT Common Code may be used with a prefix followed by a colon.", + "title": "Unitcode" + }, + "unitText": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A string or text indicating the unit of measurement. Useful if you cannot provide a standard unit code for\n unitCode." + }, + "value": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The value of a QuantitativeValue (including Observation) or property value node.\n \n For QuantitativeValue and MonetaryAmount, the recommended type for values is 'Number'.\n For PropertyValue, it can be 'Text', 'Number', 'Boolean', or 'StructuredValue'.\n Use values from 0123456789 (Unicode 'DIGIT ZERO' (U+0030) to 'DIGIT NINE' (U+0039)) rather than superficially similar Unicode symbols.\n Use '.' (Unicode 'FULL STOP' (U+002E)) rather than ',' to indicate a decimal point. Avoid using these symbols as a readability separator." + }, + "valueReference": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A secondary value that provides additional information on the original value, e.g. a reference temperature or a type of measurement." + } + }, + "title": "PropertyValue", + "type": "object" + }, "Text": { "title": "Text", "type": "string" @@ -760,6 +921,15 @@ { "$ref": "#/$defs/Text" }, + { + "$ref": "#/$defs/PropertyValue" + }, + { + "items": { + "$ref": "#/$defs/PropertyValue" + }, + "type": "array" + }, { "type": "null" } diff --git a/hdr_schemata/models/SchemaOrg/GoogleRecommended/schema.json b/hdr_schemata/models/SchemaOrg/GoogleRecommended/schema.json index 756e612..6da22c3 100644 --- a/hdr_schemata/models/SchemaOrg/GoogleRecommended/schema.json +++ b/hdr_schemata/models/SchemaOrg/GoogleRecommended/schema.json @@ -128,9 +128,12 @@ "title": "Jurisdiction" }, "typicalAgeRange": { - "allOf": [ + "anyOf": [ { "$ref": "#/$defs/Text" + }, + { + "type": "null" } ], "default": null, @@ -148,6 +151,18 @@ "default": null, "description": "The typical expected age range, e.g. '7-9', '11-'.", "title": "temporalCoverage" + }, + "inLanguage": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The language of the content or performance or used in an action. Please use one of the language codes from the IETF BCP 47 standard. See also availableLanguage. Supersedes language." } }, "required": [ diff --git a/hdr_schemata/models/SchemaOrg/base/CreativeWork.py b/hdr_schemata/models/SchemaOrg/base/CreativeWork.py index 12dc227..1bfb34b 100644 --- a/hdr_schemata/models/SchemaOrg/base/CreativeWork.py +++ b/hdr_schemata/models/SchemaOrg/base/CreativeWork.py @@ -71,7 +71,7 @@ class CreativeWork(BaseModel): description="Indicates a legal jurisdiction, e.g. of some legislation, or where some government service is based." ) - typicalAgeRange: Text = Field( + typicalAgeRange: Optional[Text] = Field( None, description="The typical expected age range, e.g. '7-9', '11-'." ) @@ -81,4 +81,9 @@ class CreativeWork(BaseModel): title='temporalCoverage', description="The typical expected age range, e.g. '7-9', '11-'." ) - + + inLanguage: Optional[Text] = Field( + None, + description="The language of the content or performance or used in an action. Please use one of the language codes from the IETF BCP 47 standard. See also availableLanguage. Supersedes language." + ) + diff --git a/hdr_schemata/models/SchemaOrg/base/__init__.py b/hdr_schemata/models/SchemaOrg/base/__init__.py index 729dae7..6174dbc 100644 --- a/hdr_schemata/models/SchemaOrg/base/__init__.py +++ b/hdr_schemata/models/SchemaOrg/base/__init__.py @@ -9,6 +9,7 @@ from .Place import Place from .DataDownload import DataDownload from .DataCatalog import DataCatalog +from .PropertyValue import PropertyValue from hdr_schemata.definitions.SchemaOrg import Text, Text50, Number from hdr_schemata.definitions.SchemaOrg import SingleDate, TimePeriod, OpenEndedTimePeriod @@ -176,7 +177,7 @@ class Dataset(CreativeWork): description='The data in the dataset covers a specific time interval. Only include this property if the dataset has a temporal dimension. Schema.org uses the ISO 8601 standard to describe time intervals and time points. You can describe dates differently depending upon the dataset interval. Indicate open-ended intervals with two decimal points (..).' ) - variableMeasured: Optional[Text] = Field( + variableMeasured: Optional[Union[Text,PropertyValue,List[PropertyValue]]] = Field( None, title='Variable Measured', description='The variable that this dataset measures. For example, temperature or pressure.' diff --git a/hdr_schemata/models/SchemaOrg/default/schema.json b/hdr_schemata/models/SchemaOrg/default/schema.json index 11ee51f..ac8ca75 100644 --- a/hdr_schemata/models/SchemaOrg/default/schema.json +++ b/hdr_schemata/models/SchemaOrg/default/schema.json @@ -128,9 +128,12 @@ "title": "Jurisdiction" }, "typicalAgeRange": { - "allOf": [ + "anyOf": [ { "$ref": "#/$defs/Text" + }, + { + "type": "null" } ], "default": null, @@ -148,6 +151,18 @@ "default": null, "description": "The typical expected age range, e.g. '7-9', '11-'.", "title": "temporalCoverage" + }, + "inLanguage": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The language of the content or performance or used in an action. Please use one of the language codes from the IETF BCP 47 standard. See also availableLanguage. Supersedes language." } }, "required": [ @@ -538,6 +553,152 @@ "title": "Place", "type": "object" }, + "PropertyValue": { + "properties": { + "@type": { + "allOf": [ + { + "$ref": "#/$defs/Text" + } + ], + "default": "PropertyValue" + }, + "maxValue": { + "anyOf": [ + { + "$ref": "#/$defs/Number" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The upper value of some characteristic or property." + }, + "measurementMethod": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A subproperty of measurementTechnique that can be used for specifying specific methods, in particular via MeasurementMethodEnum.", + "title": "Measurementmethod" + }, + "measurementTechnique": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A technique, method or technology used in an Observation, StatisticalVariable or Dataset (or DataDownload, DataCatalog), corresponding to the method used for measuring the corresponding variable(s) (for datasets, described using variableMeasured; for Observation, a StatisticalVariable). Often but not necessarily each variableMeasured will have an explicit representation as (or mapping to) an property such as those defined in Schema.org, or other RDF vocabularies and \"knowledge graphs\". In that case the subproperty of variableMeasured called measuredProperty is applicable.\n \n The measurementTechnique property helps when extra clarification is needed about how a measuredProperty was measured. This is oriented towards scientific and scholarly dataset publication but may have broader applicability; it is not intended as a full representation of measurement, but can often serve as a high level summary for dataset discovery. \n \n For example, if variableMeasured is: molecule concentration, measurementTechnique could be: \"mass spectrometry\" or \"nmr spectroscopy\" or \"colorimetry\" or \"immunofluorescence\". If the variableMeasured is \"depression rating\", the measurementTechnique could be \"Zung Scale\" or \"HAM-D\" or \"Beck Depression Inventory\". \n \n If there are several variableMeasured properties recorded for some given data object, use a PropertyValue for each variableMeasured and attach the corresponding measurementTechnique. The value can also be from an enumeration, organized as a MeasurementMetholdEnumeration.", + "title": "Measurementtechnique" + }, + "minValue": { + "anyOf": [ + { + "$ref": "#/$defs/Number" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The lower value of some characteristic or property." + }, + "propertyID": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A commonly used identifier for the characteristic represented by the property, e.g. a manufacturer or a standard code for a property. propertyID can be\n (1) a prefixed string, mainly meant to be used with standards for product properties; (2) a site-specific, non-prefixed string (e.g. the primary key of the property or the vendor-specific ID of the property), or (3)\n a URL indicating the type of the property, either pointing to an external vocabulary, or a Web resource that describes the property (e.g. a glossary entry).\n Standards bodies should promote a standard prefix for the identifiers of properties from their standards.", + "title": "Propertyid" + }, + "unitCode": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "format": "uri", + "minLength": 1, + "type": "string" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The unit of measurement given using the UN/CEFACT Common Code (3 characters) or a URL. Other codes than the UN/CEFACT Common Code may be used with a prefix followed by a colon.", + "title": "Unitcode" + }, + "unitText": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A string or text indicating the unit of measurement. Useful if you cannot provide a standard unit code for\n unitCode." + }, + "value": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The value of a QuantitativeValue (including Observation) or property value node.\n \n For QuantitativeValue and MonetaryAmount, the recommended type for values is 'Number'.\n For PropertyValue, it can be 'Text', 'Number', 'Boolean', or 'StructuredValue'.\n Use values from 0123456789 (Unicode 'DIGIT ZERO' (U+0030) to 'DIGIT NINE' (U+0039)) rather than superficially similar Unicode symbols.\n Use '.' (Unicode 'FULL STOP' (U+002E)) rather than ',' to indicate a decimal point. Avoid using these symbols as a readability separator." + }, + "valueReference": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "A secondary value that provides additional information on the original value, e.g. a reference temperature or a type of measurement." + } + }, + "title": "PropertyValue", + "type": "object" + }, "SingleDate": { "format": "date", "title": "SingleDate", @@ -700,9 +861,12 @@ "title": "Jurisdiction" }, "typicalAgeRange": { - "allOf": [ + "anyOf": [ { "$ref": "#/$defs/Text" + }, + { + "type": "null" } ], "default": null, @@ -727,6 +891,18 @@ "description": "The data in the dataset covers a specific time interval. Only include this property if the dataset has a temporal dimension. Schema.org uses the ISO 8601 standard to describe time intervals and time points. You can describe dates differently depending upon the dataset interval. Indicate open-ended intervals with two decimal points (..).", "title": "Temporal Coverage" }, + "inLanguage": { + "anyOf": [ + { + "$ref": "#/$defs/Text" + }, + { + "type": "null" + } + ], + "default": null, + "description": "The language of the content or performance or used in an action. Please use one of the language codes from the IETF BCP 47 standard. See also availableLanguage. Supersedes language." + }, "@id": { "allOf": [ { @@ -1019,6 +1195,15 @@ { "$ref": "#/$defs/Text" }, + { + "$ref": "#/$defs/PropertyValue" + }, + { + "items": { + "$ref": "#/$defs/PropertyValue" + }, + "type": "array" + }, { "type": "null" } diff --git a/hdr_schemata/models/SchemaOrg/helpers/get_from_schemaorg.py b/hdr_schemata/models/SchemaOrg/helpers/get_from_schemaorg.py index 1cf3bdb..0cece94 100644 --- a/hdr_schemata/models/SchemaOrg/helpers/get_from_schemaorg.py +++ b/hdr_schemata/models/SchemaOrg/helpers/get_from_schemaorg.py @@ -10,6 +10,7 @@ def remove_leading(text): url = 'https://schema.org/Dataset' url = 'https://schema.org/CreativeWork' +url = 'https://schema.org/PropertyValue' response = requests.get(url) @@ -20,17 +21,26 @@ def remove_leading(text): template = r''' {name}: {_type} = Field( None, - description="{description}" + description=r{quotes}{description}{quotes} )''' model = [] -for table in tables: +for table in tables[:1]: trs = table.find_all('tr') for tr in trs[1:]: tds = tr.find_all(['th','td']) if (len(tds) != 3): continue name,_type,description = [x.text.strip() for x in tds] - filled_template = template.format(name=name, _type=_type, description=description) + + if 'or' in _type: + _types = ', '.join([x.strip() for x in _type.strip().split("or")]) + _type = f'Union[{_types}]' + + _type = _type.replace('URL','AnyURL') + + _type = f'Optional[{_type}]' + + filled_template = template.format(name=name, _type=_type, description=description,quotes="'''") model.append(filled_template) print ("\n".join(model))