Skip to content

Commit

Permalink
Expose OGC services distributions as dataservice (#3203)
Browse files Browse the repository at this point in the history
Close datagouv/data.gouv.fr#1538

Inspired by the conversion in the SEMIC XSLT:
https://github.com/SEMICeu/iso-19139-to-dcat-ap/blob/f61b2921dd398b90b2dd2db14085e75687f7616b/iso-19139-to-dcat-ap.xsl#L1419

If the resource is an OGC Service, we add a `DCAT.accessService`
property on the distribution and describe the DataService accordingly.

We support resources that have a format of `(ogc:)wms` or `(ogc:)wfs`.
For now, unlike the XSLT, we do not rely on `request=GetCapabilities` in the URL
to decide whether a resource is an OGC service.
  • Loading branch information
maudetes authored Nov 27, 2024
1 parent bfeae1b commit 7a6aaa8
Show file tree
Hide file tree
Showing 4 changed files with 75 additions and 1 deletion.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -2,6 +2,7 @@

## Current (in progress)

- Expose OGC services distributions as dataservice [#3203](https://github.com/opendatateam/udata/pull/3203)
- Add a matomo "campaign" parameter on links in emails if `MAIL_CAMPAIGN` is configured [#3190](https://github.com/opendatateam/udata/pull/3190)
- Add DCAT-AP HVD properties in RDF output if the dataservice or its datasets are tagged hvd [#3187](https://github.com/opendatateam/udata/pull/3187)

Expand Down
2 changes: 2 additions & 0 deletions udata/core/dataset/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@
]
)

# Resource formats identifying an OGC web service distribution (WMS / WFS),
# accepted both with and without the "ogc:" prefix.
OGC_SERVICE_FORMATS = [
    "ogc:wms",
    "ogc:wfs",
    "wms",
    "wfs",
]

# Known checksum algorithm identifiers and the one used by default.
CHECKSUM_TYPES = (
    "sha1",
    "sha2",
    "sha256",
    "md5",
    "crc",
)
DEFAULT_CHECKSUM_TYPE = "sha1"

Expand Down
45 changes: 44 additions & 1 deletion udata/core/dataset/rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@
from udata.uris import endpoint_for
from udata.utils import get_by, safe_unicode

from .constants import UPDATE_FREQUENCIES
from .constants import OGC_SERVICE_FORMATS, UPDATE_FREQUENCIES
from .models import Checksum, Dataset, License, Resource

log = logging.getLogger(__name__)
Expand Down Expand Up @@ -126,6 +126,44 @@ def owner_to_rdf(dataset, graph=None):
return


def ogc_service_to_rdf(dataset, resource, graph=None, is_hvd=False):
    """
    Build a dataservice on the fly for OGC services distributions.

    Inspired by https://github.com/SEMICeu/iso-19139-to-dcat-ap/blob/f61b2921dd398b90b2dd2db14085e75687f7616b/iso-19139-to-dcat-ap.xsl#L1419

    :param dataset: the dataset the resource belongs to, or ``None``. When
        given, its license, contact point and (for HVD) tags are used to
        describe the service.
    :param resource: the OGC resource; its ``title``, ``url`` and ``format``
        are read.
    :param graph: the graph the service node is added to. A new graph is
        created only when ``None`` is given.
    :param is_hvd: when true, the HVD applicable legislation and the HVD
        categories matching the dataset tags are added to the service.
    :return: the RDF resource (blank node) typed as ``dcat:DataService``.
    """
    # Compare against None explicitly: an empty rdflib Graph is falsy, so
    # `graph or Graph(...)` would silently drop a caller-supplied empty graph
    # and the service triples would end up outside the caller's graph.
    if graph is None:
        graph = Graph(namespace_manager=namespace_manager)

    service = graph.resource(BNode())
    service.set(RDF.type, DCAT.DataService)
    service.set(DCT.title, Literal(resource.title))
    # The endpoint is the resource URL without its query string
    service.set(DCAT.endpointURL, URIRef(resource.url.split("?")[0]))
    if "request=getcapabilities" in resource.url.lower():
        # A GetCapabilities URL is itself the description of the endpoint
        service.set(DCAT.endpointDescription, URIRef(resource.url))
    # Drop any "ogc:" prefix so that "ogc:wms" and "wms" map to the same standard
    service.set(
        DCT.conformsTo,
        URIRef("http://www.opengeospatial.org/standards/" + resource.format.split(":")[-1]),
    )

    if dataset and dataset.license:
        service.add(DCT.rights, Literal(dataset.license.title))
        if dataset.license.url:
            service.add(DCT.license, URIRef(dataset.license.url))

    if dataset and dataset.contact_point:
        contact_point = contact_point_to_rdf(dataset.contact_point, graph)
        if contact_point:
            service.set(DCAT.contactPoint, contact_point)

    if is_hvd:
        # DCAT-AP HVD applicable legislation is also expected at the distribution > accessService level
        service.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))
        # The dataset is optional (see the guards above): only read its tags when it is provided
        if dataset:
            for tag in dataset.tags:
                # Add HVD category if this dataset is tagged HVD
                if tag in TAG_TO_EU_HVD_CATEGORIES:
                    service.add(DCATAP.hvdCategory, URIRef(TAG_TO_EU_HVD_CATEGORIES[tag]))

    return service


def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
"""
Map a Resource domain model to a DCAT/RDF graph
Expand Down Expand Up @@ -175,6 +213,11 @@ def resource_to_rdf(resource, dataset=None, graph=None, is_hvd=False):
if is_hvd:
# DCAT-AP HVD applicable legislation is also expected at the distribution level
r.add(DCATAP.applicableLegislation, URIRef(HVD_LEGISLATION))

# Add access service for known OGC service formats
if resource.format in OGC_SERVICE_FORMATS:
r.add(DCAT.accessService, ogc_service_to_rdf(dataset, resource, graph, is_hvd))

return r


Expand Down
28 changes: 28 additions & 0 deletions udata/tests/dataset/test_dataset_rdf.py
Original file line number Diff line number Diff line change
Expand Up @@ -197,6 +197,34 @@ def test_all_resource_fields(self):
assert r.graph.value(checksum.identifier, SPDX.algorithm) == SPDX.checksumAlgorithm_sha1
assert checksum.value(SPDX.checksumValue) == Literal(resource.checksum.value)

def test_ogc_resource_access_service(self):
    """A resource with an OGC format exposes a `dcat:accessService` describing the service."""
    capabilities_url = "https://services.data.shom.fr/INSPIRE/wms/r?service=WMS&request=GetCapabilities&version=1.3.0"

    dataset_license = LicenseFactory()
    ogc_resource = ResourceFactory(format="ogc:wms", url=capabilities_url)
    contact_point = ContactPointFactory()
    dataset = DatasetFactory(
        resources=[ogc_resource],
        license=dataset_license,
        contact_point=contact_point,
    )

    distribution = resource_to_rdf(ogc_resource, dataset)
    access_service = distribution.value(DCAT.accessService)

    # The service is typed and titled after the resource
    assert access_service.value(RDF.type).identifier == DCAT.DataService
    assert access_service.value(DCT.title) == Literal(ogc_resource.title)

    # The full GetCapabilities URL is the description, the URL without query string is the endpoint
    endpoint_description = access_service.value(DCAT.endpointDescription)
    assert endpoint_description.identifier == URIRef(capabilities_url)
    endpoint_url = access_service.value(DCAT.endpointURL)
    assert endpoint_url.identifier == URIRef("https://services.data.shom.fr/INSPIRE/wms/r")

    # The "ogc:" prefix is dropped when building the standard URI
    conforms_to = access_service.value(DCT.conformsTo)
    assert conforms_to.identifier == URIRef("http://www.opengeospatial.org/standards/wms")

    # License and contact point come from the dataset
    assert access_service.value(DCT.license).identifier == URIRef(dataset_license.url)
    service_contact = access_service.value(DCAT.contactPoint)
    assert service_contact.value(RDF.type).identifier == VCARD.Kind

def test_temporal_coverage(self):
start = faker.past_date(start_date="-30d")
end = faker.future_date(end_date="+30d")
Expand Down

0 comments on commit 7a6aaa8

Please sign in to comment.