Skip to content

Commit

Permalink
[#56] push
Browse files Browse the repository at this point in the history
  • Loading branch information
amercader committed May 20, 2024
1 parent 000baa4 commit 770628e
Show file tree
Hide file tree
Showing 3 changed files with 123 additions and 17 deletions.
30 changes: 21 additions & 9 deletions ckanext/dcat/cli.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,12 @@
import ckan.plugins.toolkit as tk

import ckanext.dcat.utils as utils
from ckanext.dcat.processors import RDFParser, RDFSerializer, DEFAULT_RDF_PROFILES
from ckanext.dcat.processors import (
RDFParser,
RDFSerializer,
DEFAULT_RDF_PROFILES,
RDF_PROFILES_CONFIG_OPTION,
)


@click.group()
Expand Down Expand Up @@ -40,8 +45,8 @@ def generate_static(output):
@click.option(
"-p",
"--profiles",
default=" ".join(DEFAULT_RDF_PROFILES),
help="RDF profiles to use",
# NOTE: adjacent string literals are concatenated, but the f-prefix applies
# per literal, so it must be on the one that contains the placeholder
help="RDF profiles to use. If not provided will be read from config, "
f"if not present there the default will be used: {' '.join(DEFAULT_RDF_PROFILES)}",
)
@click.option(
"-P", "--pretty", is_flag=True, help="Make the output more human readable"
Expand All @@ -65,6 +70,11 @@ def consume(input, output, format, profiles, pretty, compat_mode):

if profiles:
profiles = profiles.split()
elif tk.config.get(RDF_PROFILES_CONFIG_OPTION):
profiles = tk.aslist(tk.config[RDF_PROFILES_CONFIG_OPTION])
else:
profiles = None

parser = RDFParser(profiles=profiles, compatibility_mode=compat_mode)
parser.parse(contents, _format=format)

Expand Down Expand Up @@ -92,8 +102,8 @@ def consume(input, output, format, profiles, pretty, compat_mode):
@click.option(
"-p",
"--profiles",
default=" ".join(DEFAULT_RDF_PROFILES),
help="RDF profiles to use",
# NOTE: adjacent string literals are concatenated, but the f-prefix applies
# per literal, so it must be on the one that contains the placeholder
help="RDF profiles to use. If not provided will be read from config, "
f"if not present there the default will be used: {' '.join(DEFAULT_RDF_PROFILES)}",
)
@click.option(
"-m", "--compat_mode", is_flag=True, help="Compatibility mode (deprecated)"
Expand All @@ -114,10 +124,12 @@ def produce(input, output, format, profiles, compat_mode):

if profiles:
profiles = profiles.split()
serializer = RDFSerializer(
profiles=profiles,
compatibility_mode=compat_mode
)
elif tk.config.get(RDF_PROFILES_CONFIG_OPTION):
profiles = tk.aslist(tk.config[RDF_PROFILES_CONFIG_OPTION])
else:
profiles = None

serializer = RDFSerializer(profiles=profiles, compatibility_mode=compat_mode)

dataset = json.loads(contents)
if isinstance(dataset, list):
Expand Down
109 changes: 101 additions & 8 deletions ckanext/dcat/profiles.py
Original file line number Diff line number Diff line change
Expand Up @@ -731,6 +731,18 @@ def _schema_field(self, key):
if field['field_name'] == key:
return field

def _schema_resource_field(self, key):
'''
Returns the schema field information if the provided key exists as a field in
the resources fields of the dataset schema (if one was provided)
'''
if not self._dataset_schema:
return None

for field in self._dataset_schema['resource_fields']:
if field['field_name'] == key:
return field

def _set_dataset_value(self, dataset_dict, key, value):
'''
Sets the value for a given key in a CKAN dataset dict
Expand Down Expand Up @@ -758,6 +770,15 @@ def _set_list_dataset_value(self, dataset_dict, key, value):
else:
return self._set_dataset_value(dataset_dict, key, json.dumps(value))

def _set_list_resource_value(self, resource_dict, key, value):
schema_field = self._schema_resource_field(key)
if schema_field and 'scheming_multiple_text' in schema_field['validators']:
resource_dict[key] = value
else:
resource_dict[key] = json.dumps(value)

return resource_dict

def _get_dataset_value(self, dataset_dict, key, default=None):
'''
Returns the value for the given key on a CKAN dict
Expand Down Expand Up @@ -1084,7 +1105,7 @@ def parse_dataset(self, dataset_dict, dataset_ref):
):
value = self._object_value(dataset_ref, predicate)
if value:
self._set_dataset_value(dataset_dict, key, value)
dataset_dict['extras'].append({'key': key, 'value': value})

# Lists
for key, predicate, in (
Expand All @@ -1101,7 +1122,8 @@ def parse_dataset(self, dataset_dict, dataset_ref):
):
values = self._object_value_list(dataset_ref, predicate)
if values:
self._set_list_dataset_value(dataset_dict, key, values)
dataset_dict['extras'].append({'key': key,
'value': json.dumps(values)})

# Contact details
contact = self._contact_details(dataset_ref, DCAT.contactPoint)
Expand All @@ -1110,11 +1132,17 @@ def parse_dataset(self, dataset_dict, dataset_ref):
contact = self._contact_details(dataset_ref, ADMS.contactPoint)

if contact:
for key in ('uri', 'name', 'email'):
if contact.get(key):
dataset_dict['extras'].append(
{'key': 'contact_{0}'.format(key),
'value': contact.get(key)})
# TODO: this will go into a separate profile
schema_field = self._schema_field("contact")
if schema_field and 'repeating_subfields' in schema_field:
# TODO: support multiple items
dataset_dict['contact'] = [contact]
else:
for key in ('uri', 'name', 'email'):
if contact.get(key):
dataset_dict['extras'].append(
{'key': 'contact_{0}'.format(key),
'value': contact.get(key)})

# Publisher
publisher = self._publisher(dataset_ref, DCT.publisher)
Expand Down Expand Up @@ -1676,7 +1704,12 @@ def parse_dataset(self, dataset_dict, dataset_ref):
access_service_list.append(access_service_dict)

if access_service_list:
resource_dict['access_services'] = json.dumps(access_service_list)
# TODO: move to a separate profile
schema_field = self._schema_resource_field('access_services')
if schema_field and 'repeating_subfields' in schema_field:
resource_dict['access_services'] = access_service_list
else:
resource_dict['access_services'] = json.dumps(access_service_list)

return dataset_dict

Expand Down Expand Up @@ -2097,3 +2130,63 @@ def _distribution_url_graph(self, distribution, resource_dict):
def _distribution_numbers_graph(self, distribution, resource_dict):
    '''
    Adds the resource size (if any) to the graph as the `contentSize`
    property of the distribution

    Nothing is added if the resource dict has no `size` or it is empty.
    '''
    size = resource_dict.get('size')
    if not size:
        return

    self.g.add((distribution, SCHEMA.contentSize, Literal(size)))


# TODO: split all these classes in different files
class EuropeanDCATAPSchemingProfile(RDFProfile):
    '''
    This is a compatibility profile meant to add support for ckanext-scheming to the existing
    `euro_dcat_ap` and `euro_dcat_ap_2` profiles.
    It does not add or remove any properties from these profiles, it just transforms the
    resulting dataset_dict so it is compatible with a ckanext-scheming schema
    TODO: summarize changes and link to docs
    '''

    def parse_dataset(self, dataset_dict, dataset_ref):
        '''
        Adapts an already parsed dataset dict to the dataset schema in use

        If no dataset schema is available the dict is returned untouched.
        Otherwise:

        * Extras whose key is a field of the dataset schema are moved to
          the root of the dataset dict
        * Dataset and resource values of fields that use the
          `scheming_multiple_text` validator and are stored as strings are
          parsed as JSON (values that are not valid JSON are left as they are)

        Returns the (modified in place) dataset dict
        '''

        if not self._dataset_schema:
            # Not using scheming
            return dataset_dict

        # Move extras to root

        extras = dataset_dict.get('extras', [])
        moved_keys = set()
        for extra in extras:
            if self._schema_field(extra['key']):
                # This is a field defined in the dataset schema
                dataset_dict[extra['key']] = extra['value']
                moved_keys.add(extra['key'])

        dataset_dict['extras'] = [e for e in extras if e['key'] not in moved_keys]

        # Parse lists
        def _parse_list_value(data_dict, field_name):
            schema_field = (
                self._schema_field(field_name)
                or self._schema_resource_field(field_name)
            )
            if not schema_field:
                return
            if 'scheming_multiple_text' not in schema_field.get('validators', []):
                return

            if isinstance(data_dict[field_name], str):
                try:
                    data_dict[field_name] = json.loads(data_dict[field_name])
                except ValueError:
                    # Best effort: not a JSON string, keep the original value
                    pass

        # Only values of existing keys are replaced, so it is safe to iterate
        # over the dicts while updating them
        for field_name in dataset_dict:
            _parse_list_value(dataset_dict, field_name)

        for resource_dict in dataset_dict.get('resources', []):
            for field_name in resource_dict:
                _parse_list_value(resource_dict, field_name)

        # Repeating subfields
        # TODO: not implemented yet. The fields to handle are `contact`
        # (subfields `uri`, `name` and `email`) and `publisher` (subfields to
        # be defined)

        return dataset_dict
1 change: 1 addition & 0 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,7 @@
[ckan.rdf.profiles]
euro_dcat_ap=ckanext.dcat.profiles:EuropeanDCATAPProfile
euro_dcat_ap_2=ckanext.dcat.profiles:EuropeanDCATAP2Profile
euro_dcat_ap_scheming=ckanext.dcat.profiles:EuropeanDCATAPSchemingProfile
schemaorg=ckanext.dcat.profiles:SchemaOrgProfile
[babel.extractors]
Expand Down

0 comments on commit 770628e

Please sign in to comment.