[#56] push

ckan · May 20, 2024 · 770628e · 770628e
1 parent 000baa4
commit 770628e
Show file tree

Hide file tree

Showing 3 changed files with 123 additions and 17 deletions.
diff --git a/ckanext/dcat/cli.py b/ckanext/dcat/cli.py
@@ -6,7 +6,12 @@
 import ckan.plugins.toolkit as tk
 
 import ckanext.dcat.utils as utils
-from ckanext.dcat.processors import RDFParser, RDFSerializer, DEFAULT_RDF_PROFILES
+from ckanext.dcat.processors import (
+    RDFParser,
+    RDFSerializer,
+    DEFAULT_RDF_PROFILES,
+    RDF_PROFILES_CONFIG_OPTION,
+)
 
 
 @click.group()
@@ -40,8 +45,8 @@ def generate_static(output):
 @click.option(
     "-p",
     "--profiles",
-    default=" ".join(DEFAULT_RDF_PROFILES),
-    help="RDF profiles to use",
+    help="RDF profiles to use. If not provided will be read from config, "
+    f"if not present there the default will be used: {' '.join(DEFAULT_RDF_PROFILES)}",
 )
 @click.option(
     "-P", "--pretty", is_flag=True, help="Make the output more human readable"
@@ -65,6 +70,11 @@ def consume(input, output, format, profiles, pretty, compat_mode):
 
     if profiles:
         profiles = profiles.split()
+    elif tk.config.get(RDF_PROFILES_CONFIG_OPTION):
+        profiles = tk.aslist(tk.config[RDF_PROFILES_CONFIG_OPTION])
+    else:
+        profiles = None
+
     parser = RDFParser(profiles=profiles, compatibility_mode=compat_mode)
     parser.parse(contents, _format=format)
 
@@ -92,8 +102,8 @@ def consume(input, output, format, profiles, pretty, compat_mode):
 @click.option(
     "-p",
     "--profiles",
-    default=" ".join(DEFAULT_RDF_PROFILES),
-    help="RDF profiles to use",
+    help="RDF profiles to use. If not provided will be read from config, "
+    f"if not present there the default will be used: {' '.join(DEFAULT_RDF_PROFILES)}",
 )
 @click.option(
     "-m", "--compat_mode", is_flag=True, help="Compatibility mode (deprecated)"
@@ -114,10 +124,12 @@ def produce(input, output, format, profiles, compat_mode):
 
     if profiles:
         profiles = profiles.split()
-    serializer = RDFSerializer(
-        profiles=profiles,
-        compatibility_mode=compat_mode
-    )
+    elif tk.config.get(RDF_PROFILES_CONFIG_OPTION):
+        profiles = tk.aslist(tk.config[RDF_PROFILES_CONFIG_OPTION])
+    else:
+        profiles = None
+
+    serializer = RDFSerializer(profiles=profiles, compatibility_mode=compat_mode)
 
     dataset = json.loads(contents)
     if isinstance(dataset, list):

diff --git a/ckanext/dcat/profiles.py b/ckanext/dcat/profiles.py
@@ -731,6 +731,18 @@ def _schema_field(self, key):
             if field['field_name'] == key:
                 return field
 
+    def _schema_resource_field(self, key):
+        '''
+        Returns the schema field information if the provided key exists as a field in
+        the resource fields of the dataset schema (if one was provided), None otherwise
+        '''
+        if not self._dataset_schema:
+            return None
+
+        for field in self._dataset_schema.get('resource_fields', []):
+            if field['field_name'] == key:
+                return field
+
     def _set_dataset_value(self, dataset_dict, key, value):
         '''
         Sets the value for a given key in a CKAN dataset dict
@@ -758,6 +770,15 @@ def _set_list_dataset_value(self, dataset_dict, key, value):
         else:
             return self._set_dataset_value(dataset_dict, key, json.dumps(value))
 
+    def _set_list_resource_value(self, resource_dict, key, value):
+        '''Sets a list value on a resource dict and returns it: kept as a list for
+        `scheming_multiple_text` schema fields, stored as a JSON string otherwise'''
+        schema_field = self._schema_resource_field(key)
+        # `validators` is read with .get() as a schema field may not define it
+        validators = schema_field.get('validators', []) if schema_field else []
+        resource_dict[key] = value if 'scheming_multiple_text' in validators else json.dumps(value)
+        return resource_dict
+
     def _get_dataset_value(self, dataset_dict, key, default=None):
         '''
         Returns the value for the given key on a CKAN dict
@@ -1084,7 +1105,7 @@ def parse_dataset(self, dataset_dict, dataset_ref):
                 ):
             value = self._object_value(dataset_ref, predicate)
             if value:
-                self._set_dataset_value(dataset_dict, key, value)
+                dataset_dict['extras'].append({'key': key, 'value': value})
 
         #  Lists
         for key, predicate, in (
@@ -1101,7 +1122,8 @@ def parse_dataset(self, dataset_dict, dataset_ref):
                 ):
             values = self._object_value_list(dataset_ref, predicate)
             if values:
-                self._set_list_dataset_value(dataset_dict, key, values)
+                dataset_dict['extras'].append({'key': key,
+                                               'value': json.dumps(values)})
 
         # Contact details
         contact = self._contact_details(dataset_ref, DCAT.contactPoint)
@@ -1110,11 +1132,17 @@ def parse_dataset(self, dataset_dict, dataset_ref):
             contact = self._contact_details(dataset_ref, ADMS.contactPoint)
 
         if contact:
-            for key in ('uri', 'name', 'email'):
-                if contact.get(key):
-                    dataset_dict['extras'].append(
-                        {'key': 'contact_{0}'.format(key),
-                         'value': contact.get(key)})
+            # TODO: this will go into a separate profile
+            schema_field = self._schema_field("contact")
+            if schema_field and 'repeating_subfields' in schema_field:
+                # TODO: support multiple items
+                dataset_dict['contact'] = [contact]
+            else:
+                for key in ('uri', 'name', 'email'):
+                    if contact.get(key):
+                        dataset_dict['extras'].append(
+                            {'key': 'contact_{0}'.format(key),
+                             'value': contact.get(key)})
 
         # Publisher
         publisher = self._publisher(dataset_ref, DCT.publisher)
@@ -1676,7 +1704,12 @@ def parse_dataset(self, dataset_dict, dataset_ref):
                             access_service_list.append(access_service_dict)
 
                         if access_service_list:
-                            resource_dict['access_services'] = json.dumps(access_service_list)
+                            # TODO: move to a separate profile
+                            schema_field = self._schema_resource_field('access_services')
+                            if schema_field and 'repeating_subfields' in schema_field:
+                                resource_dict['access_services'] = access_service_list
+                            else:
+                                resource_dict['access_services'] = json.dumps(access_service_list)
 
         return dataset_dict
 
@@ -2097,3 +2130,63 @@ def _distribution_url_graph(self, distribution, resource_dict):
     def _distribution_numbers_graph(self, distribution, resource_dict):
         if resource_dict.get('size'):
             self.g.add((distribution, SCHEMA.contentSize, Literal(resource_dict['size'])))
+
+
+# TODO: split all these classes in different files
+class EuropeanDCATAPSchemingProfile(RDFProfile):
+    '''
+    This is a compatibility profile meant to add support for ckanext-scheming to the existing
+    `euro_dcat_ap` and `euro_dcat_ap_2` profiles.
+
+    It does not add or remove any properties from these profiles, it just transforms the
+    resulting dataset_dict so it is compatible with a ckanext-scheming schema
+
+    TODO: summarize changes and link to docs
+    '''
+
+    def parse_dataset(self, dataset_dict, dataset_ref):
+        '''
+        Adapts the dataset dict built by the previous profiles to the dataset schema
+
+        Extras whose key is a field of the dataset schema are moved to the root of the
+        dict, and JSON strings found in `scheming_multiple_text` fields (dataset or
+        resource ones) are decoded. `dataset_ref` is not used here.
+
+        Returns the dataset dict, unchanged if no dataset schema was provided
+        '''
+        if not self._dataset_schema:
+            # Not using scheming
+            return dataset_dict
+
+        # Move extras to root
+        remaining_extras = []
+        for extra in dataset_dict.get('extras', []):
+            if self._schema_field(extra['key']):
+                # This is a field defined in the dataset schema
+                dataset_dict[extra['key']] = extra['value']
+            else:
+                remaining_extras.append(extra)
+        dataset_dict['extras'] = remaining_extras
+
+        # Parse lists
+        def _parse_list_value(data_dict, field_name):
+            schema_field = self._schema_field(field_name) or self._schema_resource_field(field_name)
+
+            if schema_field and 'scheming_multiple_text' in schema_field.get('validators', []):
+                if isinstance(data_dict[field_name], str):
+                    try:
+                        data_dict[field_name] = json.loads(data_dict[field_name])
+                    except ValueError:
+                        # Not valid JSON, keep the original string value
+                        pass
+
+        for field_name in dataset_dict:
+            _parse_list_value(dataset_dict, field_name)
+
+        for resource_dict in dataset_dict.get('resources', []):
+            for field_name in resource_dict:
+                _parse_list_value(resource_dict, field_name)
+
+        # TODO: support repeating subfields ('contact': 'uri', 'name', 'email'; 'publisher')
+
+        return dataset_dict
diff --git a/setup.py b/setup.py
@@ -43,6 +43,7 @@
     [ckan.rdf.profiles]
     euro_dcat_ap=ckanext.dcat.profiles:EuropeanDCATAPProfile
     euro_dcat_ap_2=ckanext.dcat.profiles:EuropeanDCATAP2Profile
+    euro_dcat_ap_scheming=ckanext.dcat.profiles:EuropeanDCATAPSchemingProfile
     schemaorg=ckanext.dcat.profiles:SchemaOrgProfile
 
     [babel.extractors]