From 5612a1838ce46a97bd0b92e6df1ec2fbb1751e2a Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Fri, 26 Jul 2013 16:05:31 +0200 Subject: [PATCH 01/27] Converting ckanext-swisstopo to a CKAN-style harvester --- ckanext/swisstopo/cleanup.py | 29 ---- ckanext/swisstopo/harvesters/__init__.py | 1 + .../harvesters/swisstopoharvester.py | 139 ++++++++++++++++++ ckanext/swisstopo/plugins.py | 8 + ckanext/swisstopo/show_dataset.py | 35 ----- ckanext/swisstopo/swissboundaries.py | 84 ----------- pip-requirements.txt | 2 - setup.py | 10 +- 8 files changed, 154 insertions(+), 154 deletions(-) delete mode 100644 ckanext/swisstopo/cleanup.py create mode 100644 ckanext/swisstopo/harvesters/__init__.py create mode 100644 ckanext/swisstopo/harvesters/swisstopoharvester.py create mode 100644 ckanext/swisstopo/plugins.py delete mode 100644 ckanext/swisstopo/show_dataset.py delete mode 100644 ckanext/swisstopo/swissboundaries.py diff --git a/ckanext/swisstopo/cleanup.py b/ckanext/swisstopo/cleanup.py deleted file mode 100644 index c321a66..0000000 --- a/ckanext/swisstopo/cleanup.py +++ /dev/null @@ -1,29 +0,0 @@ -import ckanclient -import sys -from optparse import OptionParser -from pprint import pprint - -API_KEY = p.toolkit.asbool(config.get('ckanext.swisstopo.api_key', '')) -BASE_LOCATION = p.toolkit.asbool(config.get('ckanext.swisstopo.base_location', '')) - -ckan = ckanclient.CkanClient(api_key=API_KEY, base_location=BASE_LOCATION) - -parser = OptionParser() -parser.add_option("-s", "--search", dest="search_term", help="search packages with TERM", metavar="TERM") -parser.add_option("-t", "--tag", dest="search_tag", help="Search packages with tag TAG", metavar="TAG") -(options, args) = parser.parse_args() - -results = [] -if options.search_term: - search_results = ckan.package_search(options.search_term); - results = search_results['results'] -elif options.search_tag: - search_results = ckan.package_search('tags:' + options.search_tag); - results = search_results['results'] 
-else: - parser.error("No valid argument supplied") - -for package_name in results: - print package_name - last_message = ckan.package_entity_delete(package_name) - print last_message diff --git a/ckanext/swisstopo/harvesters/__init__.py b/ckanext/swisstopo/harvesters/__init__.py new file mode 100644 index 0000000..f96002e --- /dev/null +++ b/ckanext/swisstopo/harvesters/__init__.py @@ -0,0 +1 @@ +from ckanext.swisstopo.harvesters.swisstopoharvester import SwisstopoHarvester diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py new file mode 100644 index 0000000..6c15156 --- /dev/null +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -0,0 +1,139 @@ +import ckanclient +import random +import os +import shutil +import tempfile +import zipfile +from pprint import pprint +from collections import defaultdict +import ckan_csw + +from ckan.lib.base import c +from ckan import model +from ckan.model import Session, Package +from ckan.logic import ValidationError, NotFound, get_action, action +from ckan.lib.helpers import json + +from ckanext.harvest.model import HarvestJob, HarvestObject, HarvestGatherError, \ + HarvestObjectError +from ckanext.harvest.harvesters import HarvesterBase + +import logging +log = logging.getLogger(__name__) + +class SwisstopoHarvester(HarvesterCase): + ''' + The harvester for swisstopo + ''' + + API_KEY = p.toolkit.asbool(config.get('ckanext.swisstopo.api_key', '')) + BASE_LOCATION = p.toolkit.asbool(config.get('ckanext.swisstopo.base_location', '')) + + ckan = ckanclient.CkanClient(api_key=API_KEY, base_location=BASE_LOCATION) + + DATASET_NAMES = ['swissboundaries3D'] + + def info(self): + return { + 'name': 'swisstopo', + 'title': 'Swisstopo', + 'description': 'Harvests the swisstopo data', + 'form_config_interface': 'Text' + } + + def gather_stage(self, harvest_job): + log.debug('In SwisstopoHarvester gather_stage') + + csw = ckan_csw.SwisstopoCkanMetadata(); + ids = [] + 
for dataset_name in self.DATASET_NAMES: + metadata = csw.get_ckan_metadata(dataset_name) + obj = HarvestObject( + guid = metadata['id'], + job = harvest_job, + content = json.dumps(metadata) + ) + obj.save() + log.debug('adding ' + dataset_name + ' to the queue') + ids.append(obj.id) + + return ids + + + def fetch_stage(self, harvest_object): + pass + + def import_stage(self, harvest_obeject): + pass + + + + def create_dataset(name, tags, description, metadata): + dataset_name = name + '_' + str(random.randint(1000000, 9999999999)) + dataset_entity = { + 'name': dataset_name, + 'title': name + ' - ' + metadata['title'], + 'tags': tags + ' ' + metadata['tags'], + 'notes': metadata['notes'], + 'url': metadata['url'], + 'author': metadata['author'], + 'author_email': metadata['author_email'], + 'maintainer': metadata['maintainer'], + 'maintainer_email': metadata['maintainer_email'], + 'license': metadata['license'], + } + return dataset_entity + + def extract_file(zipped_file, name, extract_path): + (dirname, filename) = os.path.split(name) + new_path = os.path.join(extract_path, dirname) + extracted_filename = os.path.join(extract_path, name) + if not os.path.exists(new_path): + os.makedirs(new_path) + fd = open(extracted_filename,"w") + fd.write(zipped_file.read(name)) + fd.close() + return extracted_filename + +# Copy the file +origin_file = '/home/www-data/swissBOUNDARIES3D080312.zip' +origin_path, file_name = os.path.split(origin_file) +temp_dir = tempfile.mkdtemp() +shutil.copy(origin_file, temp_dir); +temporary_file = os.path.join(temp_dir, file_name) + +csw = ckan_csw.SwisstopoCkanMetadata(); +metadata = csw.get_ckan_metadata('swissboundaries3D') + +aggregates = defaultdict(list) +# Unzip the file +zipped_file = zipfile.ZipFile(temporary_file) +for name in zipped_file.namelist(): + (dirname, filename) = os.path.split(name) + pure_name, file_extension = os.path.splitext(filename) + dataset_name = pure_name.lower().replace(".","-") + if file_extension not in 
['.pdf']: + print "Extracting " + name + extracted_filename = extract_file(zipped_file, name, temp_dir) + resource = { + 'filename': extracted_filename, + 'title': 'swissboundaries3D - ' + filename, + 'description': 'swissboundaries ' + file_extension + ' file', + 'format': file_extension[1:] + } + aggregates[dataset_name].append(resource) + + +for key, aggregate in aggregates.iteritems(): + dataset = create_dataset(key, 'swissboundaries Verwaltungseinheiten', 'swissboundaries ' + key, metadata) + ckan.package_register_post(dataset) + + for resource in aggregate: + pprint(resource) + try: + dataset = ckan.add_package_resource(dataset['name'], resource['filename'], name=resource['title'], resource_type='data', format=resource['format'], description=resource['description']) + except ValueError as e: + print e + pprint(dataset) + +shutil.rmtree(temp_dir); diff --git a/ckanext/swisstopo/plugins.py b/ckanext/swisstopo/plugins.py new file mode 100644 index 0000000..2300fbd --- /dev/null +++ b/ckanext/swisstopo/plugins.py @@ -0,0 +1,8 @@ +import ckan +import ckan.plugins as p +from pylons import config + +class SwisstopoHarvest(p.SingletonPlugin): + """ + Plugin containg the harvester for swisstopo + """ diff --git a/ckanext/swisstopo/show_dataset.py b/ckanext/swisstopo/show_dataset.py deleted file mode 100644 index cd86f72..0000000 --- a/ckanext/swisstopo/show_dataset.py +++ /dev/null @@ -1,35 +0,0 @@ -import ckanclient -import sys -from pprint import pprint - -API_KEY = p.toolkit.asbool(config.get('ckanext.swisstopo.api_key', '')) -BASE_LOCATION = p.toolkit.asbool(config.get('ckanext.swisstopo.base_location', '')) - -ckan = ckanclient.CkanClient(api_key=API_KEY, base_location=BASE_LOCATION) - -package_list = ckan.package_register_get() - -if (len(package_list) <= 0): - print "- No datasets found -" - sys.exit() - -package_list.sort() -for index, package in enumerate(package_list): - print str(index+1) + ") " + package - -try: - selected_dataset = int(raw_input("Select 
dataset (1-" + str(len(package_list)) + "): ")) - if not selected_dataset in range(1, len(package_list) + 1): - raise ValueError("number is out of range (1-" + str(len(package_list)) + ")") - dataset_name = package_list[selected_dataset-1] -except ValueError as detail: - print "Invalid dataset number: ", detail - sys.exit() - -package_entity = ckan.package_entity_get(dataset_name) - - -max_key_len = len(max(package_entity.keys(), key=len)) -for key, value in package_entity.iteritems(): - print str(key).ljust(max_key_len) + "\t\t" + str(value) - diff --git a/ckanext/swisstopo/swissboundaries.py b/ckanext/swisstopo/swissboundaries.py deleted file mode 100644 index eb8ba62..0000000 --- a/ckanext/swisstopo/swissboundaries.py +++ /dev/null @@ -1,84 +0,0 @@ -import ckanclient -import random -import os -import shutil -import tempfile -import zipfile -from pprint import pprint -from collections import defaultdict -import ckan_csw - -API_KEY = p.toolkit.asbool(config.get('ckanext.swisstopo.api_key', '')) -BASE_LOCATION = p.toolkit.asbool(config.get('ckanext.swisstopo.base_location', '')) - -ckan = ckanclient.CkanClient(api_key=API_KEY, base_location=BASE_LOCATION) - -def create_dataset(name, tags, description, metadata): - dataset_name = name + '_' + str(random.randint(1000000, 9999999999)) - dataset_entity = { - 'name': dataset_name, - 'title': name + ' - ' + metadata['title'], - 'tags': tags + ' ' + metadata['tags'], - 'notes': metadata['notes'], - 'url': metadata['url'], - 'author': metadata['author'], - 'author_email': metadata['author_email'], - 'maintainer': metadata['maintainer'], - 'maintainer_email': metadata['maintainer_email'], - 'license': metadata['license'], - } - return dataset_entity - -def extract_file(zipped_file, name, extract_path): - (dirname, filename) = os.path.split(name) - new_path = os.path.join(extract_path, dirname) - extracted_filename = os.path.join(extract_path, name) - if not os.path.exists(new_path): - os.makedirs(new_path) - fd = 
open(extracted_filename,"w") - fd.write(zipped_file.read(name)) - fd.close() - return extracted_filename - -# Copy the file -origin_file = '/home/www-data/swissBOUNDARIES3D080312.zip' -origin_path, file_name = os.path.split(origin_file) -temp_dir = tempfile.mkdtemp() -shutil.copy(origin_file, temp_dir); -temporary_file = os.path.join(temp_dir, file_name) - -csw = ckan_csw.SwisstopoCkanMetadata(); -metadata = csw.get_ckan_metadata('swissboundaries3D') - -aggregates = defaultdict(list) -# Unzip the file -zipped_file = zipfile.ZipFile(temporary_file) -for name in zipped_file.namelist(): - (dirname, filename) = os.path.split(name) - pure_name, file_extension = os.path.splitext(filename) - dataset_name = pure_name.lower().replace(".","-") - if file_extension not in ['.pdf']: - print "Extracting " + name - extracted_filename = extract_file(zipped_file, name, temp_dir) - resource = { - 'filename': extracted_filename, - 'title': 'swissboundaries3D - ' + filename, - 'description': 'swissboundaries ' + file_extension + ' file', - 'format': file_extension[1:] - } - aggregates[dataset_name].append(resource) - - -for key, aggregate in aggregates.iteritems(): - dataset = create_dataset(key, 'swissboundaries Verwaltungseinheiten', 'swissboundaries ' + key, metadata) - ckan.package_register_post(dataset) - - for resource in aggregate: - pprint(resource) - try: - dataset = ckan.add_package_resource(dataset['name'], resource['filename'], name=resource['title'], resource_type='data', format=resource['format'], description=resource['description']) - except ValueError as e: - print e - pprint(dataset) - -shutil.rmtree(temp_dir); diff --git a/pip-requirements.txt b/pip-requirements.txt index 4784c03..fd1c9c6 100644 --- a/pip-requirements.txt +++ b/pip-requirements.txt @@ -1,7 +1,5 @@ # This file lists the dependencies of this extension. 
# Install with a command like: pip install -r pip-requirements.txt --e git+https://github.com/liip-forks/ckanclient.git#egg=ckanclient --e git+https://github.com/okfn/ckanext-importlib.git#egg=ckanext-importlib OWSLib==0.7.1 lxml==2.2.4 boto==2.8.0 diff --git a/setup.py b/setup.py index 3c6b033..ba67eb7 100644 --- a/setup.py +++ b/setup.py @@ -6,15 +6,15 @@ setup( name='ckanext-swisstopo', version=version, - description="swisstopo CKAN extension", + description="CKAN extension of the Federal Office of Topography swisstopo for the OGD portal of Switzerland", long_description="""\ """, classifiers=[], # Get strings from http://pypi.python.org/pypi?%3Aaction=list_classifiers keywords='', author='Liip AG', - author_email='contact@liip.ch', + author_email='ogd@liip.ch', url='http://www.liip.ch', - license='', + license='GPL', packages=find_packages(exclude=['ez_setup', 'examples', 'tests']), namespace_packages=['ckanext', 'ckanext.swisstopo'], include_package_data=True, @@ -26,7 +26,9 @@ """ [ckan.plugins] #swisstopo_plugin=ckanext.swisstopo:PluginClass + swisstopo=ckanext.swisstopo.plugins:SwisstopoHarvest + swisstopo_harvester=ckanext.swisstopo.harvester:SwisstopoHarvester [paste.paster_command] - swisstopo=ckanext.swisstopo.commands.swisstopo:SwisstopoCommand + swisstopo_harvest=ckanext.swisstopo.commands.harvester:Harvester """, ) From 2510a9e7f01f8bb929ffcdaa6bfdb93584b2529f Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Mon, 29 Jul 2013 17:20:55 +0200 Subject: [PATCH 02/27] Refactoring of swisstopo command (e.g. 
to load pylons config) --- ckanext/swisstopo/commands/swisstopo.py | 45 +++++++++++--- ckanext/swisstopo/s3/s3.py | 25 ++++++++ ckanext/swisstopo/s3/s3_ls.py | 10 --- ckanext/swisstopo/s3/s3_upload.py | 81 ------------------------- setup.py | 1 + 5 files changed, 63 insertions(+), 99 deletions(-) create mode 100644 ckanext/swisstopo/s3/s3.py delete mode 100644 ckanext/swisstopo/s3/s3_ls.py delete mode 100644 ckanext/swisstopo/s3/s3_upload.py diff --git a/ckanext/swisstopo/commands/swisstopo.py b/ckanext/swisstopo/commands/swisstopo.py index 4f07dcb..fa46ea1 100644 --- a/ckanext/swisstopo/commands/swisstopo.py +++ b/ckanext/swisstopo/commands/swisstopo.py @@ -1,38 +1,67 @@ import logging import ckan.lib.cli +import sys + +from ckanext.swisstopo.s3 import s3 +from ckanext.swisstopo.ckan_csw import ckan_csw + class SwisstopoCommand(ckan.lib.cli.CkanCommand): - '''Command to import swisstopo data + '''Command to handle swisstopo data + + Usage: + + # Show this help + paster --plugin=ckanext-swisstopo swisstopo help -c - Usage:: + # Import datasets + paster --plugin=ckanext-swisstopo swisstopo import -c - From the ckanext-swisstopo directory, run: + # List all files in the S3 bucket + paster --plugin=ckanext-swisstopo swisstopo list -c - paster swisstopo import -c + # Show output from CSW, 'query' is typically the name of a dataset like 'swissboundaries3D' + paster --plugin=ckanext-swisstopo swisstopo csw -c ''' summary = __doc__.split('\n')[0] usage = __doc__ def command(self): + # load pylons config + self._load_config() options = { 'import': self.importCmd, - 'show': self.showCmd, + 'list': self.listCmd, + 'csw': self.cswCmd, 'help': self.helpCmd, } try: cmd = self.args[0] - options[cmd]() + options[cmd](*self.args[1:]) except KeyError: - helpCmd() + self.helpCmd() + sys.exit(1) def helpCmd(self): print self.__doc__ + def listCmd(self): + s3_helper = s3.S3(); + s3_helper.list() + + def cswCmd(self, query=None): + if (query is None): + print "Argument 'query' must be 
set" + self.helpCmd() + sys.exit(1) + csw = ckan_csw.SwisstopoCkanMetadata(); + print csw.get_ckan_metadata(query) + + def importCmd(self): raise NotImplementedError def showCmd(self): raise NotImplementedError - diff --git a/ckanext/swisstopo/s3/s3.py b/ckanext/swisstopo/s3/s3.py new file mode 100644 index 0000000..a1b2cc5 --- /dev/null +++ b/ckanext/swisstopo/s3/s3.py @@ -0,0 +1,25 @@ +from boto.s3.connection import S3Connection +from ckan.plugins.core import SingletonPlugin, implements +from ckan.plugins.interfaces import IConfigurable +from pylons import config + +class S3(): + def __init__(self): + try: + self.key = config['ckanext.swisstopo.s3_key'] + self.token = config['ckanext.swisstopo.s3_token'] + self.bucket_name = config['ckanext.swisstopo.s3_bucket'] + except KeyError as e: + raise ConfigEntryNotFoundError("'%s' not found in config" % e.message) + + def __repr__(self): + return "" % (self.key, self.token, self.bucket_name) + + def list(self): + conn = S3Connection(self.key,self.token) + bucket = conn.get_bucket(self.bucket_name) + for key in bucket.list(): + print key.name.encode('utf-8') + +class ConfigEntryNotFoundError(Exception): + pass diff --git a/ckanext/swisstopo/s3/s3_ls.py b/ckanext/swisstopo/s3/s3_ls.py deleted file mode 100644 index 0d36d8e..0000000 --- a/ckanext/swisstopo/s3/s3_ls.py +++ /dev/null @@ -1,10 +0,0 @@ -from boto.s3.connection import S3Connection - -S3_KEY = p.toolkit.asbool(config.get('ckanext.swisstopo.s3_key', '')) -S3_TOKEN = p.toolkit.asbool(config.get('ckanext.swisstopo.s3_token', '')) -S3_BUCKET = p.toolkit.asbool(config.get('ckanext.swisstopo.s3_bucket', '')) - -conn = S3Connection(S3_KEY,S3_TOKEN) -bucket = conn.get_bucket(S3_BUCKET) -for key in bucket.list(): - print key.name.encode('utf-8') diff --git a/ckanext/swisstopo/s3/s3_upload.py b/ckanext/swisstopo/s3/s3_upload.py deleted file mode 100644 index d671693..0000000 --- a/ckanext/swisstopo/s3/s3_upload.py +++ /dev/null @@ -1,81 +0,0 @@ -import ckanclient -import 
urllib -import os -from httplib2 import Http -from datetime import datetime - -API_KEY = p.toolkit.asbool(config.get('ckanext.swisstopo.api_key', '')) -BASE_LOCATION = p.toolkit.asbool(config.get('ckanext.swisstopo.base_location', '')) - -ckan = ckanclient.CkanClient(api_key=API_KEY, base_location=BASE_LOCATION) - -now = datetime.now() - -# Register the dataset. -dataset_name = 's3_test_dataset_' + now.strftime("%Y-%m-%d_%H-%M-%S") -# csv_url = 'https://commondatastorage.googleapis.com/ckannet-storage/2011-11-24T112025/AfTerFibre_21nov2011.csv' -dataset_entity = { - 'name': dataset_name, - 'tags': 'test', - 'notes': 'Notes about the test', - } - -package_response = ckan.package_register_post(dataset_entity) -print "package_response start" -print package_response -print "package_response end" - -# Download the file -# file_name = csv_url.split('/')[-1] -file_name = 'sample.csv' -# urllib.urlretrieve (csv_url, file_name) - -# Upload the file -res1, res2 = ckan.upload_file(file_name) -print res1 -print res2 - -# ------------- - -# import ckanclient -# import urllib -# -# ckan = ckanclient.CkanClient(api_key=API_KEY, base_location=BASE_LOCATION) -# -# # dataset_name = 'phils_dataset_2013-02-19_23-44-42' -# dataset_name = 'new_liip_dataset' -# # file_name = 'sample.csv' -# # file_name = 'http://ckan.liip.ch.s3.amazonaws.com/2013-02-19T234442/sample.csv' -# # res1 = ckan.add_package_resource(dataset_name, file_name, resource_type='csv', description='Some CSV') -# -# res1 = ckan.add_package_resource(dataset_name, 'sample.csv', resource_type='data', description='this-is-a-description') -# print res1 -# res2 = ckan.add_package_resource(dataset_name, 'http://ckan.liip.ch.s3.amazonaws.com/2013-02-20T003611/sample.csv', name='Foo', resource_type='metadata', format='csv') -# print res2 - -# ------------- - -# url = 'http://ckan.liip.ch.s3.amazonaws.com/' - -# from poster.encode import multipart_encode -# from poster.streaminghttp import register_openers -# import urllib2 - -# 
register_openers() - -# datagen, headers = multipart_encode({ -# 'key': '2013-02-19T224240/sample.csv', -# 'acl': 'public-read', -# 'AWSAccessKeyId': 'AKIAIIHWI2WETQQMAW5Q', -# 'policy': 'eyJleHBpcmF0aW9uIjogIjIwMTMtMDItMjBUMTc6NDI6MzhaIiwKImNvbmRpdGlvbnMiOiBbeyJ4LWFtei1tZXRhLXVwbG9hZGVkLWJ5IjogIjc0NmIyYzU4LWU3NGYtNDZhNy1hMzhjLTA2Nzg3YjA2NDBhZSJ9LHsiYnVja2V0IjogImNrYW4ubGlpcC5jaCJ9LHsia2V5IjogIjIwMTMtMDItMTlUMjI0MjQwL3NhbXBsZS5jc3YifSx7ImFjbCI6ICJwdWJsaWMtcmVhZCJ9LHsic3VjY2Vzc19hY3Rpb25fcmVkaXJlY3QiOiAiaHR0cDovLzE4NS4xNC4xODYuMTE6NTAwMC9zdG9yYWdlL3VwbG9hZC9zdWNjZXNzX2VtcHR5P2xhYmVsPTIwMTMtMDItMTlUMjI0MjQwJTJGc2FtcGxlLmNzdiJ9LFsiY29udGVudC1sZW5ndGgtcmFuZ2UiLCAwLCA1MDAwMDAwMF0seyJ4LWFtei1zdG9yYWdlLWNsYXNzIjogIlNUQU5EQVJEIn1dfQ==', -# 'signature': 'RypkJqVhnpbfqDZlsWBU8vifSzE=', -# 'Content-Type': 'multipart/form-data', -# 'file': 'foobar,something,asdfjh' -# # "image1": open("sample.csv", "r"), -# }) -# request = urllib2.Request(url, datagen, headers) -# print request -# print urllib2.urlopen(request).read() - -# Cleanup -# os.remove(file_name) diff --git a/setup.py b/setup.py index ba67eb7..e834f77 100644 --- a/setup.py +++ b/setup.py @@ -29,6 +29,7 @@ swisstopo=ckanext.swisstopo.plugins:SwisstopoHarvest swisstopo_harvester=ckanext.swisstopo.harvester:SwisstopoHarvester [paste.paster_command] + swisstopo=ckanext.swisstopo.commands.swisstopo:SwisstopoCommand swisstopo_harvest=ckanext.swisstopo.commands.harvester:Harvester """, ) From d6e037ccebe8069af2ec3fb9403711d9a1c495fa Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Tue, 30 Jul 2013 09:35:38 +0200 Subject: [PATCH 03/27] Move all helpers in common module --- ckanext/swisstopo/ckan_csw/run_ckan_csw.py | 8 -------- ckanext/swisstopo/commands/swisstopo.py | 4 ++-- ckanext/swisstopo/{ckan_csw => helpers}/__init__.py | 0 ckanext/swisstopo/{ckan_csw => helpers}/ckan_csw.py | 0 ckanext/swisstopo/{s3 => helpers}/s3.py | 0 ckanext/swisstopo/s3/__init__.py | 0 6 files changed, 2 insertions(+), 10 deletions(-) delete 
mode 100644 ckanext/swisstopo/ckan_csw/run_ckan_csw.py rename ckanext/swisstopo/{ckan_csw => helpers}/__init__.py (100%) rename ckanext/swisstopo/{ckan_csw => helpers}/ckan_csw.py (100%) rename ckanext/swisstopo/{s3 => helpers}/s3.py (100%) delete mode 100644 ckanext/swisstopo/s3/__init__.py diff --git a/ckanext/swisstopo/ckan_csw/run_ckan_csw.py b/ckanext/swisstopo/ckan_csw/run_ckan_csw.py deleted file mode 100644 index 851ee60..0000000 --- a/ckanext/swisstopo/ckan_csw/run_ckan_csw.py +++ /dev/null @@ -1,8 +0,0 @@ -from ckanext.swisstopo.ckan_csw import ckan_csw -from pprint import pprint - -swisstopo = ckan_csw.SwisstopoCkanMetadata() - -pprint(swisstopo) - -pprint(swisstopo.get_ckan_metadata('swissboundaries3D')) diff --git a/ckanext/swisstopo/commands/swisstopo.py b/ckanext/swisstopo/commands/swisstopo.py index fa46ea1..12f5ea4 100644 --- a/ckanext/swisstopo/commands/swisstopo.py +++ b/ckanext/swisstopo/commands/swisstopo.py @@ -2,8 +2,8 @@ import ckan.lib.cli import sys -from ckanext.swisstopo.s3 import s3 -from ckanext.swisstopo.ckan_csw import ckan_csw +from ckanext.swisstopo.helpers import s3 +from ckanext.swisstopo.helpers import ckan_csw class SwisstopoCommand(ckan.lib.cli.CkanCommand): diff --git a/ckanext/swisstopo/ckan_csw/__init__.py b/ckanext/swisstopo/helpers/__init__.py similarity index 100% rename from ckanext/swisstopo/ckan_csw/__init__.py rename to ckanext/swisstopo/helpers/__init__.py diff --git a/ckanext/swisstopo/ckan_csw/ckan_csw.py b/ckanext/swisstopo/helpers/ckan_csw.py similarity index 100% rename from ckanext/swisstopo/ckan_csw/ckan_csw.py rename to ckanext/swisstopo/helpers/ckan_csw.py diff --git a/ckanext/swisstopo/s3/s3.py b/ckanext/swisstopo/helpers/s3.py similarity index 100% rename from ckanext/swisstopo/s3/s3.py rename to ckanext/swisstopo/helpers/s3.py diff --git a/ckanext/swisstopo/s3/__init__.py b/ckanext/swisstopo/s3/__init__.py deleted file mode 100644 index e69de29..0000000 From affdca19d49f0350d04e89d3cbd1a1734867c7ca Mon 
Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Tue, 30 Jul 2013 09:39:39 +0200 Subject: [PATCH 04/27] Remove own handling of etree and use lxml instead --- ckanext/swisstopo/etree/__init__.py | 0 ckanext/swisstopo/etree/etree.py | 54 --------------------------- ckanext/swisstopo/helpers/ckan_csw.py | 2 +- 3 files changed, 1 insertion(+), 55 deletions(-) delete mode 100644 ckanext/swisstopo/etree/__init__.py delete mode 100644 ckanext/swisstopo/etree/etree.py diff --git a/ckanext/swisstopo/etree/__init__.py b/ckanext/swisstopo/etree/__init__.py deleted file mode 100644 index e69de29..0000000 diff --git a/ckanext/swisstopo/etree/etree.py b/ckanext/swisstopo/etree/etree.py deleted file mode 100644 index fb7ac43..0000000 --- a/ckanext/swisstopo/etree/etree.py +++ /dev/null @@ -1,54 +0,0 @@ -# ============================================================================= -# OWSLib. Copyright (C) 2005 Sean C. Gillies -# -# Contact email: sgillies@frii.com -# ============================================================================= - -def patch_well_known_namespaces(etree_module): - """Monkey patches the etree module to add some well-known namespaces.""" - etree_module._namespace_map.update({ - "http://www.w3.org/1999/02/22-rdf-syntax-ns#": "rdf", - "http://purl.org/rss/1.0/": "rss", - "http://purl.org/rss/1.0/modules/taxonomy/": "taxo", - "http://purl.org/dc/elements/1.1/": "dc", - "http://purl.org/rss/1.0/modules/syndication/": "syn", - "http://www.w3.org/2003/01/geo/wgs84_pos#": "geo", - "http://www.opengis.net/cat/csw/2.0.2": "csw", - "http://purl.org/dc/terms/": "dct", - "http://www.isotc211.org/2005/gco": "gco", - "http://www.isotc211.org/2005/gmd": "gmd", - "http://www.geocat.ch/2008/che": "che", - "http://www.isotc211.org/2005/gts": "gts", - "http://www.isotc211.org/2005/srv": "srv", - "http://www.fgdc.gov": "fgdc", - "http://gcmd.gsfc.nasa.gov/Aboutus/xml/dif/": "dif", - "http://www.opengis.net/gml": "gml", - "http://www.opengis.net/ogc": "ogc", - 
"http://www.opengis.net/ows": "ows", - "http://www.opengis.net/ows/1.1": "ows", - "http://www.opengis.net/ows/2.0": "ows", - "http://www.opengis.net/wms": "wms", - "http://www.opengis.net/context": "wmc", - "http://www.opengis.net/wfs": "wfs", - "http://www.opengis.net/sos/1.0": "sos", - "urn:oasis:names:tc:ebxml-regrep:xsd:rim:3.0": "rim", - "http://www.w3.org/2001/XMLSchema": "xs", - "http://www.w3.org/XML/Schema": "xs2", - "http://www.w3.org/2001/XMLSchema-instance": "xsi", - "http://www.w3.org/1999/xlink": "xlink"}) - -# try to find lxml or elementtree -try: - from lxml import etree -except ImportError: - try: - # Python 2.5 with ElementTree included - import xml.etree.ElementTree as etree - patch_well_known_namespaces(etree) - except ImportError: - try: - # Python < 2.5 with ElementTree installed - import elementtree.ElementTree as etree - patch_well_known_namespaces(etree) - except ImportError: - raise RuntimeError('You need either lxml or ElementTree to use OWSLib!') diff --git a/ckanext/swisstopo/helpers/ckan_csw.py b/ckanext/swisstopo/helpers/ckan_csw.py index a808926..e9fb323 100644 --- a/ckanext/swisstopo/helpers/ckan_csw.py +++ b/ckanext/swisstopo/helpers/ckan_csw.py @@ -1,6 +1,6 @@ import traceback from owslib.csw import CatalogueServiceWeb -from ckanext.swisstopo.etree.etree import etree +from lxml import etree namespaces = { 'atom': 'http://www.w3.org/2005/Atom', From c1da1b1af1186a3197a27d8e2ff3bd3e325f6ac1 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Tue, 30 Jul 2013 11:12:35 +0200 Subject: [PATCH 05/27] Add test requirements and fix relative path issue in tests --- ckanext/swisstopo/tests/ckan_csw_test.py | 14 +++++++------- pip-requirements.txt | 1 + 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/ckanext/swisstopo/tests/ckan_csw_test.py b/ckanext/swisstopo/tests/ckan_csw_test.py index dc2dc29..1738dce 100644 --- a/ckanext/swisstopo/tests/ckan_csw_test.py +++ b/ckanext/swisstopo/tests/ckan_csw_test.py @@ -1,7 +1,7 @@ from 
mock import Mock -import unittest -from ckanext.swisstopo.ckan_csw import ckan_csw -from ckanext.swisstopo.etree.etree import etree +import unittest, os, sys +from ckanext.swisstopo.helpers import ckan_csw +from lxml import etree class CkanMetadataTest(unittest.TestCase): def test_init(self): @@ -17,7 +17,7 @@ def test_namespaces_available(self): class SwisstopoCkanMetadataTest(unittest.TestCase): def setUp(self): - self.test_xml = etree.parse('swissboundaries_csw.test.xml') + self.test_xml = etree.parse(os.path.dirname(__file__) + '/swissboundaries_csw.test.xml') self.swisstopo = ckan_csw.SwisstopoCkanMetadata() self.swisstopo.get_xml = Mock(return_value=self.test_xml) @@ -46,7 +46,7 @@ def test_get_value(self): class XmlAttributeTest(unittest.TestCase): def setUp(self): - xml_input = etree.parse('test.xml') + xml_input = etree.parse(os.path.dirname(__file__) + '/test.xml') self.test_xml = xml_input.getroot() def remove_all_whitespace(self, str): @@ -54,7 +54,7 @@ def remove_all_whitespace(self, str): def test_xml_attribute_get_value_init(self): attr = ckan_csw.XmlAttribute('', xml=self.test_xml) - xml_string = open('test.xml', 'r').read() + xml_string = open(os.path.dirname(__file__) + '/test.xml', 'r').read() xml_string = self.remove_all_whitespace(xml_string) attr_value = self.remove_all_whitespace(attr.get_value()) @@ -63,7 +63,7 @@ def test_xml_attribute_get_value_init(self): def test_xml_attribute_get_value_call(self): attr = ckan_csw.XmlAttribute('') - xml_string = open('test.xml', 'r').read() + xml_string = open(os.path.dirname(__file__) + '/test.xml', 'r').read() xml_string = self.remove_all_whitespace(xml_string) attr_value = self.remove_all_whitespace(attr.get_value(xml=self.test_xml)) diff --git a/pip-requirements.txt b/pip-requirements.txt index fd1c9c6..e497032 100644 --- a/pip-requirements.txt +++ b/pip-requirements.txt @@ -3,3 +3,4 @@ OWSLib==0.7.1 lxml==2.2.4 boto==2.8.0 +mock==1.0.1 From 6efb57077dffe813881308308a1c3f60d95fc7c3 Mon Sep 17 
00:00:00 2001 From: Stefan Oderbolz Date: Tue, 30 Jul 2013 11:15:21 +0200 Subject: [PATCH 06/27] Rearranged imports in test --- ckanext/swisstopo/tests/ckan_csw_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/swisstopo/tests/ckan_csw_test.py b/ckanext/swisstopo/tests/ckan_csw_test.py index 1738dce..239581f 100644 --- a/ckanext/swisstopo/tests/ckan_csw_test.py +++ b/ckanext/swisstopo/tests/ckan_csw_test.py @@ -1,5 +1,5 @@ -from mock import Mock import unittest, os, sys +from mock import Mock from ckanext.swisstopo.helpers import ckan_csw from lxml import etree From b51e10feceb1776a24e43187d1d6c4bda4ce03bd Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Sun, 4 Aug 2013 19:18:11 +0200 Subject: [PATCH 07/27] Fixed typo in setup.py --- setup.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/setup.py b/setup.py index e834f77..741c453 100644 --- a/setup.py +++ b/setup.py @@ -27,7 +27,7 @@ [ckan.plugins] #swisstopo_plugin=ckanext.swisstopo:PluginClass swisstopo=ckanext.swisstopo.plugins:SwisstopoHarvest - swisstopo_harvester=ckanext.swisstopo.harvester:SwisstopoHarvester + swisstopo_harvester=ckanext.swisstopo.harvesters:SwisstopoHarvester [paste.paster_command] swisstopo=ckanext.swisstopo.commands.swisstopo:SwisstopoCommand swisstopo_harvest=ckanext.swisstopo.commands.harvester:Harvester From f5b376830dcfe55de769cda20e77066f4649ef07 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Sun, 4 Aug 2013 19:18:35 +0200 Subject: [PATCH 08/27] Converted s3.list to a generator --- ckanext/swisstopo/commands/swisstopo.py | 3 ++- ckanext/swisstopo/helpers/s3.py | 6 +++--- 2 files changed, 5 insertions(+), 4 deletions(-) diff --git a/ckanext/swisstopo/commands/swisstopo.py b/ckanext/swisstopo/commands/swisstopo.py index 12f5ea4..37b6c10 100644 --- a/ckanext/swisstopo/commands/swisstopo.py +++ b/ckanext/swisstopo/commands/swisstopo.py @@ -49,7 +49,8 @@ def helpCmd(self): def listCmd(self): s3_helper = s3.S3(); - 
s3_helper.list() + for file in s3_helper.list(): + print file def cswCmd(self, query=None): if (query is None): diff --git a/ckanext/swisstopo/helpers/s3.py b/ckanext/swisstopo/helpers/s3.py index a1b2cc5..99ea40e 100644 --- a/ckanext/swisstopo/helpers/s3.py +++ b/ckanext/swisstopo/helpers/s3.py @@ -15,11 +15,11 @@ def __init__(self): def __repr__(self): return "" % (self.key, self.token, self.bucket_name) - def list(self): + def list(self, prefix=None): conn = S3Connection(self.key,self.token) bucket = conn.get_bucket(self.bucket_name) - for key in bucket.list(): - print key.name.encode('utf-8') + for key in bucket.list(prefix=prefix): + yield key.name.encode('utf-8') class ConfigEntryNotFoundError(Exception): pass From 6a0520a3f3415a5973f2f219465d08e03b26f25e Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Sun, 4 Aug 2013 19:19:13 +0200 Subject: [PATCH 09/27] Use xpath method instead of find/findall to enable 'namespaces' param --- ckanext/swisstopo/helpers/ckan_csw.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/ckanext/swisstopo/helpers/ckan_csw.py b/ckanext/swisstopo/helpers/ckan_csw.py index e9fb323..1c346eb 100644 --- a/ckanext/swisstopo/helpers/ckan_csw.py +++ b/ckanext/swisstopo/helpers/ckan_csw.py @@ -1,6 +1,8 @@ import traceback from owslib.csw import CatalogueServiceWeb from lxml import etree +import logging +log = logging.getLogger(__name__) namespaces = { 'atom': 'http://www.w3.org/2005/Atom', @@ -43,7 +45,7 @@ def get_value(self, **kwargs): class XPathAttribute(Attribute): def get_element(self, xml): - return xml.find(self._config, namespaces) + return xml.xpath(self._config, namespaces=namespaces)[0] def get_value(self, **kwargs): self.env.update(kwargs) @@ -51,13 +53,14 @@ def get_value(self, **kwargs): try: # this should probably return a XPathTextAttribute value = self.get_element(xml) - except: + except Exception as e: + log.debug(e) value = '' return value class XPathMultiAttribute(XPathAttribute): def 
get_element(self, xml): - return xml.findall(self._config, namespaces) + return xml.xpath(self._config, namespaces=namespaces) class XPathTextAttribute(XPathAttribute): def get_value(self, **kwargs): @@ -159,8 +162,11 @@ def get_xml(self, id): def get_ckan_metadata(self, dataset_name): """ Returns the requested dataset mapped to CKAN attributes """ id = self.get_id_by_dataset_name(dataset_name) + log.debug("Dataset ID: %s" % id) + dataset_xml = etree.fromstring(self.get_xml(id)) for key in self.metadata: + log.debug("Metadata key: %s" % key) attribute = self.get_attribute(dataset_name, key) self.metadata[key] = attribute.get_value(xml=dataset_xml) return self.metadata From 6a5774fd841b28436f222f862be6684f3f7b5bbc Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Sun, 4 Aug 2013 19:20:42 +0200 Subject: [PATCH 10/27] Complete rewrite of harvester based on SFA/FSO and ckanext-harvester --- .../harvesters/swisstopoharvester.py | 283 ++++++++++++------ 1 file changed, 195 insertions(+), 88 deletions(-) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 6c15156..1e83cfe 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -1,4 +1,5 @@ -import ckanclient +#n -*- coding: utf-8 -*- + import random import os import shutil @@ -6,7 +7,6 @@ import zipfile from pprint import pprint from collections import defaultdict -import ckan_csw from ckan.lib.base import c from ckan import model @@ -14,24 +14,55 @@ from ckan.logic import ValidationError, NotFound, get_action, action from ckan.lib.helpers import json -from ckanext.harvest.model import HarvestJob, HarvestObject, HarvestGatherError, \ - HarvestObjectError +from ckanext.harvest.model import HarvestJob, HarvestObject, HarvestGatherError, HarvestObjectError from ckanext.harvest.harvesters import HarvesterBase +from ckanext.swisstopo.helpers import ckan_csw +from 
ckanext.swisstopo.helpers import s3 + import logging log = logging.getLogger(__name__) -class SwisstopoHarvester(HarvesterCase): +class SwisstopoHarvester(HarvesterBase): ''' The harvester for swisstopo ''' - API_KEY = p.toolkit.asbool(config.get('ckanext.swisstopo.api_key', '')) - BASE_LOCATION = p.toolkit.asbool(config.get('ckanext.swisstopo.base_location', '')) - - ckan = ckanclient.CkanClient(api_key=API_KEY, base_location=BASE_LOCATION) - - DATASET_NAMES = ['swissboundaries3D'] + HARVEST_USER = u'harvest' + + DATASETS = { + 'ch.swisstopo.swissboundaries3d-gemeinde-flaeche.fill': { + 'csw_query': 'swissboundaries3D Gemeindegrenzen', + }, + 'ch.swisstopo.swissboundaries3d-bezirk-flaeche.fill': { + 'csw_query': 'swissboundaries3D Bezirksgrenzen', + }, + 'ch.swisstopo.swissboundaries3d-kanton-flaeche.fill': { + 'csw_query': 'swissboundaries3D Kantonsgrenzen', + }, + 'ch.swisstopo.swissboundaries3d-land-flaeche.fill': { + 'csw_query': 'swissboundaries3D Landesgrenzen', + }, + 'ch.swisstopo.pixelkarte-farbe-pk1000.noscale': { + 'csw_query': 'Landeskarte 1:1 Mio.', + }, + } + + FILES_BASE_URL = 'http://opendata-ch.s3.amazonaws.com' + + ORGANIZATION = { + 'de': u'Bundesamt für Landestopografie swisstopo', + 'fr': u'Office fédéral de topographie swisstopo', + 'it': u'Ufficio federale di topografia swisstopo', + 'en': u'Federal Office of Topography swisstopo', + } + GROUPS = { + 'de': [u'Raum und Umwelt'], + 'fr': [u'Espace et environnement'], + 'it': [u'Territorio e ambiente'], + 'en': [u'Territory and environment'] + } + def info(self): return { @@ -44,10 +75,18 @@ def info(self): def gather_stage(self, harvest_job): log.debug('In SwisstopoHarvester gather_stage') - csw = ckan_csw.SwisstopoCkanMetadata(); ids = [] - for dataset_name in self.DATASET_NAMES: - metadata = csw.get_ckan_metadata(dataset_name) + for dataset_name, dataset in self.DATASETS.iteritems(): + csw = ckan_csw.SwisstopoCkanMetadata(); + metadata = csw.get_ckan_metadata(dataset['csw_query']) + 
log.debug(metadata) + + metadata['translations'] = self._generate_term_translations() + log.debug("Translations: %s" % metadata['translations']) + + metadata['resources'] = self._generate_resources_dict_array(dataset_name) + log.debug(metadata['resources']) + obj = HarvestObject( guid = metadata['id'], job = harvest_job, @@ -61,79 +100,147 @@ def gather_stage(self, harvest_job): def fetch_stage(self, harvest_object): - pass - - def import_stage(self, harvest_obeject): - pass - - - - def create_dataset(name, tags, description, metadata): - dataset_name = name + '_' + str(random.randint(1000000, 9999999999)) - dataset_entity = { - 'name': dataset_name, - 'title': name + ' - ' + metadata['title'], - 'tags': tags + ' ' + metadata['tags'], - 'notes': metadata['notes'], - 'url': metadata['url'], - 'author': metadata['author'], - 'author_email': metadata['author_email'], - 'maintainer': metadata['maintainer'], - 'maintainer_email': metadata['maintainer_email'], - 'license': metadata['license'], - } - return dataset_entity - - def extract_file(zipped_file, name, extract_path): - (dirname, filename) = os.path.split(name) - new_path = os.path.join(extract_path, dirname) - extracted_filename = os.path.join(extract_path, name) - if not os.path.exists(new_path): - os.makedirs(new_path) - fd = open(extracted_filename,"w") - fd.write(zipped_file.read(name)) - fd.close() - return extracted_filename - -# Copy the file -origin_file = '/home/www-data/swissBOUNDARIES3D080312.zip' -origin_path, file_name = os.path.split(origin_file) -temp_dir = tempfile.mkdtemp() -shutil.copy(origin_file, temp_dir); -temporary_file = os.path.join(temp_dir, file_name) - -csw = ckan_csw.SwisstopoCkanMetadata(); -metadata = csw.get_ckan_metadata('swissboundaries3D') - -aggregates = defaultdict(list) -# Unzip the file -zipped_file = zipfile.ZipFile(temporary_file) -for name in zipped_file.namelist(): - (dirname, filename) = os.path.split(name) - pure_name, file_extension = os.path.splitext(filename) - 
dataset_name = pure_name.lower().replace(".","-") - if file_extension not in ['.pdf']: - print "Extracting " + name - extracted_filename = extract_file(zipped_file, name, temp_dir) - resource = { - 'filename': extracted_filename, - 'title': 'swissboundaries3D - ' + filename, - 'description': 'swissboundaries ' + file_extension + ' file', - 'format': file_extension[1:] - } - aggregates[dataset_name].append(resource) - - -for key, aggregate in aggregates.iteritems(): - dataset = create_dataset(key, 'swissboundaries Verwaltungseinheiten', 'swissboundaries ' + key, metadata) - ckan.package_register_post(dataset) - - for resource in aggregate: - pprint(resource) + log.debug('In SwisstopoHarvester fetch_stage') + + # Get the URL + log.debug(json.loads(harvest_object.content)) + name = json.loads(harvest_object.content)['name'] + log.debug(harvest_object.content) + + # Get contents + try: + harvest_object.save() + log.debug('successfully processed ' + name) + return True + except Exception, e: + log.exception(e) + + def import_stage(self, harvest_object): + log.debug('In SwisstopoHarvester import_stage') + + if not harvest_object: + log.error('No harvest object received') + return False + + try: + package_dict = json.loads(harvest_object.content) + + package_dict['id'] = harvest_object.guid + package_dict['name'] = self._gen_new_name(package_dict['title']) + + tags = package_dict['tags'] + package_dict['tags'] = [] + package_dict['tags'].extend([t for t in tags.split()]) + + user = model.User.get(self.HARVEST_USER) + context = { + 'model': model, + 'session': Session, + 'user': self.HARVEST_USER + } + + # Find or create group the dataset should get assigned to + package_dict['groups'] = self._find_or_create_groups(context) + + # Find or create the organization the dataset should get assigned to + package_dict['owner_org'] = self._find_or_create_organization(context) + + package = model.Package.get(package_dict['id']) + pkg_role = model.PackageRole(package=package, 
user=user, role=model.Role.ADMIN) + + log.debug('Save or update package %s (%s)' % (package_dict['name'],package_dict['id'])) + result = self._create_or_update_package(package_dict, harvest_object) + + log.debug('Save or update term translations') + self._submit_term_translations(context, package_dict) + Session.commit() + + except Exception, e: + log.exception(e) + return True + + def _find_or_create_groups(self, context): + group_name = self.GROUPS['de'][0] + data_dict = { + 'id': group_name, + 'name': self._gen_new_name(group_name), + 'title': group_name + } + try: + group = get_action('group_show')(context, data_dict) + except: + group = get_action('group_create')(context, data_dict) + log.info('created the group ' + group['id']) + group_ids = [] + group_ids.append(group['id']) + return group_ids + + def _find_or_create_organization(self, context): + try: + data_dict = { + 'permission': 'edit_group', + 'id': self._gen_new_name(self.ORGANIZATION['de']), + 'name': self._gen_new_name(self.ORGANIZATION['de']), + 'title': self.ORGANIZATION['de'] + } + organization = get_action('organization_show')(context, data_dict) + except: + organization = get_action('organization_create')(context, data_dict) + return organization['id'] + + def _generate_term_translations(self): + ''' + ''' + try: + translations = [] + + for k,v in self.ORGANIZATION.items(): + if k != u'de': + translations.append({ + 'lang_code': k, + 'term': self.ORGANIZATION[u'de'], + 'term_translation': v + }) + + for k,v in self.GROUPS.items(): + if k != u'de': + translations.append({ + 'lang_code': k, + 'term': self.GROUPS[u'de'], + 'term_translation': v + }) + + return translations + + + except Exception, e: + log.exception(e) + return [] + + def _submit_term_translations(self, context, package_dict): + for translation in package_dict['translations']: + action.update.term_translation_update(context, translation) + + def _generate_resources_dict_array(self, dataset_name): try: - dataset = 
ckan.add_package_resource(dataset['name'], resource['filename'], name=resource['title'], resource_type='data', format=resource['format'], description=resource['description']) - except ValueError as e: - print e - pprint(dataset) + resources = [] + prefix = dataset_name + u'/' + s3_helper = s3.S3() + for file in s3_helper.list(prefix=prefix): + resources.append({ + 'url': self.FILES_BASE_URL + '/' + file, + 'name': file.replace(prefix, u''), + 'format': self._guess_format(file) + }) + return resources + except Exception, e: + log.exception(e) + return [] + + def _guess_format(self, file_name): + ''' + Return the format for a given full filename + ''' + _, file_extension = os.path.splitext(file_name.lower()) + return file_extension[1:] -shutil.rmtree(temp_dir); From 4067f1e933bb77950ff54820454f5a745ee0db62 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Sun, 4 Aug 2013 19:21:16 +0200 Subject: [PATCH 11/27] Added ckanext-harvester harvester.py to control this harvester --- ckanext/swisstopo/commands/harvester.py | 342 ++++++++++++++++++++++++ 1 file changed, 342 insertions(+) create mode 100644 ckanext/swisstopo/commands/harvester.py diff --git a/ckanext/swisstopo/commands/harvester.py b/ckanext/swisstopo/commands/harvester.py new file mode 100644 index 0000000..df35965 --- /dev/null +++ b/ckanext/swisstopo/commands/harvester.py @@ -0,0 +1,342 @@ +import sys +import re +from pprint import pprint + +from ckan import model +from ckan.logic import get_action, ValidationError + +from ckan.lib.cli import CkanCommand + +class Harvester(CkanCommand): + '''Harvests remotely mastered metadata + + Usage: + + harvester initdb + - Creates the necessary tables in the database + + harvester source {url} {type} [{config}] [{active}] [{user-id}] [{publisher-id}] [{frequency}] + - create new harvest source + + harvester rmsource {id} + - remove (inactivate) a harvester source + + harvester sources [all] + - lists harvest sources + If 'all' is defined, it also shows the Inactive 
sources + + harvester job {source-id} + - create new harvest job + + harvester jobs + - lists harvest jobs + + harvester run + - runs harvest jobs + + harvester gather_consumer + - starts the consumer for the gathering queue + + harvester fetch_consumer + - starts the consumer for the fetching queue + + harvester purge_queues + - removes all jobs from fetch and gather queue + + harvester [-j] [--segments={segments}] import [{source-id}] + - perform the import stage with the last fetched objects, optionally belonging to a certain source. + Please note that no objects will be fetched from the remote server. It will only affect + the last fetched objects already present in the database. + + If the -j flag is provided, the objects are not joined to existing datasets. This may be useful + when importing objects for the first time. + + The --segments flag allows to define a string containing hex digits that represent which of + the 16 harvest object segments to import. e.g. 15af will run segments 1,5,a,f + + harvester job-all + - create new harvest jobs for all active sources. + + harvester reindex + - reindexes the harvest source datasets + + The commands should be run from the ckanext-harvest directory and expect + a development.ini file to be present. Most of the time you will + specify the config explicitly though:: + + paster harvester sources --config=../ckan/development.ini + + ''' + + summary = __doc__.split('\n')[0] + usage = __doc__ + max_args = 8 + min_args = 0 + + def __init__(self,name): + + super(Harvester,self).__init__(name) + + self.parser.add_option('-j', '--no-join-datasets', dest='no_join_datasets', + action='store_true', default=False, help='Do not join harvest objects to existing datasets') + + self.parser.add_option('--segments', dest='segments', + default=False, help= +'''A string containing hex digits that represent which of + the 16 harvest object segments to import. e.g. 
15af will run segments 1,5,a,f''') + + def command(self): + self._load_config() + + # We'll need a sysadmin user to perform most of the actions + # We will use the sysadmin site user (named as the site_id) + context = {'model':model,'session':model.Session,'ignore_auth':True} + self.admin_user = get_action('get_site_user')(context,{}) + + + print '' + + if len(self.args) == 0: + self.parser.print_usage() + sys.exit(1) + cmd = self.args[0] + if cmd == 'source': + self.create_harvest_source() + elif cmd == "rmsource": + self.remove_harvest_source() + elif cmd == 'sources': + self.list_harvest_sources() + elif cmd == 'job': + self.create_harvest_job() + elif cmd == 'jobs': + self.list_harvest_jobs() + elif cmd == 'run': + self.run_harvester() + elif cmd == 'gather_consumer': + import logging + from ckanext.harvest.queue import get_gather_consumer, gather_callback + logging.getLogger('amqplib').setLevel(logging.INFO) + consumer = get_gather_consumer() + for method, header, body in consumer.consume(queue='ckan.harvest.gather'): + gather_callback(consumer, method, header, body) + elif cmd == 'fetch_consumer': + import logging + logging.getLogger('amqplib').setLevel(logging.INFO) + from ckanext.harvest.queue import get_fetch_consumer, fetch_callback + consumer = get_fetch_consumer() + for method, header, body in consumer.consume(queue='ckan.harvest.fetch'): + fetch_callback(consumer, method, header, body) + elif cmd == 'purge_queues': + from ckanext.harvest.queue import purge_queues + purge_queues() + elif cmd == 'initdb': + self.initdb() + elif cmd == 'import': + self.initdb() + self.import_stage() + elif cmd == 'job-all': + self.create_harvest_job_all() + elif cmd == 'harvesters-info': + harvesters_info = get_action('harvesters_info_show')() + pprint(harvesters_info) + elif cmd == 'reindex': + self.reindex() + else: + print 'Command %s not recognized' % cmd + + def _load_config(self): + super(Harvester, self)._load_config() + + def initdb(self): + from 
ckanext.harvest.model import setup as db_setup + db_setup() + + print 'DB tables created' + + def create_harvest_source(self): + + if len(self.args) >= 2: + url = unicode(self.args[1]) + else: + print 'Please provide a source URL' + sys.exit(1) + if len(self.args) >= 3: + type = unicode(self.args[2]) + else: + print 'Please provide a source type' + sys.exit(1) + if len(self.args) >= 4: + config = unicode(self.args[3]) + else: + config = None + if len(self.args) >= 5: + active = not(self.args[4].lower() == 'false' or \ + self.args[4] == '0') + else: + active = True + if len(self.args) >= 6: + user_id = unicode(self.args[5]) + else: + user_id = u'' + if len(self.args) >= 7: + publisher_id = unicode(self.args[6]) + else: + publisher_id = u'' + if len(self.args) >= 8: + frequency = unicode(self.args[7]) + if not frequency: + frequency = 'MANUAL' + else: + frequency = 'MANUAL' + try: + data_dict = { + 'url':url, + 'type':type, + 'config':config, + 'frequency':frequency, + 'active':active, + 'user_id':user_id, + 'publisher_id':publisher_id} + + context = {'model':model, 'session':model.Session, 'user': self.admin_user['name']} + source = get_action('harvest_source_create')(context,data_dict) + print 'Created new harvest source:' + self.print_harvest_source(source) + + sources = get_action('harvest_source_list')(context,{}) + self.print_there_are('harvest source', sources) + + # Create a harvest job for the new source if not regular job. 
+ if not data_dict['frequency']: + get_action('harvest_job_create')(context,{'source_id':source['id']}) + print 'A new Harvest Job for this source has also been created' + + except ValidationError,e: + print 'An error occurred:' + print str(e.error_dict) + raise e + + def remove_harvest_source(self): + if len(self.args) >= 2: + source_id = unicode(self.args[1]) + else: + print 'Please provide a source id' + sys.exit(1) + context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} + get_action('harvest_source_delete')(context,{'id':source_id}) + print 'Removed harvest source: %s' % source_id + + def list_harvest_sources(self): + if len(self.args) >= 2 and self.args[1] == 'all': + data_dict = {} + what = 'harvest source' + else: + data_dict = {'only_active':True} + what = 'active harvest source' + + context = {'model': model,'session':model.Session, 'user': self.admin_user['name']} + sources = get_action('harvest_source_list')(context,data_dict) + self.print_harvest_sources(sources) + self.print_there_are(what=what, sequence=sources) + + def create_harvest_job(self): + if len(self.args) >= 2: + source_id = unicode(self.args[1]) + else: + print 'Please provide a source id' + sys.exit(1) + + context = {'model': model,'session':model.Session, 'user': self.admin_user['name']} + job = get_action('harvest_job_create')(context,{'source_id':source_id}) + + self.print_harvest_job(job) + jobs = get_action('harvest_job_list')(context,{'status':u'New'}) + self.print_there_are('harvest job', jobs, condition=u'New') + + def list_harvest_jobs(self): + context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} + jobs = get_action('harvest_job_list')(context,{}) + + self.print_harvest_jobs(jobs) + self.print_there_are(what='harvest job', sequence=jobs) + + def run_harvester(self): + context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} + jobs = get_action('harvest_jobs_run')(context,{}) + + #print 'Sent 
%s jobs to the gather queue' % len(jobs) + + def import_stage(self): + + if len(self.args) >= 2: + source_id = unicode(self.args[1]) + else: + source_id = None + + context = {'model': model, 'session':model.Session, 'user': self.admin_user['name'], + 'join_datasets': not self.options.no_join_datasets, + 'segments': self.options.segments} + + + objs = get_action('harvest_objects_import')(context,{'source_id':source_id}) + + print '%s objects reimported' % len(objs) + + def create_harvest_job_all(self): + context = {'model': model, 'user': self.admin_user['name'], 'session':model.Session} + jobs = get_action('harvest_job_create_all')(context,{}) + print 'Created %s new harvest jobs' % len(jobs) + + def reindex(self): + context = {'model': model, 'user': self.admin_user['name']} + get_action('harvest_sources_reindex')(context,{}) + + + def print_harvest_sources(self, sources): + if sources: + print '' + for source in sources: + self.print_harvest_source(source) + + def print_harvest_source(self, source): + print 'Source id: %s' % source['id'] + print ' url: %s' % source['url'] + print ' type: %s' % source['type'] + print ' active: %s' % source['active'] + print ' user: %s' % source['user_id'] + print 'publisher: %s' % source['publisher_id'] + print 'frequency: %s' % source['frequency'] + print ' jobs: %s' % source['status']['job_count'] + print '' + + def print_harvest_jobs(self, jobs): + if jobs: + print '' + for job in jobs: + self.print_harvest_job(job) + + def print_harvest_job(self, job): + print ' Job id: %s' % job['id'] + print ' status: %s' % job['status'] + print ' source: %s' % job['source_id'] + print ' objects: %s' % len(job.get('objects', [])) + + print 'gather_errors: %s' % len(job.get('gather_errors', [])) + for error in job.get('gather_errors', []): + print ' %s' % error['message'] + + print '' + + def print_there_are(self, what, sequence, condition=''): + is_singular = self.is_singular(sequence) + print 'There %s %s %s%s%s' % ( + is_singular and 'is' 
or 'are', + len(sequence), + condition and ('%s ' % condition.lower()) or '', + what, + not is_singular and 's' or '', + ) + + def is_singular(self, sequence): + return len(sequence) == 1 + From 112d091da096f08fa6228630e8302446f28e9a69 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Mon, 5 Aug 2013 01:16:04 +0200 Subject: [PATCH 12/27] Add multi-language support to CSW queries --- ckanext/swisstopo/commands/swisstopo.py | 4 +-- .../harvesters/swisstopoharvester.py | 34 ++++++++++++++++++- ckanext/swisstopo/helpers/ckan_csw.py | 19 +++++++---- 3 files changed, 47 insertions(+), 10 deletions(-) diff --git a/ckanext/swisstopo/commands/swisstopo.py b/ckanext/swisstopo/commands/swisstopo.py index 37b6c10..80212b2 100644 --- a/ckanext/swisstopo/commands/swisstopo.py +++ b/ckanext/swisstopo/commands/swisstopo.py @@ -52,13 +52,13 @@ def listCmd(self): for file in s3_helper.list(): print file - def cswCmd(self, query=None): + def cswCmd(self, query=None, lang='de'): if (query is None): print "Argument 'query' must be set" self.helpCmd() sys.exit(1) csw = ckan_csw.SwisstopoCkanMetadata(); - print csw.get_ckan_metadata(query) + print csw.get_ckan_metadata(query, lang) def importCmd(self): diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 1e83cfe..5c0d173 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -78,12 +78,23 @@ def gather_stage(self, harvest_job): ids = [] for dataset_name, dataset in self.DATASETS.iteritems(): csw = ckan_csw.SwisstopoCkanMetadata(); - metadata = csw.get_ckan_metadata(dataset['csw_query']) + metadata = csw.get_ckan_metadata(dataset['csw_query'], 'de').copy() + metadata_fr = csw.get_ckan_metadata(dataset['csw_query'], 'fr').copy() + metadata_it = csw.get_ckan_metadata(dataset['csw_query'], 'it').copy() + metadata_en = csw.get_ckan_metadata(dataset['csw_query'], 'en').copy() log.debug(metadata) 
metadata['translations'] = self._generate_term_translations() log.debug("Translations: %s" % metadata['translations']) + metadata_trans = { + 'de': metadata, + 'fr': metadata_fr, + 'it': metadata_it, + 'en': metadata_en, + } + metadata['translations'].extend(self._generate_metadata_translations(metadata_trans)) + metadata['resources'] = self._generate_resources_dict_array(dataset_name) log.debug(metadata['resources']) @@ -190,6 +201,7 @@ def _find_or_create_organization(self, context): def _generate_term_translations(self): ''' + Generate term translatations for groups, organizations and metadata ''' try: translations = [] @@ -217,8 +229,28 @@ def _generate_term_translations(self): log.exception(e) return [] + def _generate_metadata_translations(self, metadata_translations): + try: + translations = [] + + for lang, metadata in metadata_translations.items(): + if lang != u'de': + for key, term in metadata_translations[lang].items(): + if term and term != metadata_translations['de'][key]: + translations.append({ + 'lang_code': lang, + 'term': metadata_translations['de'][key], + 'term_translation': term + }) + return translations + + except Exception, e: + log.exception(e) + return [] + def _submit_term_translations(self, context, package_dict): for translation in package_dict['translations']: + log.debug(translation) action.update.term_translation_update(context, translation) def _generate_resources_dict_array(self, dataset_name): diff --git a/ckanext/swisstopo/helpers/ckan_csw.py b/ckanext/swisstopo/helpers/ckan_csw.py index 1c346eb..2d8bcf2 100644 --- a/ckanext/swisstopo/helpers/ckan_csw.py +++ b/ckanext/swisstopo/helpers/ckan_csw.py @@ -44,23 +44,28 @@ def get_value(self, **kwargs): return etree.tostring(xml) class XPathAttribute(Attribute): - def get_element(self, xml): - return xml.xpath(self._config, namespaces=namespaces)[0] + def get_element(self, xml, xpath): + return xml.xpath(xpath, namespaces=namespaces)[0] def get_value(self, **kwargs): 
self.env.update(kwargs) xml = self.env['xml'] + + lang = self.env['lang'] + xpath = self._config.replace('#DE', '#' + lang.upper()) + log.debug("Lang: %s, XPath: %s" % (lang, xpath)) + try: # this should probably return a XPathTextAttribute - value = self.get_element(xml) + value = self.get_element(xml, xpath) except Exception as e: log.debug(e) value = '' return value class XPathMultiAttribute(XPathAttribute): - def get_element(self, xml): - return xml.xpath(self._config, namespaces=namespaces) + def get_element(self, xml, xpath): + return xml.xpath(xpath, namespaces=namespaces) class XPathTextAttribute(XPathAttribute): def get_value(self, **kwargs): @@ -159,7 +164,7 @@ def get_xml(self, id): raise DatasetNotFoundError("Dataset with id %s not found" % id) return dataset_xml_string - def get_ckan_metadata(self, dataset_name): + def get_ckan_metadata(self, dataset_name, language='de'): """ Returns the requested dataset mapped to CKAN attributes """ id = self.get_id_by_dataset_name(dataset_name) log.debug("Dataset ID: %s" % id) @@ -168,7 +173,7 @@ def get_ckan_metadata(self, dataset_name): for key in self.metadata: log.debug("Metadata key: %s" % key) attribute = self.get_attribute(dataset_name, key) - self.metadata[key] = attribute.get_value(xml=dataset_xml) + self.metadata[key] = attribute.get_value(xml=dataset_xml, lang=language) return self.metadata From a7063d42d7b14906d87b3237f4b9e4deb83cad4d Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Mon, 5 Aug 2013 09:06:15 +0200 Subject: [PATCH 13/27] Fixed translations of groups --- .../harvesters/swisstopoharvester.py | 65 ++++++++++--------- 1 file changed, 33 insertions(+), 32 deletions(-) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 5c0d173..a719589 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -51,16 +51,16 @@ class SwisstopoHarvester(HarvesterBase): 
FILES_BASE_URL = 'http://opendata-ch.s3.amazonaws.com' ORGANIZATION = { - 'de': u'Bundesamt für Landestopografie swisstopo', - 'fr': u'Office fédéral de topographie swisstopo', - 'it': u'Ufficio federale di topografia swisstopo', - 'en': u'Federal Office of Topography swisstopo', + u'de': u'Bundesamt für Landestopografie swisstopo', + u'fr': u'Office fédéral de topographie swisstopo', + u'it': u'Ufficio federale di topografia swisstopo', + u'en': u'Federal Office of Topography swisstopo', } GROUPS = { - 'de': [u'Raum und Umwelt'], - 'fr': [u'Espace et environnement'], - 'it': [u'Territorio e ambiente'], - 'en': [u'Territory and environment'] + u'de': [u'Raum und Umwelt'], + u'fr': [u'Espace et environnement'], + u'it': [u'Territorio e ambiente'], + u'en': [u'Territory and environment'] } @@ -78,20 +78,20 @@ def gather_stage(self, harvest_job): ids = [] for dataset_name, dataset in self.DATASETS.iteritems(): csw = ckan_csw.SwisstopoCkanMetadata(); - metadata = csw.get_ckan_metadata(dataset['csw_query'], 'de').copy() - metadata_fr = csw.get_ckan_metadata(dataset['csw_query'], 'fr').copy() - metadata_it = csw.get_ckan_metadata(dataset['csw_query'], 'it').copy() - metadata_en = csw.get_ckan_metadata(dataset['csw_query'], 'en').copy() + metadata = csw.get_ckan_metadata(dataset['csw_query'], u'de').copy() + metadata_fr = csw.get_ckan_metadata(dataset['csw_query'], u'fr').copy() + metadata_it = csw.get_ckan_metadata(dataset['csw_query'], u'it').copy() + metadata_en = csw.get_ckan_metadata(dataset['csw_query'], u'en').copy() log.debug(metadata) metadata['translations'] = self._generate_term_translations() log.debug("Translations: %s" % metadata['translations']) metadata_trans = { - 'de': metadata, - 'fr': metadata_fr, - 'it': metadata_it, - 'en': metadata_en, + u'de': metadata, + u'fr': metadata_fr, + u'it': metadata_it, + u'en': metadata_en, } metadata['translations'].extend(self._generate_metadata_translations(metadata_trans)) @@ -190,9 +190,9 @@ def 
_find_or_create_organization(self, context): try: data_dict = { 'permission': 'edit_group', - 'id': self._gen_new_name(self.ORGANIZATION['de']), - 'name': self._gen_new_name(self.ORGANIZATION['de']), - 'title': self.ORGANIZATION['de'] + 'id': self._gen_new_name(self.ORGANIZATION[u'de']), + 'name': self._gen_new_name(self.ORGANIZATION[u'de']), + 'title': self.ORGANIZATION[u'de'] } organization = get_action('organization_show')(context, data_dict) except: @@ -206,21 +206,22 @@ def _generate_term_translations(self): try: translations = [] - for k,v in self.ORGANIZATION.items(): - if k != u'de': + for lang, org in self.ORGANIZATION.items(): + if lang != u'de': translations.append({ - 'lang_code': k, + 'lang_code': lang, 'term': self.ORGANIZATION[u'de'], - 'term_translation': v + 'term_translation': org }) - for k,v in self.GROUPS.items(): - if k != u'de': - translations.append({ - 'lang_code': k, - 'term': self.GROUPS[u'de'], - 'term_translation': v - }) + for lang, groups in self.GROUPS.iteritems(): + if lang != u'de': + for idx, group in enumerate(self.GROUPS[lang]): + translations.append({ + 'lang_code': lang, + 'term': self.GROUPS[u'de'][idx], + 'term_translation': group + }) return translations @@ -236,10 +237,10 @@ def _generate_metadata_translations(self, metadata_translations): for lang, metadata in metadata_translations.items(): if lang != u'de': for key, term in metadata_translations[lang].items(): - if term and term != metadata_translations['de'][key]: + if term and term != metadata_translations[u'de'][key]: translations.append({ 'lang_code': lang, - 'term': metadata_translations['de'][key], + 'term': metadata_translations[u'de'][key], 'term_translation': term }) return translations From aa4931d4876b8d7d86cbbe4fc992efa982246af5 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Mon, 5 Aug 2013 14:24:01 +0200 Subject: [PATCH 14/27] Added translations for tags and the license --- .../harvesters/swisstopoharvester.py | 38 ++++++++++++++----- 
ckanext/swisstopo/helpers/ckan_csw.py | 24 +++++++++--- 2 files changed, 48 insertions(+), 14 deletions(-) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index a719589..848ae61 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -50,6 +50,12 @@ class SwisstopoHarvester(HarvesterBase): FILES_BASE_URL = 'http://opendata-ch.s3.amazonaws.com' + LICENSE = { + u'de': u'Lizenz für Fertigprodukte', + u'fr': u'Accord relatif aux produits finis', + u'it': u'Licenza per prodotti finiti', + u'en': u'Licence for finished products', + } ORGANIZATION = { u'de': u'Bundesamt für Landestopografie swisstopo', u'fr': u'Office fédéral de topographie swisstopo', @@ -98,6 +104,8 @@ def gather_stage(self, harvest_job): metadata['resources'] = self._generate_resources_dict_array(dataset_name) log.debug(metadata['resources']) + metadata['license_id'] = self.LICENSE['de'] + obj = HarvestObject( guid = metadata['id'], job = harvest_job, @@ -139,10 +147,6 @@ def import_stage(self, harvest_object): package_dict['id'] = harvest_object.guid package_dict['name'] = self._gen_new_name(package_dict['title']) - tags = package_dict['tags'] - package_dict['tags'] = [] - package_dict['tags'].extend([t for t in tags.split()]) - user = model.User.get(self.HARVEST_USER) context = { 'model': model, @@ -206,6 +210,14 @@ def _generate_term_translations(self): try: translations = [] + for lang, lic in self.LICENSE.items(): + if lang != u'de': + translations.append({ + 'lang_code': lang, + 'term': self.LICENSE[u'de'], + 'term_translation': lic + }) + for lang, org in self.ORGANIZATION.items(): if lang != u'de': translations.append({ @@ -238,11 +250,19 @@ def _generate_metadata_translations(self, metadata_translations): if lang != u'de': for key, term in metadata_translations[lang].items(): if term and term != metadata_translations[u'de'][key]: - translations.append({ - 
'lang_code': lang, - 'term': metadata_translations[u'de'][key], - 'term_translation': term - }) + if key == 'tags' and len(term) == len(metadata_translations[u'de'][key]): + for idx, subterm in enumerate(term): + translations.append({ + 'lang_code': lang, + 'term': self._gen_new_name(metadata_translations[u'de'][key][idx]), + 'term_translation': self._gen_new_name(subterm) + }) + else: + translations.append({ + 'lang_code': lang, + 'term': metadata_translations[u'de'][key], + 'term_translation': term + }) return translations except Exception, e: diff --git a/ckanext/swisstopo/helpers/ckan_csw.py b/ckanext/swisstopo/helpers/ckan_csw.py index 2d8bcf2..eff4ab8 100644 --- a/ckanext/swisstopo/helpers/ckan_csw.py +++ b/ckanext/swisstopo/helpers/ckan_csw.py @@ -59,7 +59,7 @@ def get_value(self, **kwargs): # this should probably return a XPathTextAttribute value = self.get_element(xml, xpath) except Exception as e: - log.debug(e) + log.exception(e) value = '' return value @@ -102,7 +102,21 @@ def get_value(self, **kwargs): except TypeError: value = value + new_value + separator return value.strip(separator) - + +class ArrayAttribute(Attribute): + def get_value(self, **kwargs): + self.env.update(kwargs) + value = [] + for attribute in self._config: + new_value = attribute.get_value(**kwargs) + try: + iterator = iter(new_value) + for inner_attribute in iterator: + # it should be possible to call inner_attribute.get_value and the right thing(tm) happens' + value.append(inner_attribute.text if hasattr(inner_attribute, 'text') else inner_attribute) + except TypeError: + value.append(new_value) + return value class FirstInOrderAttribute(CombinedAttribute): def get_value(self, **kwargs): @@ -126,7 +140,7 @@ def __init__(self, url, schema, version='2.0.2', lang='en-US'): 'author_email', 'maintainer', 'maintainer_email', - 'license', + 'license_url', 'version', 'notes', 'tags', @@ -210,10 +224,10 @@ class SwisstopoCkanMetadata(CkanMetadata): 
XPathTextAttribute(".//gmd:identificationInfo//gmd:pointOfContact[1]//gmd:CI_RoleCode[@codeListValue='owner']/ancestor::gmd:pointOfContact//gmd:address//gmd:electronicMailAddress/gco:CharacterString"), XPathTextAttribute(".//gmd:identificationInfo//gmd:pointOfContact//gmd:address//gmd:electronicMailAddress/gco:CharacterString"), ]), - 'license': StringAttribute('http://www.toposhop.admin.ch/de/shop/terms/use/finished_products'), + 'license_url': StringAttribute('http://www.toposhop.admin.ch/de/shop/terms/use/finished_products'), 'version': XPathTextAttribute(".//gmd:identificationInfo//gmd:citation//gmd:date/gco:Date"), 'notes': XPathTextAttribute(".//gmd:identificationInfo//gmd:abstract//gmd:textGroup/gmd:LocalisedCharacterString[@locale='#DE']"), - 'tags': MultiAttribute([XPathMultiTextAttribute(".//gmd:identificationInfo//gmd:descriptiveKeywords//gmd:keyword//gmd:textGroup/gmd:LocalisedCharacterString[@locale='#DE']")], separator=' '), + 'tags': ArrayAttribute([XPathMultiTextAttribute(".//gmd:identificationInfo//gmd:descriptiveKeywords//gmd:keyword//gmd:textGroup/gmd:LocalisedCharacterString[@locale='#DE']")]), 'metadata_url': StringAttribute(''), 'metadata_raw': XmlAttribute(''), } From c57b55b0e2227cde83dc7195a8931b0d995ea89d Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Mon, 5 Aug 2013 19:47:21 +0200 Subject: [PATCH 15/27] Use new OGDCHHarvesterBase class to keep package name untouched --- ckanext/swisstopo/harvesters/__init__.py | 1 + ckanext/swisstopo/harvesters/base.py | 124 ++++++++++++++++++ .../harvesters/swisstopoharvester.py | 8 +- 3 files changed, 129 insertions(+), 4 deletions(-) create mode 100644 ckanext/swisstopo/harvesters/base.py diff --git a/ckanext/swisstopo/harvesters/__init__.py b/ckanext/swisstopo/harvesters/__init__.py index f96002e..c777ba3 100644 --- a/ckanext/swisstopo/harvesters/__init__.py +++ b/ckanext/swisstopo/harvesters/__init__.py @@ -1 +1,2 @@ from ckanext.swisstopo.harvesters.swisstopoharvester import SwisstopoHarvester 
+from ckanext.swisstopo.harvesters.base import OGDCHHarvesterBase diff --git a/ckanext/swisstopo/harvesters/base.py b/ckanext/swisstopo/harvesters/base.py new file mode 100644 index 0000000..9177caa --- /dev/null +++ b/ckanext/swisstopo/harvesters/base.py @@ -0,0 +1,124 @@ +from ckanext.harvest.harvesters import HarvesterBase + +from ckan import plugins as p +from ckan import model +from ckan.model import Session +from ckan.logic import ValidationError, NotFound, get_action +from ckan.logic.schema import default_create_package_schema +from ckan.lib.navl.validators import ignore_missing,ignore +from ckan.lib.munge import munge_tag + +import logging +log = logging.getLogger(__name__) + +class OGDCHHarvesterBase(HarvesterBase): + def _create_or_update_package(self, package_dict, harvest_object): + ''' + Creates a new package or updates an exisiting one according to the + package dictionary provided. The package dictionary should look like + the REST API response for a package: + + http://ckan.net/api/rest/package/statistics-catalunya + + Note that the package_dict must contain an id, which will be used to + check if the package needs to be created or updated (use the remote + dataset id). + + If the remote server provides the modification date of the remote + package, add it to package_dict['metadata_modified']. + + + TODO: Not sure it is worth keeping this function. 
If useful it should + use the output of package_show logic function (maybe keeping support + for rest api based dicts + ''' + try: + # Change default schema + schema = default_create_package_schema() + schema['id'] = [ignore_missing, unicode] + schema['__junk'] = [ignore] + + # Check API version + if self.config: + try: + api_version = int(self.config.get('api_version', 2)) + except ValueError: + raise ValueError('api_version must be an integer') + + #TODO: use site user when available + user_name = self.config.get('user', u'harvest') + else: + api_version = 2 + user_name = u'harvest' + + context = { + 'model': model, + 'session': Session, + 'user': user_name, + 'api_version': api_version, + 'schema': schema, + } + + tags = package_dict.get('tags', []) + tags = [munge_tag(t) for t in tags] + tags = list(set(tags)) + package_dict['tags'] = tags + + # Check if package exists + data_dict = {} + data_dict['id'] = package_dict['id'] + try: + existing_package_dict = get_action('package_show')(context, data_dict) + # Check modified date + if not 'metadata_modified' in package_dict or \ + package_dict['metadata_modified'] > existing_package_dict.get('metadata_modified'): + log.info('Package with GUID %s exists and needs to be updated' % harvest_object.guid) + # Update package + context.update({'id':package_dict['id']}) + new_package = get_action('package_update_rest')(context, package_dict) + + else: + log.info('Package with GUID %s not updated, skipping...' 
% harvest_object.guid) + return + + # Flag the other objects linking to this package as not current anymore + from ckanext.harvest.model import harvest_object_table + conn = Session.connection() + u = update(harvest_object_table) \ + .where(harvest_object_table.c.package_id==bindparam('b_package_id')) \ + .values(current=False) + conn.execute(u, b_package_id=new_package['id']) + + # Flag this as the current harvest object + + harvest_object.package_id = new_package['id'] + harvest_object.current = True + harvest_object.save() + + except NotFound: + # Package needs to be created + log.info('Package with GUID %s does not exist, let\'s create it' % harvest_object.guid) + harvest_object.current = True + harvest_object.package_id = package_dict['id'] + # Defer constraints and flush so the dataset can be indexed with + # the harvest object id (on the after_show hook from the harvester + # plugin) + harvest_object.add() + + model.Session.execute('SET CONSTRAINTS harvest_object_package_id_fkey DEFERRED') + model.Session.flush() + + new_package = get_action('package_create_rest')(context, package_dict) + + Session.commit() + + return True + + except ValidationError,e: + log.exception(e) + self._save_object_error('Invalid package with GUID %s: %r'%(harvest_object.guid,e.error_dict),harvest_object,'Import') + except Exception, e: + log.exception(e) + self._save_object_error('%r'%e,harvest_object,'Import') + + return None diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 848ae61..22c8cff 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -15,7 +15,7 @@ from ckan.lib.helpers import json from ckanext.harvest.model import HarvestJob, HarvestObject, HarvestGatherError, HarvestObjectError -from ckanext.harvest.harvesters import HarvesterBase +from base import OGDCHHarvesterBase from ckanext.swisstopo.helpers import ckan_csw from 
ckanext.swisstopo.helpers import s3 @@ -23,7 +23,7 @@ import logging log = logging.getLogger(__name__) -class SwisstopoHarvester(HarvesterBase): +class SwisstopoHarvester(OGDCHHarvesterBase): ''' The harvester for swisstopo ''' @@ -105,6 +105,7 @@ def gather_stage(self, harvest_job): log.debug(metadata['resources']) metadata['license_id'] = self.LICENSE['de'] + metadata['layer_name'] = dataset_name obj = HarvestObject( guid = metadata['id'], @@ -145,8 +146,7 @@ def import_stage(self, harvest_object): package_dict = json.loads(harvest_object.content) package_dict['id'] = harvest_object.guid - package_dict['name'] = self._gen_new_name(package_dict['title']) - + package_dict['name'] = self._gen_new_name(package_dict['layer_name']) user = model.User.get(self.HARVEST_USER) context = { 'model': model, From 5bbbd4537c6083da5664ea15635fadae4abf47e5 Mon Sep 17 00:00:00 2001 From: Stefan Reinhard Date: Mon, 26 Aug 2013 11:23:08 +0200 Subject: [PATCH 16/27] stop hardcoding s3 urls --- .../harvesters/swisstopoharvester.py | 33 ++++++++++--------- ckanext/swisstopo/helpers/s3.py | 5 ++- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 22c8cff..2e29c53 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -48,8 +48,6 @@ class SwisstopoHarvester(OGDCHHarvesterBase): }, } - FILES_BASE_URL = 'http://opendata-ch.s3.amazonaws.com' - LICENSE = { u'de': u'Lizenz für Fertigprodukte', u'fr': u'Accord relatif aux produits finis', @@ -68,7 +66,7 @@ class SwisstopoHarvester(OGDCHHarvesterBase): u'it': [u'Territorio e ambiente'], u'en': [u'Territory and environment'] } - + def info(self): return { @@ -89,7 +87,7 @@ def gather_stage(self, harvest_job): metadata_it = csw.get_ckan_metadata(dataset['csw_query'], u'it').copy() metadata_en = csw.get_ckan_metadata(dataset['csw_query'], u'en').copy() 
log.debug(metadata) - + metadata['translations'] = self._generate_term_translations() log.debug("Translations: %s" % metadata['translations']) @@ -115,7 +113,7 @@ def gather_stage(self, harvest_job): obj.save() log.debug('adding ' + dataset_name + ' to the queue') ids.append(obj.id) - + return ids @@ -126,7 +124,7 @@ def fetch_stage(self, harvest_object): log.debug(json.loads(harvest_object.content)) name = json.loads(harvest_object.content)['name'] log.debug(harvest_object.content) - + # Get contents try: harvest_object.save() @@ -134,14 +132,14 @@ def fetch_stage(self, harvest_object): return True except Exception, e: log.exception(e) - + def import_stage(self, harvest_object): log.debug('In SwisstopoHarvester import_stage') if not harvest_object: log.error('No harvest object received') return False - + try: package_dict = json.loads(harvest_object.content) @@ -153,7 +151,7 @@ def import_stage(self, harvest_object): 'session': Session, 'user': self.HARVEST_USER } - + # Find or create group the dataset should get assigned to package_dict['groups'] = self._find_or_create_groups(context) @@ -273,18 +271,21 @@ def _submit_term_translations(self, context, package_dict): for translation in package_dict['translations']: log.debug(translation) action.update.term_translation_update(context, translation) - + def _generate_resources_dict_array(self, dataset_name): try: resources = [] prefix = dataset_name + u'/' s3_helper = s3.S3() - for file in s3_helper.list(prefix=prefix): - resources.append({ - 'url': self.FILES_BASE_URL + '/' + file, - 'name': file.replace(prefix, u''), - 'format': self._guess_format(file) - }) + + for key in s3_helper.list(prefix=prefix): + if key.size > 0: + resources.append({ + 'url': key.generate_url(0, query_auth=False, + force_http=True), + 'name': os.path.basename(key.name), + 'format': self._guess_format(key.name) + }) return resources except Exception, e: log.exception(e) diff --git a/ckanext/swisstopo/helpers/s3.py 
b/ckanext/swisstopo/helpers/s3.py index 99ea40e..7247fc1 100644 --- a/ckanext/swisstopo/helpers/s3.py +++ b/ckanext/swisstopo/helpers/s3.py @@ -11,15 +11,14 @@ def __init__(self): self.bucket_name = config['ckanext.swisstopo.s3_bucket'] except KeyError as e: raise ConfigEntryNotFoundError("'%s' not found in config" % e.message) - + def __repr__(self): return "" % (self.key, self.token, self.bucket_name) def list(self, prefix=None): conn = S3Connection(self.key,self.token) bucket = conn.get_bucket(self.bucket_name) - for key in bucket.list(prefix=prefix): - yield key.name.encode('utf-8') + return bucket.list(prefix=prefix) class ConfigEntryNotFoundError(Exception): pass From 5a9238cab6cef2fd7bf58af8f0d2c3874673c1d1 Mon Sep 17 00:00:00 2001 From: Stefan Reinhard Date: Mon, 26 Aug 2013 11:56:36 +0200 Subject: [PATCH 17/27] copy license to license_id --- ckanext/swisstopo/harvesters/swisstopoharvester.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 2e29c53..5ce8dd1 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -152,6 +152,8 @@ def import_stage(self, harvest_object): 'user': self.HARVEST_USER } + package_dict['licence_id'] = package_dict.get('license') + # Find or create group the dataset should get assigned to package_dict['groups'] = self._find_or_create_groups(context) From 9b59aa7cf6610cd47a40bb585daded9e0b57effe Mon Sep 17 00:00:00 2001 From: Stefan Reinhard Date: Tue, 27 Aug 2013 15:37:47 +0200 Subject: [PATCH 18/27] Revert "copy license to license_id" This reverts commit 5a9238cab6cef2fd7bf58af8f0d2c3874673c1d1. 
--- ckanext/swisstopo/harvesters/swisstopoharvester.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 5ce8dd1..2e29c53 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -152,8 +152,6 @@ def import_stage(self, harvest_object): 'user': self.HARVEST_USER } - package_dict['licence_id'] = package_dict.get('license') - # Find or create group the dataset should get assigned to package_dict['groups'] = self._find_or_create_groups(context) From 6984b88f78a53ee4c3595d61412a93a7620564a4 Mon Sep 17 00:00:00 2001 From: Stefan Reinhard Date: Mon, 2 Sep 2013 13:59:01 +0200 Subject: [PATCH 19/27] reraise exceptions when they happen --- ckanext/swisstopo/harvesters/swisstopoharvester.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 2e29c53..e08454e 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -132,6 +132,7 @@ def fetch_stage(self, harvest_object): return True except Exception, e: log.exception(e) + raise def import_stage(self, harvest_object): log.debug('In SwisstopoHarvester import_stage') @@ -170,6 +171,7 @@ def import_stage(self, harvest_object): except Exception, e: log.exception(e) + raise return True def _find_or_create_groups(self, context): @@ -238,7 +240,7 @@ def _generate_term_translations(self): except Exception, e: log.exception(e) - return [] + raise def _generate_metadata_translations(self, metadata_translations): try: @@ -265,7 +267,7 @@ def _generate_metadata_translations(self, metadata_translations): except Exception, e: log.exception(e) - return [] + raise def _submit_term_translations(self, context, package_dict): for translation in package_dict['translations']: @@ 
-289,7 +291,7 @@ def _generate_resources_dict_array(self, dataset_name): return resources except Exception, e: log.exception(e) - return [] + raise def _guess_format(self, file_name): ''' From d15bee9fc64aa5cd6df4e8c584225fb0855cda04 Mon Sep 17 00:00:00 2001 From: Stefan Reinhard Date: Mon, 2 Sep 2013 17:39:23 +0200 Subject: [PATCH 20/27] add ortschaftenverzeichnis dataset --- ckanext/swisstopo/harvesters/swisstopoharvester.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index e08454e..152ded1 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -46,6 +46,9 @@ class SwisstopoHarvester(OGDCHHarvesterBase): 'ch.swisstopo.pixelkarte-farbe-pk1000.noscale': { 'csw_query': 'Landeskarte 1:1 Mio.', }, + 'ch.swisstopo-vd.ortschaftenverzeichnis_plz': { + 'csw_query': 'Amtliches Ortschaftenverzeichnis', + } } LICENSE = { From 020d14242cf12e070ac8455bf55fb8f9e26dfb94 Mon Sep 17 00:00:00 2001 From: Stefan Reinhard Date: Tue, 3 Sep 2013 10:52:38 +0200 Subject: [PATCH 21/27] save license_url in extras --- ckanext/swisstopo/harvesters/swisstopoharvester.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 152ded1..f5e854d 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -162,6 +162,12 @@ def import_stage(self, harvest_object): # Find or create the organization the dataset should get assigned to package_dict['owner_org'] = self._find_or_create_organization(context) + # Save license url in extras + extras = [] + if 'license_url' in package_dict: + extras.append(('license_url', package_dict['license_url'])) + package_dict['extras'] = extras + package = model.Package.get(package_dict['id']) pkg_role = 
model.PackageRole(package=package, user=user, role=model.Role.ADMIN) From 8c9d06689c255777ec4a1257ea7f8752742c0446 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Thu, 5 Sep 2013 17:18:43 +0200 Subject: [PATCH 22/27] Add license_url to the metadata --- .../harvesters/swisstopoharvester.py | 23 ++++++++++++------- 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index f5e854d..538de0b 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -52,10 +52,10 @@ class SwisstopoHarvester(OGDCHHarvesterBase): } LICENSE = { - u'de': u'Lizenz für Fertigprodukte', - u'fr': u'Accord relatif aux produits finis', - u'it': u'Licenza per prodotti finiti', - u'en': u'Licence for finished products', + u'de': (u'Lizenz für Fertigprodukte', 'http://www.toposhop.admin.ch/de/shop/terms/use/finished_products'), + u'fr': (u'Accord relatif aux produits finis', 'http://www.toposhop.admin.ch/fr/shop/terms/use/finished_products'), + u'it': (u'Licenza per prodotti finiti', 'http://www.toposhop.admin.ch/it/shop/terms/use/finished_products'), + u'en': (u'Licence for finished products', 'http://www.toposhop.admin.ch/en/shop/terms/use/finished_products'), } ORGANIZATION = { u'de': u'Bundesamt für Landestopografie swisstopo', @@ -105,7 +105,9 @@ def gather_stage(self, harvest_job): metadata['resources'] = self._generate_resources_dict_array(dataset_name) log.debug(metadata['resources']) - metadata['license_id'] = self.LICENSE['de'] + metadata['license_id'] = self.LICENSE['de'][0] + metadata['license_url'] = self.LICENSE['de'][1] + metadata['layer_name'] = dataset_name obj = HarvestObject( @@ -220,11 +222,16 @@ def _generate_term_translations(self): translations = [] for lang, lic in self.LICENSE.items(): - if lang != u'de': + if lang != u'de' + translations.append({ + 'lang_code': lang, + 'term': 
self.LICENSE[u'de'][0], + 'term_translation': lic[0] + }) translations.append({ 'lang_code': lang, - 'term': self.LICENSE[u'de'], - 'term_translation': lic + 'term': self.LICENSE[u'de'][1], + 'term_translation': lic[1] }) for lang, org in self.ORGANIZATION.items(): From b53e8f71e51ceb8d28e2d21a7a69628877266491 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Thu, 5 Sep 2013 17:21:26 +0200 Subject: [PATCH 23/27] Fixed typo --- ckanext/swisstopo/harvesters/swisstopoharvester.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/ckanext/swisstopo/harvesters/swisstopoharvester.py b/ckanext/swisstopo/harvesters/swisstopoharvester.py index 538de0b..d257e88 100644 --- a/ckanext/swisstopo/harvesters/swisstopoharvester.py +++ b/ckanext/swisstopo/harvesters/swisstopoharvester.py @@ -222,7 +222,7 @@ def _generate_term_translations(self): translations = [] for lang, lic in self.LICENSE.items(): - if lang != u'de' + if lang != u'de': translations.append({ 'lang_code': lang, 'term': self.LICENSE[u'de'][0], From 692050c6b0c172543fb3d659b1bd158b48381d85 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Thu, 5 Sep 2013 17:30:37 +0200 Subject: [PATCH 24/27] Add missing import statements for HarvesterBase class --- ckanext/swisstopo/harvesters/base.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/ckanext/swisstopo/harvesters/base.py b/ckanext/swisstopo/harvesters/base.py index 9177caa..71f4799 100644 --- a/ckanext/swisstopo/harvesters/base.py +++ b/ckanext/swisstopo/harvesters/base.py @@ -1,5 +1,8 @@ from ckanext.harvest.harvesters import HarvesterBase +from sqlalchemy.sql import update,and_, bindparam +from sqlalchemy.exc import InvalidRequestError + from ckan import plugins as p from ckan import model from ckan.model import Session From ae835a5498c97f8dcfa329c822f16394d7507658 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Fri, 6 Sep 2013 18:59:41 +0200 Subject: [PATCH 25/27] OGDCH-75: Add daemon scripts for ckanext-swisstopo --- 
scripts/swisstopo_fetch | 38 ++++++++++++++++++++++++++++++++++++++ scripts/swisstopo_gather | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 76 insertions(+) create mode 100755 scripts/swisstopo_fetch create mode 100755 scripts/swisstopo_gather diff --git a/scripts/swisstopo_fetch b/scripts/swisstopo_fetch new file mode 100755 index 0000000..3b56ed5 --- /dev/null +++ b/scripts/swisstopo_fetch @@ -0,0 +1,38 @@ +#!/bin/bash + +DAEMON=/home/www-data/pyenv/bin/python +ARGS="/home/www-data/pyenv/bin/paster --plugin=ckanext-swisstopo swisstopoharvester fetch_consumer --config=/home/www-data/production.ini" +PIDFILE=/home/www-data/pid/swisstopo_fetch.pid + +function start { + /sbin/start-stop-daemon --start --pidfile $PIDFILE \ + --user www-data --group www-data \ + -b --make-pidfile \ + --chuid www-data \ + --exec $DAEMON -- $ARGS +} +function stop { + /sbin/start-stop-daemon --stop --pidfile $PIDFILE --verbose +} + +case "$1" in + start) + echo "Starting server ..." + start + ;; + stop) + echo "Stopping server ..." + stop + ;; + restart) + echo "Restarting server ..." + stop + start + ;; + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 + ;; +esac + +exit 0 diff --git a/scripts/swisstopo_gather b/scripts/swisstopo_gather new file mode 100755 index 0000000..2e5cf22 --- /dev/null +++ b/scripts/swisstopo_gather @@ -0,0 +1,38 @@ +#!/bin/bash + +DAEMON=/home/www-data/pyenv/bin/python +ARGS="/home/www-data/pyenv/bin/paster --plugin=ckanext-swisstopo swisstopoharvester gather_consumer --config=/home/www-data/production.ini" +PIDFILE=/home/www-data/pid/swisstopo_gather.pid + +function start { + /sbin/start-stop-daemon --start --pidfile $PIDFILE \ + --user www-data --group www-data \ + -b --make-pidfile \ + --chuid www-data \ + --exec $DAEMON -- $ARGS +} +function stop { + /sbin/start-stop-daemon --stop --pidfile $PIDFILE --verbose +} + +case "$1" in + start) + echo "Starting server ..." + start + ;; + stop) + echo "Stopping server ..." 
+ stop + ;; + restart) + echo "Restarting server ..." + stop + start + ;; + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 + ;; +esac + +exit 0 From af5fd21a8d0607bf68718614d0bc7f748d61209d Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Mon, 9 Sep 2013 10:02:22 +0200 Subject: [PATCH 26/27] Fix name of swisstopo harvester in daemon script --- scripts/swisstopo_fetch | 2 +- scripts/swisstopo_gather | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/scripts/swisstopo_fetch b/scripts/swisstopo_fetch index 3b56ed5..fe52793 100755 --- a/scripts/swisstopo_fetch +++ b/scripts/swisstopo_fetch @@ -1,7 +1,7 @@ #!/bin/bash DAEMON=/home/www-data/pyenv/bin/python -ARGS="/home/www-data/pyenv/bin/paster --plugin=ckanext-swisstopo swisstopoharvester fetch_consumer --config=/home/www-data/production.ini" +ARGS="/home/www-data/pyenv/bin/paster --plugin=ckanext-swisstopo swisstopo_harvest fetch_consumer --config=/home/www-data/production.ini" PIDFILE=/home/www-data/pid/swisstopo_fetch.pid function start { diff --git a/scripts/swisstopo_gather b/scripts/swisstopo_gather index 2e5cf22..2ed6e9b 100755 --- a/scripts/swisstopo_gather +++ b/scripts/swisstopo_gather @@ -1,7 +1,7 @@ #!/bin/bash DAEMON=/home/www-data/pyenv/bin/python -ARGS="/home/www-data/pyenv/bin/paster --plugin=ckanext-swisstopo swisstopoharvester gather_consumer --config=/home/www-data/production.ini" +ARGS="/home/www-data/pyenv/bin/paster --plugin=ckanext-swisstopo swisstopo_harvest gather_consumer --config=/home/www-data/production.ini" PIDFILE=/home/www-data/pid/swisstopo_gather.pid function start { From 4edaa480d2fb47d8385e7ce27ed8a57c223340e3 Mon Sep 17 00:00:00 2001 From: Stefan Oderbolz Date: Sat, 14 Sep 2013 07:37:21 +0200 Subject: [PATCH 27/27] Add LICENSE and README --- LICENSE | 661 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ README.md | 35 +++ 2 files changed, 696 insertions(+) create mode 100644 LICENSE create mode 100644 README.md diff --git a/LICENSE b/LICENSE new file 
mode 100644 index 0000000..d5445e7 --- /dev/null +++ b/LICENSE @@ -0,0 +1,661 @@ +GNU AFFERO GENERAL PUBLIC LICENSE + Version 3, 19 November 2007 + + Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/> + Everyone is permitted to copy and distribute verbatim copies + of this license document, but changing it is not allowed. + + Preamble + + The GNU Affero General Public License is a free, copyleft license for +software and other kinds of works, specifically designed to ensure +cooperation with the community in the case of network server software. + + The licenses for most software and other practical works are designed +to take away your freedom to share and change the works. By contrast, +our General Public Licenses are intended to guarantee your freedom to +share and change all versions of a program--to make sure it remains free +software for all its users. + + When we speak of free software, we are referring to freedom, not +price. Our General Public Licenses are designed to make sure that you +have the freedom to distribute copies of free software (and charge for +them if you wish), that you receive source code or can get it if you +want it, that you can change the software or use pieces of it in new +free programs, and that you know you can do these things. + + Developers that use our General Public Licenses protect your rights +with two steps: (1) assert copyright on the software, and (2) offer +you this License which gives you legal permission to copy, distribute +and/or modify the software. + + A secondary benefit of defending all users' freedom is that +improvements made in alternate versions of the program, if they +receive widespread use, become available for other developers to +incorporate. Many developers of free software are heartened and +encouraged by the resulting cooperation. However, in the case of +software used on network servers, this result may fail to come about.
+The GNU General Public License permits making a modified version and +letting the public access it on a server without ever releasing its +source code to the public. + + The GNU Affero General Public License is designed specifically to +ensure that, in such cases, the modified source code becomes available +to the community. It requires the operator of a network server to +provide the source code of the modified version running there to the +users of that server. Therefore, public use of a modified version, on +a publicly accessible server, gives the public access to the source +code of the modified version. + + An older license, called the Affero General Public License and +published by Affero, was designed to accomplish similar goals. This is +a different license, not a version of the Affero GPL, but Affero has +released a new version of the Affero GPL which permits relicensing under +this license. + + The precise terms and conditions for copying, distribution and +modification follow. + + TERMS AND CONDITIONS + + 0. Definitions. + + "This License" refers to version 3 of the GNU Affero General Public License. + + "Copyright" also means copyright-like laws that apply to other kinds of +works, such as semiconductor masks. + + "The Program" refers to any copyrightable work licensed under this +License. Each licensee is addressed as "you". "Licensees" and +"recipients" may be individuals or organizations. + + To "modify" a work means to copy from or adapt all or part of the work +in a fashion requiring copyright permission, other than the making of an +exact copy. The resulting work is called a "modified version" of the +earlier work or a work "based on" the earlier work. + + A "covered work" means either the unmodified Program or a work based +on the Program. 
+ + To "propagate" a work means to do anything with it that, without +permission, would make you directly or secondarily liable for +infringement under applicable copyright law, except executing it on a +computer or modifying a private copy. Propagation includes copying, +distribution (with or without modification), making available to the +public, and in some countries other activities as well. + + To "convey" a work means any kind of propagation that enables other +parties to make or receive copies. Mere interaction with a user through +a computer network, with no transfer of a copy, is not conveying. + + An interactive user interface displays "Appropriate Legal Notices" +to the extent that it includes a convenient and prominently visible +feature that (1) displays an appropriate copyright notice, and (2) +tells the user that there is no warranty for the work (except to the +extent that warranties are provided), that licensees may convey the +work under this License, and how to view a copy of this License. If +the interface presents a list of user commands or options, such as a +menu, a prominent item in the list meets this criterion. + + 1. Source Code. + + The "source code" for a work means the preferred form of the work +for making modifications to it. "Object code" means any non-source +form of a work. + + A "Standard Interface" means an interface that either is an official +standard defined by a recognized standards body, or, in the case of +interfaces specified for a particular programming language, one that +is widely used among developers working in that language. 
+ + The "System Libraries" of an executable work include anything, other +than the work as a whole, that (a) is included in the normal form of +packaging a Major Component, but which is not part of that Major +Component, and (b) serves only to enable use of the work with that +Major Component, or to implement a Standard Interface for which an +implementation is available to the public in source code form. A +"Major Component", in this context, means a major essential component +(kernel, window system, and so on) of the specific operating system +(if any) on which the executable work runs, or a compiler used to +produce the work, or an object code interpreter used to run it. + + The "Corresponding Source" for a work in object code form means all +the source code needed to generate, install, and (for an executable +work) run the object code and to modify the work, including scripts to +control those activities. However, it does not include the work's +System Libraries, or general-purpose tools or generally available free +programs which are used unmodified in performing those activities but +which are not part of the work. For example, Corresponding Source +includes interface definition files associated with source files for +the work, and the source code for shared libraries and dynamically +linked subprograms that the work is specifically designed to require, +such as by intimate data communication or control flow between those +subprograms and other parts of the work. + + The Corresponding Source need not include anything that users +can regenerate automatically from other parts of the Corresponding +Source. + + The Corresponding Source for a work in source code form is that +same work. + + 2. Basic Permissions. + + All rights granted under this License are granted for the term of +copyright on the Program, and are irrevocable provided the stated +conditions are met. This License explicitly affirms your unlimited +permission to run the unmodified Program. 
The output from running a +covered work is covered by this License only if the output, given its +content, constitutes a covered work. This License acknowledges your +rights of fair use or other equivalent, as provided by copyright law. + + You may make, run and propagate covered works that you do not +convey, without conditions so long as your license otherwise remains +in force. You may convey covered works to others for the sole purpose +of having them make modifications exclusively for you, or provide you +with facilities for running those works, provided that you comply with +the terms of this License in conveying all material for which you do +not control copyright. Those thus making or running the covered works +for you must do so exclusively on your behalf, under your direction +and control, on terms that prohibit them from making any copies of +your copyrighted material outside their relationship with you. + + Conveying under any other circumstances is permitted solely under +the conditions stated below. Sublicensing is not allowed; section 10 +makes it unnecessary. + + 3. Protecting Users' Legal Rights From Anti-Circumvention Law. + + No covered work shall be deemed part of an effective technological +measure under any applicable law fulfilling obligations under article +11 of the WIPO copyright treaty adopted on 20 December 1996, or +similar laws prohibiting or restricting circumvention of such +measures. + + When you convey a covered work, you waive any legal power to forbid +circumvention of technological measures to the extent such circumvention +is effected by exercising rights under this License with respect to +the covered work, and you disclaim any intention to limit operation or +modification of the work as a means of enforcing, against the work's +users, your or third parties' legal rights to forbid circumvention of +technological measures. + + 4. Conveying Verbatim Copies. 
+ + You may convey verbatim copies of the Program's source code as you +receive it, in any medium, provided that you conspicuously and +appropriately publish on each copy an appropriate copyright notice; +keep intact all notices stating that this License and any +non-permissive terms added in accord with section 7 apply to the code; +keep intact all notices of the absence of any warranty; and give all +recipients a copy of this License along with the Program. + + You may charge any price or no price for each copy that you convey, +and you may offer support or warranty protection for a fee. + + 5. Conveying Modified Source Versions. + + You may convey a work based on the Program, or the modifications to +produce it from the Program, in the form of source code under the +terms of section 4, provided that you also meet all of these conditions: + + a) The work must carry prominent notices stating that you modified + it, and giving a relevant date. + + b) The work must carry prominent notices stating that it is + released under this License and any conditions added under section + 7. This requirement modifies the requirement in section 4 to + "keep intact all notices". + + c) You must license the entire work, as a whole, under this + License to anyone who comes into possession of a copy. This + License will therefore apply, along with any applicable section 7 + additional terms, to the whole of the work, and all its parts, + regardless of how they are packaged. This License gives no + permission to license the work in any other way, but it does not + invalidate such permission if you have separately received it. + + d) If the work has interactive user interfaces, each must display + Appropriate Legal Notices; however, if the Program has interactive + interfaces that do not display Appropriate Legal Notices, your + work need not make them do so. 
+ + A compilation of a covered work with other separate and independent +works, which are not by their nature extensions of the covered work, +and which are not combined with it such as to form a larger program, +in or on a volume of a storage or distribution medium, is called an +"aggregate" if the compilation and its resulting copyright are not +used to limit the access or legal rights of the compilation's users +beyond what the individual works permit. Inclusion of a covered work +in an aggregate does not cause this License to apply to the other +parts of the aggregate. + + 6. Conveying Non-Source Forms. + + You may convey a covered work in object code form under the terms +of sections 4 and 5, provided that you also convey the +machine-readable Corresponding Source under the terms of this License, +in one of these ways: + + a) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by the + Corresponding Source fixed on a durable physical medium + customarily used for software interchange. + + b) Convey the object code in, or embodied in, a physical product + (including a physical distribution medium), accompanied by a + written offer, valid for at least three years and valid for as + long as you offer spare parts or customer support for that product + model, to give anyone who possesses the object code either (1) a + copy of the Corresponding Source for all the software in the + product that is covered by this License, on a durable physical + medium customarily used for software interchange, for a price no + more than your reasonable cost of physically performing this + conveying of source, or (2) access to copy the + Corresponding Source from a network server at no charge. + + c) Convey individual copies of the object code with a copy of the + written offer to provide the Corresponding Source. 
This + alternative is allowed only occasionally and noncommercially, and + only if you received the object code with such an offer, in accord + with subsection 6b. + + d) Convey the object code by offering access from a designated + place (gratis or for a charge), and offer equivalent access to the + Corresponding Source in the same way through the same place at no + further charge. You need not require recipients to copy the + Corresponding Source along with the object code. If the place to + copy the object code is a network server, the Corresponding Source + may be on a different server (operated by you or a third party) + that supports equivalent copying facilities, provided you maintain + clear directions next to the object code saying where to find the + Corresponding Source. Regardless of what server hosts the + Corresponding Source, you remain obligated to ensure that it is + available for as long as needed to satisfy these requirements. + + e) Convey the object code using peer-to-peer transmission, provided + you inform other peers where the object code and Corresponding + Source of the work are being offered to the general public at no + charge under subsection 6d. + + A separable portion of the object code, whose source code is excluded +from the Corresponding Source as a System Library, need not be +included in conveying the object code work. + + A "User Product" is either (1) a "consumer product", which means any +tangible personal property which is normally used for personal, family, +or household purposes, or (2) anything designed or sold for incorporation +into a dwelling. In determining whether a product is a consumer product, +doubtful cases shall be resolved in favor of coverage. 
For a particular +product received by a particular user, "normally used" refers to a +typical or common use of that class of product, regardless of the status +of the particular user or of the way in which the particular user +actually uses, or expects or is expected to use, the product. A product +is a consumer product regardless of whether the product has substantial +commercial, industrial or non-consumer uses, unless such uses represent +the only significant mode of use of the product. + + "Installation Information" for a User Product means any methods, +procedures, authorization keys, or other information required to install +and execute modified versions of a covered work in that User Product from +a modified version of its Corresponding Source. The information must +suffice to ensure that the continued functioning of the modified object +code is in no case prevented or interfered with solely because +modification has been made. + + If you convey an object code work under this section in, or with, or +specifically for use in, a User Product, and the conveying occurs as +part of a transaction in which the right of possession and use of the +User Product is transferred to the recipient in perpetuity or for a +fixed term (regardless of how the transaction is characterized), the +Corresponding Source conveyed under this section must be accompanied +by the Installation Information. But this requirement does not apply +if neither you nor any third party retains the ability to install +modified object code on the User Product (for example, the work has +been installed in ROM). + + The requirement to provide Installation Information does not include a +requirement to continue to provide support service, warranty, or updates +for a work that has been modified or installed by the recipient, or for +the User Product in which it has been modified or installed. 
Access to a +network may be denied when the modification itself materially and +adversely affects the operation of the network or violates the rules and +protocols for communication across the network. + + Corresponding Source conveyed, and Installation Information provided, +in accord with this section must be in a format that is publicly +documented (and with an implementation available to the public in +source code form), and must require no special password or key for +unpacking, reading or copying. + + 7. Additional Terms. + + "Additional permissions" are terms that supplement the terms of this +License by making exceptions from one or more of its conditions. +Additional permissions that are applicable to the entire Program shall +be treated as though they were included in this License, to the extent +that they are valid under applicable law. If additional permissions +apply only to part of the Program, that part may be used separately +under those permissions, but the entire Program remains governed by +this License without regard to the additional permissions. + + When you convey a copy of a covered work, you may at your option +remove any additional permissions from that copy, or from any part of +it. (Additional permissions may be written to require their own +removal in certain cases when you modify the work.) You may place +additional permissions on material, added by you to a covered work, +for which you have or can give appropriate copyright permission. 
+ + Notwithstanding any other provision of this License, for material you +add to a covered work, you may (if authorized by the copyright holders of +that material) supplement the terms of this License with terms: + + a) Disclaiming warranty or limiting liability differently from the + terms of sections 15 and 16 of this License; or + + b) Requiring preservation of specified reasonable legal notices or + author attributions in that material or in the Appropriate Legal + Notices displayed by works containing it; or + + c) Prohibiting misrepresentation of the origin of that material, or + requiring that modified versions of such material be marked in + reasonable ways as different from the original version; or + + d) Limiting the use for publicity purposes of names of licensors or + authors of the material; or + + e) Declining to grant rights under trademark law for use of some + trade names, trademarks, or service marks; or + + f) Requiring indemnification of licensors and authors of that + material by anyone who conveys the material (or modified versions of + it) with contractual assumptions of liability to the recipient, for + any liability that these contractual assumptions directly impose on + those licensors and authors. + + All other non-permissive additional terms are considered "further +restrictions" within the meaning of section 10. If the Program as you +received it, or any part of it, contains a notice stating that it is +governed by this License along with a term that is a further +restriction, you may remove that term. If a license document contains +a further restriction but permits relicensing or conveying under this +License, you may add to a covered work material governed by the terms +of that license document, provided that the further restriction does +not survive such relicensing or conveying. 
+ + If you add terms to a covered work in accord with this section, you +must place, in the relevant source files, a statement of the +additional terms that apply to those files, or a notice indicating +where to find the applicable terms. + + Additional terms, permissive or non-permissive, may be stated in the +form of a separately written license, or stated as exceptions; +the above requirements apply either way. + + 8. Termination. + + You may not propagate or modify a covered work except as expressly +provided under this License. Any attempt otherwise to propagate or +modify it is void, and will automatically terminate your rights under +this License (including any patent licenses granted under the third +paragraph of section 11). + + However, if you cease all violation of this License, then your +license from a particular copyright holder is reinstated (a) +provisionally, unless and until the copyright holder explicitly and +finally terminates your license, and (b) permanently, if the copyright +holder fails to notify you of the violation by some reasonable means +prior to 60 days after the cessation. + + Moreover, your license from a particular copyright holder is +reinstated permanently if the copyright holder notifies you of the +violation by some reasonable means, this is the first time you have +received notice of violation of this License (for any work) from that +copyright holder, and you cure the violation prior to 30 days after +your receipt of the notice. + + Termination of your rights under this section does not terminate the +licenses of parties who have received copies or rights from you under +this License. If your rights have been terminated and not permanently +reinstated, you do not qualify to receive new licenses for the same +material under section 10. + + 9. Acceptance Not Required for Having Copies. + + You are not required to accept this License in order to receive or +run a copy of the Program. 
Ancillary propagation of a covered work +occurring solely as a consequence of using peer-to-peer transmission +to receive a copy likewise does not require acceptance. However, +nothing other than this License grants you permission to propagate or +modify any covered work. These actions infringe copyright if you do +not accept this License. Therefore, by modifying or propagating a +covered work, you indicate your acceptance of this License to do so. + + 10. Automatic Licensing of Downstream Recipients. + + Each time you convey a covered work, the recipient automatically +receives a license from the original licensors, to run, modify and +propagate that work, subject to this License. You are not responsible +for enforcing compliance by third parties with this License. + + An "entity transaction" is a transaction transferring control of an +organization, or substantially all assets of one, or subdividing an +organization, or merging organizations. If propagation of a covered +work results from an entity transaction, each party to that +transaction who receives a copy of the work also receives whatever +licenses to the work the party's predecessor in interest had or could +give under the previous paragraph, plus a right to possession of the +Corresponding Source of the work from the predecessor in interest, if +the predecessor has it or can get it with reasonable efforts. + + You may not impose any further restrictions on the exercise of the +rights granted or affirmed under this License. For example, you may +not impose a license fee, royalty, or other charge for exercise of +rights granted under this License, and you may not initiate litigation +(including a cross-claim or counterclaim in a lawsuit) alleging that +any patent claim is infringed by making, using, selling, offering for +sale, or importing the Program or any portion of it. + + 11. Patents. 
+ + A "contributor" is a copyright holder who authorizes use under this +License of the Program or a work on which the Program is based. The +work thus licensed is called the contributor's "contributor version". + + A contributor's "essential patent claims" are all patent claims +owned or controlled by the contributor, whether already acquired or +hereafter acquired, that would be infringed by some manner, permitted +by this License, of making, using, or selling its contributor version, +but do not include claims that would be infringed only as a +consequence of further modification of the contributor version. For +purposes of this definition, "control" includes the right to grant +patent sublicenses in a manner consistent with the requirements of +this License. + + Each contributor grants you a non-exclusive, worldwide, royalty-free +patent license under the contributor's essential patent claims, to +make, use, sell, offer for sale, import and otherwise run, modify and +propagate the contents of its contributor version. + + In the following three paragraphs, a "patent license" is any express +agreement or commitment, however denominated, not to enforce a patent +(such as an express permission to practice a patent or covenant not to +sue for patent infringement). To "grant" such a patent license to a +party means to make such an agreement or commitment not to enforce a +patent against the party. 
+ + If you convey a covered work, knowingly relying on a patent license, +and the Corresponding Source of the work is not available for anyone +to copy, free of charge and under the terms of this License, through a +publicly available network server or other readily accessible means, +then you must either (1) cause the Corresponding Source to be so +available, or (2) arrange to deprive yourself of the benefit of the +patent license for this particular work, or (3) arrange, in a manner +consistent with the requirements of this License, to extend the patent +license to downstream recipients. "Knowingly relying" means you have +actual knowledge that, but for the patent license, your conveying the +covered work in a country, or your recipient's use of the covered work +in a country, would infringe one or more identifiable patents in that +country that you have reason to believe are valid. + + If, pursuant to or in connection with a single transaction or +arrangement, you convey, or propagate by procuring conveyance of, a +covered work, and grant a patent license to some of the parties +receiving the covered work authorizing them to use, propagate, modify +or convey a specific copy of the covered work, then the patent license +you grant is automatically extended to all recipients of the covered +work and works based on it. + + A patent license is "discriminatory" if it does not include within +the scope of its coverage, prohibits the exercise of, or is +conditioned on the non-exercise of one or more of the rights that are +specifically granted under this License. 
You may not convey a covered +work if you are a party to an arrangement with a third party that is +in the business of distributing software, under which you make payment +to the third party based on the extent of your activity of conveying +the work, and under which the third party grants, to any of the +parties who would receive the covered work from you, a discriminatory +patent license (a) in connection with copies of the covered work +conveyed by you (or copies made from those copies), or (b) primarily +for and in connection with specific products or compilations that +contain the covered work, unless you entered into that arrangement, +or that patent license was granted, prior to 28 March 2007. + + Nothing in this License shall be construed as excluding or limiting +any implied license or other defenses to infringement that may +otherwise be available to you under applicable patent law. + + 12. No Surrender of Others' Freedom. + + If conditions are imposed on you (whether by court order, agreement or +otherwise) that contradict the conditions of this License, they do not +excuse you from the conditions of this License. If you cannot convey a +covered work so as to satisfy simultaneously your obligations under this +License and any other pertinent obligations, then as a consequence you may +not convey it at all. For example, if you agree to terms that obligate you +to collect a royalty for further conveying from those to whom you convey +the Program, the only way you could satisfy both those terms and this +License would be to refrain entirely from conveying the Program. + + 13. Remote Network Interaction; Use with the GNU General Public License. 
+ + Notwithstanding any other provision of this License, if you modify the +Program, your modified version must prominently offer all users +interacting with it remotely through a computer network (if your version +supports such interaction) an opportunity to receive the Corresponding +Source of your version by providing access to the Corresponding Source +from a network server at no charge, through some standard or customary +means of facilitating copying of software. This Corresponding Source +shall include the Corresponding Source for any work covered by version 3 +of the GNU General Public License that is incorporated pursuant to the +following paragraph. + + Notwithstanding any other provision of this License, you have +permission to link or combine any covered work with a work licensed +under version 3 of the GNU General Public License into a single +combined work, and to convey the resulting work. The terms of this +License will continue to apply to the part which is the covered work, +but the work with which it is combined will remain governed by version +3 of the GNU General Public License. + + 14. Revised Versions of this License. + + The Free Software Foundation may publish revised and/or new versions of +the GNU Affero General Public License from time to time. Such new versions +will be similar in spirit to the present version, but may differ in detail to +address new problems or concerns. + + Each version is given a distinguishing version number. If the +Program specifies that a certain numbered version of the GNU Affero General +Public License "or any later version" applies to it, you have the +option of following the terms and conditions either of that numbered +version or of any later version published by the Free Software +Foundation. If the Program does not specify a version number of the +GNU Affero General Public License, you may choose any version ever published +by the Free Software Foundation. 
+ + If the Program specifies that a proxy can decide which future +versions of the GNU Affero General Public License can be used, that proxy's +public statement of acceptance of a version permanently authorizes you +to choose that version for the Program. + + Later license versions may give you additional or different +permissions. However, no additional obligations are imposed on any +author or copyright holder as a result of your choosing to follow a +later version. + + 15. Disclaimer of Warranty. + + THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY +APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT +HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY +OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, +THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM +IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF +ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + + 16. Limitation of Liability. + + IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING +WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS +THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY +GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE +USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF +DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD +PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), +EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF +SUCH DAMAGES. + + 17. Interpretation of Sections 15 and 16. 
+ + If the disclaimer of warranty and limitation of liability provided +above cannot be given local legal effect according to their terms, +reviewing courts shall apply local law that most closely approximates +an absolute waiver of all civil liability in connection with the +Program, unless a warranty or assumption of liability accompanies a +copy of the Program in return for a fee. + + END OF TERMS AND CONDITIONS + + How to Apply These Terms to Your New Programs + + If you develop a new program, and you want it to be of the greatest +possible use to the public, the best way to achieve this is to make it +free software which everyone can redistribute and change under these terms. + + To do so, attach the following notices to the program. It is safest +to attach them to the start of each source file to most effectively +state the exclusion of warranty; and each file should have at least +the "copyright" line and a pointer to where the full notice is found. + + <one line to give the program's name and a brief idea of what it does.> + Copyright (C) <year> <name of author> + + This program is free software: you can redistribute it and/or modify + it under the terms of the GNU Affero General Public License as published by + the Free Software Foundation, either version 3 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU Affero General Public License for more details. + + You should have received a copy of the GNU Affero General Public License + along with this program. If not, see <http://www.gnu.org/licenses/>. + +Also add information on how to contact you by electronic and paper mail. + + If your software can interact with users remotely through a computer +network, you should also make sure that it provides a way for users to +get its source. For example, if your program is a web application, its +interface could display a "Source" link that leads users to an archive +of the code.
There are many ways you could offer source, and different +solutions will be better for different programs; see section 13 for the +specific requirements. + + You should also get your employer (if you work as a programmer) or school, +if any, to sign a "copyright disclaimer" for the program, if necessary. +For more information on this, and how to apply and follow the GNU AGPL, see +<http://www.gnu.org/licenses/>. diff --git a/README.md b/README.md new file mode 100644 index 0000000..92e8202 --- /dev/null +++ b/README.md @@ -0,0 +1,35 @@ +ckanext-swisstopo +================= + +Harvester for the Federal Office of Topography swisstopo + +## Installation + +Use `pip` to install this plugin. This example installs it in `/home/www-data`: + +```bash +source /home/www-data/pyenv/bin/activate +pip install -e git+https://github.com/ogdch/ckanext-swisstopo.git#egg=ckanext-swisstopo --src /home/www-data +cd /home/www-data/ckanext-swisstopo +pip install -r pip-requirements.txt +python setup.py develop +``` + +Make sure to add `swisstopo` and `swisstopo_harvest` to `ckan.plugins` in your config file. + +## Run harvester + +```bash +source /home/www-data/pyenv/bin/activate +paster --plugin=ckanext-swisstopo swisstopo_harvest gather_consumer -c development.ini & +paster --plugin=ckanext-swisstopo swisstopo_harvest fetch_consumer -c development.ini & +paster --plugin=ckanext-swisstopo swisstopo_harvest run -c development.ini +``` + +CSW query: + +```bash +source /home/www-data/pyenv/bin/activate +# Show output from CSW, 'query' is typically the name of a dataset like 'swissboundaries3D' +paster --plugin=ckanext-swisstopo swisstopo csw <query> -c development.ini +```