diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml new file mode 100644 index 00000000..22965a1e --- /dev/null +++ b/.github/workflows/test.yml @@ -0,0 +1,79 @@ +name: Tests +on: [push, pull_request] +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - uses: actions/setup-python@v4 + with: + python-version: '3.9' + - name: Install requirements + run: pip install flake8 pycodestyle + - name: Check syntax + run: flake8 . --count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ckan + + test: + needs: lint + strategy: + matrix: + ckan-version: ["2.10", "2.9", "2.9-py2", "2.8", "2.7"] + fail-fast: false + + name: CKAN ${{ matrix.ckan-version }} + runs-on: ubuntu-latest + container: + image: openknowledge/ckan-dev:${{ matrix.ckan-version }} + services: + solr: + image: ckan/ckan-solr:${{ matrix.ckan-version }} + postgres: + image: ckan/ckan-postgres-dev:${{ matrix.ckan-version }} + env: + POSTGRES_USER: postgres + POSTGRES_PASSWORD: postgres + POSTGRES_DB: postgres + options: --health-cmd pg_isready --health-interval 10s --health-timeout 5s --health-retries 5 + redis: + image: redis:3 + env: + CKAN_SQLALCHEMY_URL: postgresql://ckan_default:pass@postgres/ckan_test + CKAN_DATASTORE_WRITE_URL: postgresql://datastore_write:pass@postgres/datastore_test + CKAN_DATASTORE_READ_URL: postgresql://datastore_read:pass@postgres/datastore_test + CKAN_SOLR_URL: http://solr:8983/solr/ckan + CKAN_REDIS_URL: redis://redis:6379/1 + + steps: + - uses: actions/checkout@v3 + - name: Install py3 requirements + if: ${{ matrix.ckan-version == '2.10' || matrix.ckan-version == '2.9' }} + run: pip install -r requirements.txt + - name: Install py2 requirements + if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' || matrix.ckan-version == '2.9-py2' }} + run: pip install -r requirements-py2.txt + - name: Install requirements + run: | + pip install -r dev-requirements.txt + pip install -e . 
+ # Replace default path to CKAN core config file with the one on the container + sed -i -e 's/use = config:.*/use = config:\/srv\/app\/src\/ckan\/test-core.ini/' test.ini + - name: Setup extension (CKAN >= 2.9) + if: ${{ matrix.ckan-version != '2.7' && matrix.ckan-version != '2.8' }} + run: | + ckan -c test.ini db init + ckan -c test.ini archiver init + ckan -c test.ini report initdb + ckan -c test.ini qa init + - name: Setup extension (CKAN < 2.9) + if: ${{ matrix.ckan-version == '2.7' || matrix.ckan-version == '2.8' }} + run: | + paster --plugin=ckan db init -c test.ini + paster --plugin=ckanext-archiver archiver init --config=test.ini + paster --plugin=ckanext-report report initdb --config=test.ini + paster --plugin=ckanext-qa qa init --config=test.ini + - name: Run tests + run: pytest --ckan-ini=test.ini --cov=ckanext.qa --cov-report=xml --cov-append --disable-warnings ckanext/qa/tests + - name: Upload coverage report to codecov + uses: codecov/codecov-action@v1 + with: + file: ./coverage.xml diff --git a/.travis.yml b/.travis.yml deleted file mode 100644 index 9d7d4752..00000000 --- a/.travis.yml +++ /dev/null @@ -1,40 +0,0 @@ -language: python -python: - - "2.7" -env: - - CKANVERSION=master - - CKANVERSION=2.6 - - CKANVERSION=2.7 - - CKANVERSION=2.8 -services: - - redis-server - - postgresql -install: - - bash bin/travis-build.bash - - pip install coveralls -script: sh bin/travis-run.sh -after_success: - - coveralls - -# the new trusty images of Travis cause build errors with psycopg2, see https://github.com/travis-ci/travis-ci/issues/8897 -dist: trusty -group: deprecated-2017Q4 - -stages: - - Flake8 - - test - -jobs: - include: - - stage: Flake8 - env: FLAKE8=True - install: - - bash bin/travis-build.bash - - pip install flake8==3.5.0 - - pip install pycodestyle==2.3.0 - script: - - flake8 --version - # stop the build if there are Python syntax errors or undefined names - - flake8 . 
--count --select=E901,E999,F821,F822,F823 --show-source --statistics --exclude ckan,ckanext-archiver - # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide - - flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics --exclude ckan,ckanext-archiver diff --git a/CHANGELOG.md b/CHANGELOG.md new file mode 100644 index 00000000..f2db6a7f --- /dev/null +++ b/CHANGELOG.md @@ -0,0 +1,7 @@ +# ckanext-qa versions + +## 3.0 (2022-04-15) + +New features: + - Add Python 3 compatibility + - Remove Travis and start using GitHub Actions \ No newline at end of file diff --git a/README.rst b/README.rst index fa2fcfc8..36d6accf 100644 --- a/README.rst +++ b/README.rst @@ -2,8 +2,10 @@ these badges work. The necessary Travis and Coverage config files have been generated for you. -.. image:: https://travis-ci.org/ckan/ckanext-qa.svg?branch=master - :target: https://travis-ci.org/ckan/ckanext-qa +.. image:: https://github.com/ckan/ckanext-qa/workflows/Tests/badge.svg?branch=master + :target: https://github.com/ckan/ckanext-qa/actions +.. image:: http://codecov.io/github/ckan/ckanext-qa/coverage.svg?branch=master + :target: http://codecov.io/github/ckan/ckanext-qa?branch=master CKAN QA Extension (Quality Assurance) ===================================== @@ -31,7 +33,7 @@ Requirements Before installing ckanext-qa, make sure that you have installed the following: -* CKAN 2.1+ +* CKAN 2.1+ (tests are only running for CKAN 2.7+) * ckanext-archiver 2.0+ (https://github.com/ckan/ckanext-archiver) * ckanext-report (https://github.com/datagovuk/ckanext-report) for reporting diff --git a/bin/travis-build.bash b/bin/travis-build.bash deleted file mode 100644 index fa1b072c..00000000 --- a/bin/travis-build.bash +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -set -e -set -x # echo on - -echo "This is travis-build.bash..." - -echo "Installing the packages that CKAN requires..." 
-sudo apt-get update -qq -sudo apt-get install solr-jetty libcommons-fileupload-java - -echo "Upgrading libmagic for ckanext-qa..." -# appears to upgrade it from 5.09-2 to 5.09-2ubuntu0.6 which seems to help the tests -sudo apt-get install libmagic1 - -echo "Installing CKAN and its Python dependencies..." -git clone https://github.com/ckan/ckan -cd ckan - -if [ $CKANVERSION == 'master' ] -then - echo "CKAN version: master" -else - CKAN_TAG=$(git tag | grep ^ckan-$CKANVERSION | sort --version-sort | tail -n 1) - git checkout $CKAN_TAG - echo "CKAN version: ${CKAN_TAG#ckan-}" -fi - -python setup.py develop -if [ -f requirements-py2.txt ] -then - pip install -r requirements-py2.txt -else - pip install -r requirements.txt -fi -pip install -r dev-requirements.txt --allow-all-external -cd - - -echo "Setting up Solr..." -# solr is multicore for tests on ckan master now, but it's easier to run tests -# on Travis single-core still. -# see https://github.com/ckan/ckan/issues/2972 -sed -i -e 's/solr_url.*/solr_url = http:\/\/127.0.0.1:8983\/solr/' ckan/test-core.ini -printf "NO_START=0\nJETTY_HOST=127.0.0.1\nJETTY_PORT=8983\nJAVA_HOME=$JAVA_HOME" | sudo tee /etc/default/jetty -sudo cp ckan/ckan/config/solr/schema.xml /etc/solr/conf/schema.xml -sudo service jetty restart - -echo "Creating the PostgreSQL user and database..." -sudo -u postgres psql -c "CREATE USER ckan_default WITH PASSWORD 'pass';" -sudo -u postgres psql -c 'CREATE DATABASE ckan_test WITH OWNER ckan_default;' - -echo "Initialising the database..." -cd ckan -paster db init -c test-core.ini -cd - - -echo "Installing dependency ckanext-report and its requirements..." -pip install -e git+https://github.com/datagovuk/ckanext-report.git#egg=ckanext-report - -echo "Installing dependency ckanext-archiver and its requirements..." -git clone https://github.com/ckan/ckanext-archiver.git -cd ckanext-archiver -pip install -e . -pip install -r requirements.txt -cd - - -echo "Installing ckanext-qa and its requirements..." 
-python setup.py develop -pip install -r requirements.txt -pip install -r dev-requirements.txt - -echo "Moving test-core.ini into a subdir..." -mkdir subdir -mv test-core.ini subdir - -echo "travis-build.bash is done." diff --git a/bin/travis-run.sh b/bin/travis-run.sh deleted file mode 100644 index 5c4022b7..00000000 --- a/bin/travis-run.sh +++ /dev/null @@ -1,6 +0,0 @@ -#!/bin/sh -e - -echo "NO_START=0\nJETTY_HOST=127.0.0.1\nJETTY_PORT=8983\nJAVA_HOME=$JAVA_HOME" | sudo tee /etc/default/jetty -sudo cp ckan/ckan/config/solr/schema.xml /etc/solr/conf/schema.xml -sudo service jetty restart -nosetests --with-pylons=subdir/test-core.ini --with-coverage --cover-package=ckanext.archiver --cover-inclusive --cover-erase --cover-tests diff --git a/ckanext/qa/__init__.py b/ckanext/qa/__init__.py index 53fd0507..97308f69 100644 --- a/ckanext/qa/__init__.py +++ b/ckanext/qa/__init__.py @@ -6,4 +6,4 @@ import pkgutil __path__ = pkgutil.extend_path(__path__, __name__) -__version__ = '2.0' +__version__ = '3.0' diff --git a/ckanext/qa/bin/common.py b/ckanext/qa/bin/common.py index 0ace784e..f30bf79a 100644 --- a/ckanext/qa/bin/common.py +++ b/ckanext/qa/bin/common.py @@ -1,3 +1,4 @@ +from __future__ import print_function import os @@ -48,5 +49,5 @@ def get_resources(state='active', publisher_ref=None, resource_id=None, dataset_ resources = resources.filter(model.Resource.id == resource_id) criteria.append('Resource:%s' % resource_id) resources = resources.all() - print '%i resources (%s)' % (len(resources), ' '.join(criteria)) + print('%i resources (%s)' % (len(resources), ' '.join(criteria))) return resources diff --git a/ckanext/qa/bin/migrate_task_status.py b/ckanext/qa/bin/migrate_task_status.py index f57b1bf5..a7b8fd61 100644 --- a/ckanext/qa/bin/migrate_task_status.py +++ b/ckanext/qa/bin/migrate_task_status.py @@ -5,12 +5,11 @@ information - resources that are no longer available but had the format detected in the past. 
''' - +from __future__ import print_function from optparse import OptionParser import logging import json import datetime - import common from running_stats import StatsList @@ -59,7 +58,7 @@ def migrate(options): # time, so some timezone nonesense going on. Can't do much. archival = Archival.get_for_resource(res.id) if not archival: - print add_stat('QA but no Archival data', res, stats) + print(add_stat('QA but no Archival data', res, stats)) continue archival_date = archival.updated # the state of the resource was as it was archived on the date of @@ -112,10 +111,10 @@ def migrate(options): model.Session.add(qa) add_stat('Added to QA table', res, stats) - print 'Summary\n', stats.report() + print('Summary\n', stats.report()) if options.write: model.repo.commit_and_remove() - print 'Written' + print('Written') def add_stat(outcome, res, stats, extra_info=None): @@ -154,10 +153,10 @@ def date_str_to_datetime_or_none(date_str): if len(args) != 1: parser.error('Wrong number of arguments (%i)' % len(args)) config_ini = args[0] - print 'Loading CKAN config...' + print('Loading CKAN config...') common.load_config(config_ini) common.register_translator() - print 'Done' + print('Done') # Setup logging to print debug out for local only rootLogger = logging.getLogger() rootLogger.setLevel(logging.WARNING) diff --git a/ckanext/qa/bin/running_stats.py b/ckanext/qa/bin/running_stats.py index 947797aa..d08e5ad9 100644 --- a/ckanext/qa/bin/running_stats.py +++ b/ckanext/qa/bin/running_stats.py @@ -14,7 +14,7 @@ package_stats.increment('deleted') else: package_stats.increment('not deleted') -print package_stats.report() +print(package_stats.report()) > deleted: 30 > not deleted: 70 @@ -26,14 +26,15 @@ package_stats.add('deleted', package.name) else: package_stats.add('not deleted' package.name) -print package_stats.report() +print(package_stats.report()) > deleted: 30 pollution-uk, flood-regions, river-quality, ... > not deleted: 70 spending-bristol, ... 
''' - +from __future__ import print_function import copy import datetime +import six class StatsCount(dict): @@ -68,9 +69,9 @@ def report(self, indent=1, order_by_title=False, show_time_taken=True): report_dict[category] = self.report_value(category) if order_by_title: - items = sorted(report_dict.iteritems()) + items = sorted(six.iteritems(report_dict)) else: - items = sorted(report_dict.iteritems(), + items = sorted(six.iteritems(report_dict), key=lambda x: -x[1][1]) for category, value_tuple in items: @@ -110,6 +111,6 @@ def report_value(self, category): package_stats.add('Success', 'good3') package_stats.add('Success', 'good4') package_stats.add('Failure', 'bad1') - print package_stats.report() + print(package_stats.report()) - print StatsList().report() + print(StatsList().report()) diff --git a/ckanext/qa/cli.py b/ckanext/qa/cli.py new file mode 100644 index 00000000..554d92a6 --- /dev/null +++ b/ckanext/qa/cli.py @@ -0,0 +1,35 @@ +# -*- coding: utf-8 -*- + +import click +import logging +import ckanext.qa.utils as utils + + +log = logging.getLogger(__name__) + + +def get_commands(): + return [qa] + + +@click.group() +def qa(): + pass + + +@qa.command() +def init(): + """Creates necessary db tables""" + utils.init_db() + + +@qa.command() +@click.argument('ids', nargs=-1) +@click.option('-q', '--queue', default=None) +def update(ids, queue): + """Queue a QA update for the given organization/dataset/resource ids, or for all datasets if none are given""" + log.info('QA update: ids:%s queue:%s' % (ids, queue)) + if ids: + utils.update(*ids, queue=queue) + else: + utils.update(queue=queue) diff --git a/ckanext/qa/commands.py b/ckanext/qa/commands.py index 992fb0cd..e70ce48e 100644 --- a/ckanext/qa/commands.py +++ b/ckanext/qa/commands.py @@ -1,9 +1,11 @@ +from __future__ import print_function import logging import sys - +from builtins import input from sqlalchemy import or_ import ckan.plugins as p +from ckanext.qa import utils REQUESTS_HEADER = {'content-type': 'application/json', 'User-Agent': 'ckanext-qa commands'} @@ -65,10 +67,11 @@ 
def command(self): Parse command line arguments and call appropriate method. """ if not self.args or self.args[0] in ['--help', '-h', 'help']: - print QACommand.__doc__ + print(QACommand.__doc__) return cmd = self.args[0] + args = self.args[1:] if len(self.args) > 1 else [] self._load_config() # Now we can import ckan and create logger, knowing that loggers @@ -76,7 +79,7 @@ def command(self): self.log = logging.getLogger('ckanext.qa') if cmd == 'update': - self.update() + utils.update(*args, queue=self.options.queue) elif cmd == 'sniff': self.sniff() elif cmd == 'view': @@ -89,132 +92,52 @@ def command(self): elif cmd == 'migrate1': self.migrate1() elif cmd == 'init': - self.init_db() + utils.init_db() else: self.log.error('Command "%s" not recognized' % (cmd,)) - def init_db(self): - import ckan.model as model - from ckanext.qa.model import init_tables - init_tables(model.meta.engine) - - def update(self): - from ckan import model - from ckanext.qa import lib - packages = [] - resources = [] - if len(self.args) > 1: - for arg in self.args[1:]: - # try arg as a group id/name - group = model.Group.get(arg) - if group and group.is_organization: - # group.packages() is unreliable for an organization - - # member objects are not definitive whereas owner_org, so - # get packages using owner_org - query = model.Session.query(model.Package)\ - .filter( - or_(model.Package.state == 'active', - model.Package.state == 'pending'))\ - .filter_by(owner_org=group.id) - packages.extend(query.all()) - if not self.options.queue: - self.options.queue = 'bulk' - continue - elif group: - packages.extend(group.packages()) - if not self.options.queue: - self.options.queue = 'bulk' - continue - # try arg as a package id/name - pkg = model.Package.get(arg) - if pkg: - packages.append(pkg) - if not self.options.queue: - self.options.queue = 'priority' - continue - # try arg as a resource id - res = model.Resource.get(arg) - if res: - resources.append(res) - if not self.options.queue: - 
self.options.queue = 'priority' - continue - else: - self.log.error('Could not recognize as a group, package ' - 'or resource: %r', arg) - sys.exit(1) - else: - # all packages - pkgs = model.Session.query(model.Package)\ - .filter_by(state='active')\ - .order_by('name').all() - packages.extend(pkgs) - if not self.options.queue: - self.options.queue = 'bulk' - - if packages: - self.log.info('Datasets to QA: %d', len(packages)) - if resources: - self.log.info('Resources to QA: %d', len(resources)) - if not (packages or resources): - self.log.error('No datasets or resources to process') - sys.exit(1) - - self.log.info('Queue: %s', self.options.queue) - for package in packages: - lib.create_qa_update_package_task(package, self.options.queue) - self.log.info('Queuing dataset %s (%s resources)', - package.name, len(package.resources)) - - for resource in resources: - package = resource.resource_group.package - self.log.info('Queuing resource %s/%s', package.name, resource.id) - lib.create_qa_update_task(resource, self.options.queue) - - self.log.info('Completed queueing') - def sniff(self): from ckanext.qa.sniff_format import sniff_file_format if len(self.args) < 2: - print 'Not enough arguments', self.args + print('Not enough arguments', self.args) sys.exit(1) for filepath in self.args[1:]: format_ = sniff_file_format( filepath, logging.getLogger('ckanext.qa.sniffer')) if format_: - print 'Detected as: %s - %s' % (format_['display_name'], - filepath) + print('Detected as: %s - %s' % (format_['display_name'], + filepath)) else: - print 'ERROR: Could not recognise format of: %s' % filepath + print('ERROR: Could not recognise format of: %s' % filepath) def view(self, package_ref=None): from ckan import model q = model.Session.query(model.TaskStatus).filter_by(task_type='qa') - print 'QA records - %i TaskStatus rows' % q.count() - print ' across %i Resources' % q.distinct('entity_id').count() + print('QA records - %i TaskStatus rows' % q.count()) + print(' across %i 
Resources' % q.distinct('entity_id').count()) if package_ref: pkg = model.Package.get(package_ref) - print 'Package %s %s' % (pkg.name, pkg.id) + print('Package %s %s' % (pkg.name, pkg.id)) for res in pkg.resources: - print 'Resource %s' % res.id + print('Resource %s' % res.id) for row in q.filter_by(entity_id=res.id): - print '* %s = %r error=%r' % (row.key, row.value, - row.error) + print('* %s = %r error=%r' % (row.key, row.value, + row.error)) def clean(self): from ckan import model - print 'Before:' + print('Before:') self.view() q = model.Session.query(model.TaskStatus).filter_by(task_type='qa') q.delete() model.Session.commit() - print 'After:' + print('After:') self.view() def migrate1(self): @@ -223,32 +146,32 @@ def migrate1(self): q_status = model.Session.query(model.TaskStatus) \ .filter_by(task_type='qa') \ .filter_by(key='status') - print '* %s with "status" will be deleted e.g. %s' % (q_status.count(), - q_status.first()) + print('* %s with "status" will be deleted e.g. %s' % (q_status.count(), + q_status.first())) q_failures = model.Session.query(model.TaskStatus) \ .filter_by(task_type='qa') \ .filter_by(key='openness_score_failure_count') - print '* %s with openness_score_failure_count to be deleted e.g.\n%s'\ - % (q_failures.count(), q_failures.first()) + print('* %s with openness_score_failure_count to be deleted e.g.\n%s'\ + % (q_failures.count(), q_failures.first())) q_score = model.Session.query(model.TaskStatus) \ .filter_by(task_type='qa') \ .filter_by(key='openness_score') - print '* %s with openness_score to migrate e.g.\n%s' % \ - (q_score.count(), q_score.first()) + print('* %s with openness_score to migrate e.g.\n%s' % \ + (q_score.count(), q_score.first())) q_reason = model.Session.query(model.TaskStatus) \ .filter_by(task_type='qa') \ .filter_by(key='openness_score_reason') - print '* %s with openness_score_reason to migrate e.g.\n%s' % \ - (q_reason.count(), q_reason.first()) - raw_input('Press Enter to continue') + print('* %s with 
openness_score_reason to migrate e.g.\n%s' % \ + (q_reason.count(), q_reason.first())) + input('Press Enter to continue') q_status.delete() model.Session.commit() - print '..."status" deleted' + print('..."status" deleted') q_failures.delete() model.Session.commit() - print '..."openness_score_failure_count" deleted' + print('..."openness_score_failure_count" deleted') for task_status in q_score: reason_task_status = q_reason \ @@ -267,13 +190,13 @@ def migrate1(self): 'is_broken': None, }) model.Session.commit() - print '..."openness_score" and "openness_score_reason" migrated' + print('..."openness_score" and "openness_score_reason" migrated') count = q_reason.count() q_reason.delete() model.Session.commit() - print '... %i remaining "openness_score_reason" deleted' % count + print('... %i remaining "openness_score_reason" deleted' % count) model.Session.flush() model.Session.remove() - print 'Migration succeeded' + print('Migration succeeded') diff --git a/ckanext/qa/controllers.py b/ckanext/qa/controllers.py index 493eed7f..4cedcbb5 100644 --- a/ckanext/qa/controllers.py +++ b/ckanext/qa/controllers.py @@ -102,7 +102,7 @@ def _check_link(self, url): result['mimetype'] = self._extract_mimetype(headers) result['size'] = headers.get('content-length', '') result['last_modified'] = self._parse_and_format_date(headers.get('last-modified', '')) - except LinkCheckerError, e: + except LinkCheckerError as e: result['url_errors'].append(str(e)) return result diff --git a/ckanext/qa/lib.py b/ckanext/qa/lib.py index 2113badd..5e1608a3 100644 --- a/ckanext/qa/lib.py +++ b/ckanext/qa/lib.py @@ -3,10 +3,11 @@ import re import logging -from pylons import config +import ckan +from ckan.common import config from ckan import plugins as p -import tasks +from ckanext.qa import tasks log = logging.getLogger(__name__) @@ -55,7 +56,7 @@ def resource_format_scores(): with open(json_filepath) as format_file: try: file_resource_formats = json.loads(format_file.read()) - except ValueError, 
e: + except ValueError as e: # includes simplejson.decoder.JSONDecodeError raise ValueError('Invalid JSON syntax in %s: %s' % (json_filepath, e)) @@ -87,8 +88,7 @@ def munge_format_to_be_canonical(format_name): def create_qa_update_package_task(package, queue): - from pylons import config - ckan_ini_filepath = os.path.abspath(config.__file__) + ckan_ini_filepath = os.path.abspath(config['__file__']) compat_enqueue('qa.update_package', tasks.update_package, queue, args=[ckan_ini_filepath, package.id]) log.debug('QA of package put into celery queue %s: %s', @@ -96,12 +96,11 @@ def create_qa_update_package_task(package, queue): def create_qa_update_task(resource, queue): - from pylons import config if p.toolkit.check_ckan_version(max_version='2.2.99'): package = resource.resource_group.package else: package = resource.package - ckan_ini_filepath = os.path.abspath(config.__file__) + ckan_ini_filepath = os.path.abspath(config['__file__']) compat_enqueue('qa.update', tasks.update, queue, args=[ckan_ini_filepath, resource.id]) diff --git a/ckanext/qa/model.py b/ckanext/qa/model.py index 9e6b97a1..1f3ca9b2 100644 --- a/ckanext/qa/model.py +++ b/ckanext/qa/model.py @@ -1,5 +1,6 @@ import uuid import datetime +from builtins import str from sqlalchemy import Column from sqlalchemy import types @@ -15,7 +16,7 @@ def make_uuid(): - return unicode(uuid.uuid4()) + return str(uuid.uuid4()) class QA(Base): @@ -40,7 +41,7 @@ class QA(Base): def __repr__(self): summary = 'score=%s format=%s' % (self.openness_score, self.format) - details = unicode(self.openness_score_reason).encode('unicode_escape') + details = str(self.openness_score_reason).encode('unicode_escape') package = model.Package.get(self.package_id) package_name = package.name if package else '?%s?' 
% self.package_id return '' % \ diff --git a/ckanext/qa/plugin.py b/ckanext/qa/plugin.py index 876459d1..563da89a 100644 --- a/ckanext/qa/plugin.py +++ b/ckanext/qa/plugin.py @@ -4,10 +4,9 @@ import ckan.plugins as p from ckanext.archiver.interfaces import IPipe -from logic import action, auth -from model import QA, aggregate_qa_for_a_dataset -import helpers -import lib +from ckanext.qa.logic import action, auth +from ckanext.qa.model import QA, aggregate_qa_for_a_dataset +from ckanext.qa import helpers from ckanext.report.interfaces import IReport @@ -16,7 +15,6 @@ class QAPlugin(p.SingletonPlugin, p.toolkit.DefaultDatasetForm): p.implements(p.IConfigurer, inherit=True) - p.implements(p.IRoutes, inherit=True) p.implements(IPipe, inherit=True) p.implements(IReport) p.implements(p.IActions) @@ -29,22 +27,35 @@ class QAPlugin(p.SingletonPlugin, p.toolkit.DefaultDatasetForm): def update_config(self, config): p.toolkit.add_template_directory(config, 'templates') - # IRoutes - - def before_map(self, map): - # Link checker - deprecated - res = 'ckanext.qa.controllers:LinkCheckerController' - map.connect('qa_resource_checklink', '/qa/link_checker', - conditions=dict(method=['GET']), - controller=res, - action='check_link') - return map + if p.toolkit.check_ckan_version(max_version='2.9.5'): + p.implements(p.IRoutes, inherit=True) + # TODO IRoutes was deprecated and we need to figure out + # if this is important and how to implement + # https://github.com/ckan/ckan/commit/f2a5bffed60bb71db8f728091eb33a784c7a2052 + + # IRoutes + def before_map(self, map): + # Link checker - deprecated + res = 'ckanext.qa.controllers:LinkCheckerController' + map.connect('qa_resource_checklink', '/qa/link_checker', + conditions=dict(method=['GET']), + controller=res, + action='check_link') + return map + + if p.toolkit.check_ckan_version(min_version='2.9'): + p.implements(p.IClick, inherit=True) + # IClick + def get_commands(self): + from ckanext.qa import cli + return cli.get_commands() # 
IPipe def receive_data(self, operation, queue, **params): '''Receive notification from ckan-archiver that a dataset has been archived.''' + from ckanext.qa import lib if not operation == 'package-archived': return dataset_id = params['package_id'] diff --git a/ckanext/qa/reports.py b/ckanext/qa/reports.py index c50b56de..c87b9c55 100644 --- a/ckanext/qa/reports.py +++ b/ckanext/qa/reports.py @@ -1,5 +1,6 @@ from collections import Counter import copy +import six try: from collections import OrderedDict # from python 2.7 except ImportError: @@ -71,7 +72,7 @@ def openness_index(include_sub_organizations=False): results = counts table = [] - for org_name, org_counts in results.iteritems(): + for org_name, org_counts in six.iteritems(results): total_stars = sum([k*v for k, v in org_counts['score_counts'].items() if k]) num_pkgs_scored = sum([v for k, v in org_counts['score_counts'].items() if k is not None]) diff --git a/ckanext/qa/sniff_format.py b/ckanext/qa/sniff_format.py index 856447fa..84fc9f38 100644 --- a/ckanext/qa/sniff_format.py +++ b/ckanext/qa/sniff_format.py @@ -1,15 +1,16 @@ import re import zipfile import os +from builtins import str from collections import defaultdict import subprocess -import StringIO +from io import StringIO import xlrd import magic import messytables -import lib +from ckanext.qa import lib from ckan.lib import helpers as ckan_helpers import logging @@ -33,7 +34,7 @@ def sniff_file_format(filepath): ''' format_ = None log.info('Sniffing file format of: %s', filepath) - filepath_utf8 = filepath.encode('utf8') if isinstance(filepath, unicode) \ + filepath_utf8 = filepath.encode('utf8') if isinstance(filepath, str) \ else filepath mime_type = magic.from_file(filepath_utf8, mime=True) log.info('Magic detects file as: %s', mime_type) @@ -318,9 +319,9 @@ def start_element(name, attrs): p.StartElementHandler = start_element try: p.Parse(buf) - except GotFirstTag, e: + except GotFirstTag as e: top_level_tag_name = str(e).lower() - except 
xml.sax.SAXException, e: + except xml.sax.SAXException as e: log.info('Sax parse error: %s %s', e, buf) return {'format': 'XML'} @@ -381,11 +382,11 @@ def get_zipped_format(filepath): filepaths = zip.namelist() finally: zip.close() - except zipfile.BadZipfile, e: + except zipfile.BadZipfile as e: log.info('Zip file open raised error %s: %s', e, e.args) return - except Exception, e: + except Exception as e: log.warning('Zip file open raised exception %s: %s', e, e.args) return @@ -438,7 +439,7 @@ def get_zipped_format(filepath): def is_excel(filepath): try: xlrd.open_workbook(filepath) - except Exception, e: + except Exception as e: log.info('Not Excel - failed to load: %s %s', e, e.args) return False else: diff --git a/ckanext/qa/tasks.py b/ckanext/qa/tasks.py index 26fe8289..230141b4 100644 --- a/ckanext/qa/tasks.py +++ b/ckanext/qa/tasks.py @@ -6,16 +6,19 @@ import json import os import traceback -import urlparse +try: + import urlparse +except ImportError: + import urllib.parse as urlparse import routes +from builtins import str from ckan.common import _ from ckan.lib import i18n from ckan.plugins import toolkit import ckan.lib.helpers as ckan_helpers -from sniff_format import sniff_file_format -import lib +from ckanext.qa.sniff_format import sniff_file_format from ckanext.archiver.model import Archival, Status import logging @@ -115,9 +118,9 @@ def update_package(ckan_ini_filepath, package_id): try: update_package_(package_id) - except Exception, e: + except Exception as e: log.error('Exception occurred during QA update_package: %s: %s', - e.__class__.__name__, unicode(e)) + e.__class__.__name__, str(e)) raise @@ -154,9 +157,9 @@ def update(ckan_ini_filepath, resource_id): load_config(ckan_ini_filepath) try: update_resource_(resource_id) - except Exception, e: + except Exception as e: log.error('Exception occurred during QA update_resource: %s: %s', - e.__class__.__name__, unicode(e)) + e.__class__.__name__, str(e)) raise @@ -253,9 +256,9 @@ def 
resource_score(resource): format_ = get_qa_format(resource.id) score_reason = ' '.join(score_reasons) format_ = format_ or None - except Exception, e: + except Exception as e: log.error('Unexpected error while calculating openness score %s: %s\nException: %s', - e.__class__.__name__, unicode(e), traceback.format_exc()) + e.__class__.__name__, str(e), traceback.format_exc()) score_reason = _("Unknown error: %s") % str(e) raise @@ -296,7 +299,7 @@ def format_date(date): else: return '' messages = [_('File could not be downloaded.'), - _('Reason') + ':', unicode(archival.status) + '.', + _('Reason') + ':', str(archival.status) + '.', _('Error details: %s.') % archival.reason, _('Attempted on %s.') % format_date(archival.updated)] last_success = format_date(archival.last_success) @@ -347,6 +350,7 @@ def score_by_sniffing_data(archival, resource, score_reasons): * If it cannot work out the format then format_string is None * If it cannot score it, then score is None ''' + from ckanext.qa import lib if not archival or not archival.cache_filepath: score_reasons.append(_('This file had not been downloaded at the time of scoring it.')) return (None, None) @@ -394,6 +398,7 @@ def score_by_url_extension(resource, score_reasons): * If it cannot work out the format then format is None * If it cannot score it, then score is None ''' + from ckanext.qa import lib extension_variants_ = extension_variants(resource.url.strip()) if not extension_variants_: score_reasons.append(_('Could not determine a file extension in the URL.')) @@ -446,6 +451,7 @@ def score_by_format_field(resource, score_reasons): * If it cannot work out the format then format_string is None * If it cannot score it, then score is None ''' + from ckanext.qa import lib format_field = resource.format or '' if not format_field: score_reasons.append(_('Format field is blank.')) diff --git a/ckanext/qa/templates/report/openness.html b/ckanext/qa/templates/report/openness.html index 7b52791a..11d940cf 100644 --- 
a/ckanext/qa/templates/report/openness.html +++ b/ckanext/qa/templates/report/openness.html @@ -22,7 +22,7 @@ - +
@@ -76,7 +76,7 @@
{% trans %}Organization{% endtrans %}
- +
diff --git a/ckanext/qa/tests/mock_remote_server.py b/ckanext/qa/tests/mock_remote_server.py index b43fb77d..ae56ef03 100644 --- a/ckanext/qa/tests/mock_remote_server.py +++ b/ckanext/qa/tests/mock_remote_server.py @@ -8,6 +8,8 @@ from wsgiref.simple_server import make_server import urllib2 import socket +from builtins import str +from functools import reduce class MockHTTPServer(object): @@ -116,7 +118,7 @@ def __call__(self, environ, start_response): else: content = request.str_params.get('content', '') - if isinstance(content, unicode): + if isinstance(content, str): raise TypeError("Expected raw byte string for content") headers = [ diff --git a/ckanext/qa/tests/test_link_checker.py b/ckanext/qa/tests/test_link_checker.py index 550a016e..1fcefc6d 100644 --- a/ckanext/qa/tests/test_link_checker.py +++ b/ckanext/qa/tests/test_link_checker.py @@ -1,12 +1,10 @@ +from __future__ import print_function import logging from functools import wraps import json from urllib import urlencode from nose.tools import assert_in -try: - from ckan.tests.legacy import TestController as ControllerTestCase -except ImportError: - from ckan.tests import TestController as ControllerTestCase +from ckan.tests import TestController as ControllerTestCase from nose.tools import assert_equal from ckanext.archiver.tasks import update_package @@ -121,12 +119,12 @@ def test_colon_in_query_string(self, url): # accept, because browsers accept this # see discussion: http://trac.ckan.org/ticket/318 result = self.check_link(url) - print result + print(result) assert_equal(result['url_errors'], []) @with_mock_url('?status=200 ') def test_trailing_whitespace(self, url): # accept, because browsers accept this result = self.check_link(url) - print result + print(result) assert_equal(result['url_errors'], []) diff --git a/ckanext/qa/utils.py b/ckanext/qa/utils.py new file mode 100644 index 00000000..f69e5713 --- /dev/null +++ b/ckanext/qa/utils.py @@ -0,0 +1,95 @@ +""" +Utils for new and old cli +""" 
+import logging +from sqlalchemy import or_ +import sys + + +log = logging.getLogger(__name__) + + +def init_db(): + import ckan.model as model + from ckanext.qa.model import init_tables + init_tables(model.meta.engine) + + +def update(*args, **kwargs): + from ckan import model + from ckanext.qa import lib + queue = kwargs.pop('queue') + if kwargs: + raise TypeError('"update" got an unexpected keyword argument {}'.format(kwargs)) + + packages = [] + resources = [] + if len(args) > 0: + for arg in args: + # try arg as a group id/name + group = model.Group.get(arg) + if group and group.is_organization: + # group.packages() is unreliable for an organization - + # member objects are not definitive whereas owner_org, so + # get packages using owner_org + query = model.Session.query(model.Package)\ + .filter( + or_(model.Package.state == 'active', + model.Package.state == 'pending'))\ + .filter_by(owner_org=group.id) + packages.extend(query.all()) + if not queue: + queue = 'bulk' + continue + elif group: + packages.extend(group.packages()) + if not queue: + queue = 'bulk' + continue + # try arg as a package id/name + pkg = model.Package.get(arg) + if pkg: + packages.append(pkg) + if not queue: + queue = 'priority' + continue + # try arg as a resource id + res = model.Resource.get(arg) + if res: + resources.append(res) + if not queue: + queue = 'priority' + continue + else: + log.error('Could not recognize as a group, package ' + 'or resource: %r', arg) + sys.exit(1) + else: + # all packages + pkgs = model.Session.query(model.Package)\ + .filter_by(state='active')\ + .order_by('name').all() + packages.extend(pkgs) + if not queue: + queue = 'bulk' + + if packages: + log.info('Datasets to QA: %d', len(packages)) + if resources: + log.info('Resources to QA: %d', len(resources)) + if not (packages or resources): + log.error('No datasets or resources to process') + sys.exit(1) + + log.info('Queue: %s', queue) + for package in packages: + 
lib.create_qa_update_package_task(package, queue) + log.info('Queuing dataset %s (%s resources)', + package.name, len(package.resources)) + + for resource in resources: + package = resource.resource_group.package + log.info('Queuing resource %s/%s', package.name, resource.id) + lib.create_qa_update_task(resource, queue) + + log.info('Completed queueing') diff --git a/dev-requirements.txt b/dev-requirements.txt index ed77b3d6..ac4ee19a 100644 --- a/dev-requirements.txt +++ b/dev-requirements.txt @@ -1,3 +1,5 @@ nose mock flask +pytest-ckan +pytest-cov diff --git a/requirements-py2.txt b/requirements-py2.txt new file mode 100644 index 00000000..900b0ffb --- /dev/null +++ b/requirements-py2.txt @@ -0,0 +1,10 @@ +-e git+http://github.com/ckan/ckanext-report.git#egg=ckanext-report +-e git+http://github.com/ckan/ckanext-archiver.git#egg=ckanext-archiver +SQLAlchemy>=0.6.6 +requests +six>=1.9 # until messytables->html5lib releases https://github.com/html5lib/html5lib-python/pull/301 +xlrd==1.0.0 +python-magic==0.4.12 +messytables==0.15.2 +future>=0.18.2 +progressbar==2.3 \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 70da4e40..e3ccff48 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,10 @@ +-e git+http://github.com/ckan/ckanext-report.git#egg=ckanext-report +-e git+http://github.com/ckan/ckanext-archiver.git#egg=ckanext-archiver +SQLAlchemy>=0.6.6 +requests +six>=1.9 # until messytables->html5lib releases https://github.com/html5lib/html5lib-python/pull/301 xlrd==1.0.0 python-magic==0.4.12 messytables==0.15.2 -progressbar==2.3 +future>=0.18.2 +progressbar==2.5 \ No newline at end of file diff --git a/setup.py b/setup.py index f6012055..1167caf1 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,10 @@ version=__version__, description='Quality Assurance plugin for CKAN', long_description='', - classifiers=[], + classifiers=[ + 'Programming Language :: Python :: 2.7', + 'Programming Language :: Python :: 3', + ], keywords='', 
author='Open Knowledge Foundation, Cabinet Office & contributors', author_email='info@okfn.org', @@ -16,22 +19,7 @@ namespace_packages=['ckanext'], include_package_data=True, zip_safe=False, - install_requires=[ - 'ckanext-archiver>=2.0', - 'ckanext-report', - 'SQLAlchemy>=0.6.6', - 'requests', - 'xlrd>=0.8.0', - 'messytables>=0.8', - 'python-magic>=0.4', - 'progressbar', - 'six>=1.9' # until messytables->html5lib releases https://github.com/html5lib/html5lib-python/pull/301 - ], - tests_require=[ - 'nose', - 'mock', - 'flask' - ], + install_requires=[], entry_points=''' [paste.paster_command] qa=ckanext.qa.commands:QACommand diff --git a/test-core.ini b/test-core.ini deleted file mode 100644 index ad68a59c..00000000 --- a/test-core.ini +++ /dev/null @@ -1,63 +0,0 @@ -# -# ckan - Pylons testing environment configuration -# -# The %(here)s variable will be replaced with the parent directory of this file -# -[DEFAULT] -debug = true -# Uncomment and replace with the address which should receive any error reports -#email_to = you@yourdomain.com -smtp_server = localhost -error_email_from = paste@localhost - -[server:main] -use = egg:Paste#http -host = 0.0.0.0 -port = 5000 - - -[app:main] -use = config:../ckan/test-core.ini - -ckan.plugins = qa - -# Logging configuration -[loggers] -keys = root, ckan, ckanext, sqlalchemy - -[handlers] -keys = console - -[formatters] -keys = generic - -[logger_root] -level = WARN -handlers = console - -[logger_ckan] -qualname = ckan -handlers = console -level = INFO -propagate = 0 - -[logger_ckanext] -qualname = ckanext -handlers = console -level = DEBUG -propagate = 0 - -[logger_sqlalchemy] -handlers = -qualname = sqlalchemy.engine -level = WARN -propagate = 0 - -[handler_console] -class = StreamHandler -args = (sys.stdout,) -level = NOTSET -formatter = generic - -[formatter_generic] -format = %(asctime)s %(levelname)-5.5s [%(name)s] %(message)s diff --git a/test.ini b/test.ini index 7c1094c4..32d25f17 100644 --- a/test.ini +++ 
b/test.ini @@ -17,7 +17,8 @@ port = 5000 [app:main] -use = config:test-core.ini +use = config:../ckan/test-core.ini +ckan.plugins = qa archiver report # Here we hard-code the database and a flag to make default tests # run fast. faster_db_test_hacks = True
{% trans %}Dataset{% endtrans %}