Skip to content

Commit

Permalink
Merge pull request #33 from Scifabric/add-helping-materials-support
Browse files Browse the repository at this point in the history
Add support for importing tasks from Excel 2010 file formats.
  • Loading branch information
teleyinex authored Jun 5, 2017
2 parents 2f98180 + 095a936 commit 0f29ee3
Show file tree
Hide file tree
Showing 5 changed files with 127 additions and 36 deletions.
1 change: 1 addition & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -130,6 +130,7 @@ options, please check the **--help** command:
Adding tasks is very simple. You can have your tasks in any of the following formats:

* JSON
* Excel (xlsx format, Excel 2010 or later; only the active sheet is imported)
* CSV
* PO (any po file that you want to translate)
* PROPERTIES (any PROPERTIES file that you want to translate)
Expand Down
99 changes: 70 additions & 29 deletions helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,19 +32,22 @@
import click
from StringIO import StringIO
import polib
import openpyxl
import itertools
from requests import exceptions
from pbsexceptions import ProjectNotFound, TaskNotFound
import logging
from watchdog.observers import Observer
from watchdog.events import PatternMatchingEventHandler


__all__ = ['find_project_by_short_name', 'check_api_error',
'format_error', 'format_json_task', '_create_project',
'_update_project', '_add_tasks', 'create_task_info',
'_delete_tasks', 'enable_auto_throttling',
'_update_tasks_redundancy',
'_update_project_watch', 'PbsHandler',
'_update_task_presenter_bundle_js']
'_update_task_presenter_bundle_js', 'row_empty']


def _create_project(config):
Expand Down Expand Up @@ -131,40 +134,70 @@ def _update_project(config, task_presenter, results,
raise


def _load_data(data_file, data_type):
"""Load data from CSV, JSON, Excel, ..., formats."""
raw_data = data_file.read()
if data_type is None:
data_type = data_file.name.split('.')[-1]
# Data list to process
data = []
# JSON type
if data_type == 'json':
data = json.loads(raw_data)
return data
# CSV type
elif data_type == 'csv':
csv_data = StringIO(raw_data)
reader = csv.DictReader(csv_data, delimiter=',')
for line in reader:
data.append(line)
return data
elif data_type in ['xlsx', 'xlsm', 'xltx', 'xltm']:
excel_data = StringIO(raw_data)
wb = openpyxl.load_workbook(excel_data)
ws = wb.active
# First headers
headers = []
for row in ws.iter_rows(max_row=1):
for cell in row:
tmp = '_'.join(cell.value.split(" ")).lower()
headers.append(tmp)
# Simulate DictReader
for row in ws.iter_rows(row_offset=1):
values = []
for cell in row:
values.append(cell.value)
tmp = dict(itertools.izip(headers, values))
if len(values) == len(headers) and not row_empty(values):
data.append(tmp)
return data
# PO type
elif data_type == 'po':
po = polib.pofile(raw_data)
for entry in po.untranslated_entries():
data.append(entry.__dict__)
return data
# PROPERTIES type (used in Java and Firefox extensions)
elif data_type == 'properties':
lines = raw_data.split('\n')
for l in lines:
if l:
var_id, string = l.split('=')
tmp = dict(var_id=var_id, string=string)
data.append(tmp)
return data
else:
return data


def _add_tasks(config, tasks_file, tasks_type, priority, redundancy):
"""Add tasks to a project."""
try:
project = find_project_by_short_name(config.project['short_name'],
config.pbclient,
config.all)
tasks = tasks_file.read()
if tasks_type is None:
tasks_type = tasks_file.name.split('.')[-1]
# Data list to process
data = []
# JSON type
if tasks_type == 'json':
data = json.loads(tasks)
# CSV type
elif tasks_type == 'csv':
csv_data = StringIO(tasks)
reader = csv.DictReader(csv_data, delimiter=',')
for line in reader:
data.append(line)
# PO type
elif tasks_type == 'po':
po = polib.pofile(tasks)
for entry in po.untranslated_entries():
data.append(entry.__dict__)
# PROPERTIES type (used in Java and Firefox extensions)
elif tasks_type == 'properties':
lines = tasks.split('\n')
for l in lines:
if l:
var_id, string = l.split('=')
tmp = dict(var_id=var_id, string=string)
data.append(tmp)
else:
data = _load_data(tasks_file, tasks_type)
if len(data) == 0:
return ("Unknown format for the tasks file. Use json, csv, po or "
"properties.")
# Check if for the data we have to auto-throttle task creation
Expand Down Expand Up @@ -329,6 +362,14 @@ def format_json_task(task_info):
return task_info


def row_empty(row):
    """Return True when every value in ``row`` is None, False otherwise.

    An empty ``row`` counts as empty.
    """
    return all(value is None for value in row)


class PbsHandler(PatternMatchingEventHandler):

patterns = ['*/template.html', '*/tutorial.html',
Expand Down
6 changes: 3 additions & 3 deletions pbs.py
Original file line number Diff line number Diff line change
Expand Up @@ -153,9 +153,9 @@ def update_project(config, task_presenter, results,
@cli.command()
@click.option('--tasks-file', help='File with tasks',
default='project.tasks', type=click.File('r'))
@click.option('--tasks-type', help='Tasks type: JSON|CSV|PO|PROPERTIES',
default=None, type=click.Choice(['json', 'csv', 'po',
'properties']))
@click.option('--tasks-type', help='Tasks type: JSON|CSV|XLSX|XLSM|XLTX|XLTM|PO|PROPERTIES',
default=None, type=click.Choice(['json', 'csv', 'xlsx', 'xlsm',
'xltx', 'xltm', 'po', 'properties']))
@click.option('--priority', help="Priority for the tasks.", default=0)
@click.option('--redundancy', help="Redundancy for tasks.", default=30)
@pass_config
Expand Down
8 changes: 4 additions & 4 deletions setup.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@

setup(
name="pybossa-pbs",
version="2.3.1",
author="SciFabric LTD",
version="2.4.0",
author="Scifabric LTD",
author_email="[email protected]",
description="PYBOSSA command line client",
long_description=long_description,
license="AGPLv3",
url="https://github.com/PyBossa/pbs",
url="https://github.com/Scifabric/pbs",
classifiers = ['Development Status :: 4 - Beta',
'Environment :: Console',
'Intended Audience :: Developers',
Expand All @@ -30,7 +30,7 @@
'Programming Language :: Python',],
py_modules=['pbs', 'helpers', 'pbsexceptions'],
install_requires=['Click>=2.3, <2.4', 'pybossa-client>=1.0.4, <1.0.5', 'requests', 'nose', 'mock', 'coverage',
'rednose', 'pypandoc', 'simplejson', 'jsonschema', 'polib', 'watchdog'],
'rednose', 'pypandoc', 'simplejson', 'jsonschema', 'polib', 'watchdog', 'openpyxl'],
entry_points='''
[console_scripts]
pbs=pbs:cli
Expand Down
49 changes: 49 additions & 0 deletions test/test_pbs_add_tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
from nose.tools import assert_raises
from requests import exceptions
from pbsexceptions import *
from openpyxl import Workbook

class TestPbsAddTask(TestDefault):

Expand Down Expand Up @@ -71,6 +72,45 @@ def test_add_tasks_csv_with_info(self, find_mock):
res = _add_tasks(self.config, tasks, 'csv', 0, 30)
assert res == '1 tasks added to project: short_name', res

@patch('helpers.openpyxl.load_workbook')
@patch('helpers.find_project_by_short_name')
def test_add_tasks_excel_with_info(self, find_mock, workbook_mock):
    """Check that _add_tasks creates one task per data row of a workbook.

    The workbook has a header row, eight data rows and two blank rows;
    the test expects exactly eight tasks to be reported.
    """
    # Fake project handed back by the mocked project lookup.
    project = MagicMock()
    project.id = 1
    project.name = 'name'
    project.short_name = 'short_name'
    project.description = 'description'
    project.info = dict()
    find_mock.return_value = project

    # In-memory workbook returned by the mocked load_workbook.
    workbook = Workbook()
    sheet = workbook.active
    sheet.append(['Column Name', 'foo'])
    for _ in range(8):
        sheet.append(['value', 'bar'])
    # Two trailing blank rows.
    for _ in range(2):
        sheet.append([None, None])
    workbook_mock.return_value = workbook

    tasks_file = MagicMock()
    tasks_file.read.return_value = workbook

    self.config.pbclient = MagicMock()
    res = _add_tasks(self.config, tasks_file, 'xlsx', 0, 30)

    expected_info = {u'column_name': u'value', u'foo': u'bar'}
    self.config.pbclient.create_task.assert_called_with(
        project_id=find_mock().id,
        info=expected_info,
        n_answers=30,
        priority_0=0)
    assert res == '8 tasks added to project: short_name', res

@patch('helpers.find_project_by_short_name')
def test_add_tasks_csv_from_filextension(self, find_mock):
"""Test add_tasks csv without specifying file extension works."""
Expand Down Expand Up @@ -334,3 +374,12 @@ def test_add_tasks_properties_from_filextension(self, find_mock):
self.config.pbclient = pbclient
res = _add_tasks(self.config, tasks, None, 0, 30)
assert res == '1 tasks added to project: short_name', res

def test_empty_row(self):
    """Check that row_empty is True only when every value is None."""
    assert row_empty([None, None, None, None]) is True
    partially_filled = ([None, None, None, 'foo'],
                        [None, 'foo', None, 'foo'])
    for values in partially_filled:
        assert row_empty(values) is False

0 comments on commit 0f29ee3

Please sign in to comment.