diff --git a/README.md b/README.md index fa48ef3..293add8 100644 --- a/README.md +++ b/README.md @@ -130,6 +130,7 @@ options, please check the **--help** command: Adding tasks is very simple. You can have your tasks in three formats: * JSON + * Excel (xlsx from 2010. It imports the first sheet) * CSV * PO (any po file that you want to translate) * PROPERTIES (any PROPERTIES file that you want to translate) diff --git a/helpers.py b/helpers.py index 3d6d1fe..c5ab989 100644 --- a/helpers.py +++ b/helpers.py @@ -32,19 +32,22 @@ import click from StringIO import StringIO import polib +import openpyxl +import itertools from requests import exceptions from pbsexceptions import ProjectNotFound, TaskNotFound import logging from watchdog.observers import Observer from watchdog.events import PatternMatchingEventHandler + __all__ = ['find_project_by_short_name', 'check_api_error', 'format_error', 'format_json_task', '_create_project', '_update_project', '_add_tasks', 'create_task_info', '_delete_tasks', 'enable_auto_throttling', '_update_tasks_redundancy', '_update_project_watch', 'PbsHandler', - '_update_task_presenter_bundle_js'] + '_update_task_presenter_bundle_js', 'row_empty'] def _create_project(config): @@ -131,40 +134,70 @@ def _update_project(config, task_presenter, results, raise +def _load_data(data_file, data_type): + """Load data from CSV, JSON, Excel, ..., formats.""" + raw_data = data_file.read() + if data_type is None: + data_type = data_file.name.split('.')[-1] + # Data list to process + data = [] + # JSON type + if data_type == 'json': + data = json.loads(raw_data) + return data + # CSV type + elif data_type == 'csv': + csv_data = StringIO(raw_data) + reader = csv.DictReader(csv_data, delimiter=',') + for line in reader: + data.append(line) + return data + elif data_type in ['xlsx', 'xlsm', 'xltx', 'xltm']: + excel_data = StringIO(raw_data) + wb = openpyxl.load_workbook(excel_data) + ws = wb.active + # First headers + headers = [] + for row in 
ws.iter_rows(max_row=1): + for cell in row: + tmp = '_'.join(cell.value.split(" ")).lower() + headers.append(tmp) + # Simulate DictReader + for row in ws.iter_rows(row_offset=1): + values = [] + for cell in row: + values.append(cell.value) + tmp = dict(itertools.izip(headers, values)) + if len(values) == len(headers) and not row_empty(values): + data.append(tmp) + return data + # PO type + elif data_type == 'po': + po = polib.pofile(raw_data) + for entry in po.untranslated_entries(): + data.append(entry.__dict__) + return data + # PROPERTIES type (used in Java and Firefox extensions) + elif data_type == 'properties': + lines = raw_data.split('\n') + for l in lines: + if l: + var_id, string = l.split('=') + tmp = dict(var_id=var_id, string=string) + data.append(tmp) + return data + else: + return data + + def _add_tasks(config, tasks_file, tasks_type, priority, redundancy): """Add tasks to a project.""" try: project = find_project_by_short_name(config.project['short_name'], config.pbclient, config.all) - tasks = tasks_file.read() - if tasks_type is None: - tasks_type = tasks_file.name.split('.')[-1] - # Data list to process - data = [] - # JSON type - if tasks_type == 'json': - data = json.loads(tasks) - # CSV type - elif tasks_type == 'csv': - csv_data = StringIO(tasks) - reader = csv.DictReader(csv_data, delimiter=',') - for line in reader: - data.append(line) - # PO type - elif tasks_type == 'po': - po = polib.pofile(tasks) - for entry in po.untranslated_entries(): - data.append(entry.__dict__) - # PROPERTIES type (used in Java and Firefox extensions) - elif tasks_type == 'properties': - lines = tasks.split('\n') - for l in lines: - if l: - var_id, string = l.split('=') - tmp = dict(var_id=var_id, string=string) - data.append(tmp) - else: + data = _load_data(tasks_file, tasks_type) + if len(data) == 0: return ("Unknown format for the tasks file. 
def row_empty(row):
    """Return True when every value in row is None (or row has no values)."""
    return all(cell is None for cell in row)
@patch('helpers.openpyxl.load_workbook')
@patch('helpers.find_project_by_short_name')
def test_add_tasks_excel_with_info(self, find_mock, workbook_mock):
    """Test add_tasks excel creates one task per non-empty data row."""
    # In-memory sheet: one header row, eight data rows, two blank rows.
    workbook = Workbook()
    sheet = workbook.active
    sheet.append(['Column Name', 'foo'])
    for _ in range(8):
        sheet.append(['value', 'bar'])
    for _ in range(2):
        sheet.append([None, None])
    workbook_mock.return_value = workbook

    project = MagicMock()
    project.name = 'name'
    project.short_name = 'short_name'
    project.description = 'description'
    project.info = dict()
    project.id = 1
    find_mock.return_value = project

    tasks = MagicMock()
    tasks.read.return_value = workbook

    self.config.pbclient = MagicMock()
    res = _add_tasks(self.config, tasks, 'xlsx', 0, 30)

    # Header text is expected lower-cased with spaces turned into "_".
    expected_info = {u'column_name': u'value', u'foo': u'bar'}
    self.config.pbclient.create_task.assert_called_with(
        project_id=find_mock().id,
        info=expected_info,
        n_answers=30,
        priority_0=0)
    assert res == '8 tasks added to project: short_name', res
def test_empty_row(self):
    """Test that row_empty is True only when every value is None."""
    all_none = [None] * 4
    assert row_empty(all_none) is True
    # A single non-None value anywhere makes the row non-empty.
    one_value = [None, None, None, 'foo']
    assert row_empty(one_value) is False
    two_values = [None, 'foo', None, 'foo']
    assert row_empty(two_values) is False