Skip to content

Commit

Permalink
Merge pull request #89 from qld-gov-au/QOLSVC-5123-empty-columns
Browse files Browse the repository at this point in the history
[QOLSVC-5123] handle empty cells past end of row
  • Loading branch information
ThrawnCA authored Mar 8, 2024
2 parents 0d7232d + b1ef90a commit 7857c1a
Show file tree
Hide file tree
Showing 2 changed files with 21 additions and 3 deletions.
4 changes: 2 additions & 2 deletions ckanext/xloader/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,8 +31,8 @@ def xloader_status_description(status):
def is_resource_supported_by_xloader(res_dict, check_access=True):
is_supported_format = XLoaderFormats.is_it_an_xloader_format(res_dict.get('format'))
is_datastore_active = res_dict.get('datastore_active', False)
user_has_access = not check_access or toolkit.h.check_access('package_update',
{'id':res_dict.get('package_id')})
user_has_access = not check_access or toolkit.h.check_access(
'package_update', {'id': res_dict.get('package_id')})
url_type = res_dict.get('url_type')
if url_type:
try:
Expand Down
20 changes: 19 additions & 1 deletion ckanext/xloader/loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,14 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
}.get(existing_info.get(h, {}).get('type_override'), t)
for t, h in zip(types, headers)]

headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
# Strip leading and trailing whitespace, then truncate to maximum length,
# then strip again in case the truncation exposed a space.
headers = [
header.strip()[:MAX_COLUMN_LENGTH].strip()
for header in headers
if header and header.strip()
]
header_count = len(headers)
type_converter = TypeConverter(types=types)

with UnknownEncodingStream(table_filepath, file_format, decoding_result,
Expand All @@ -413,6 +420,17 @@ def row_iterator():
for row in stream:
data_row = {}
for index, cell in enumerate(row):
# Handle files that have extra blank cells in the heading and body,
# e.g. from Microsoft Excel adding lots of empty cells on export.
# Blank header cells won't generate a column,
# so the row length won't match the column count.
if index >= header_count:
# Raise an error if there's actual data out of bounds; otherwise ignore the cell.
if cell:
raise LoaderError("Found data in column %s but resource only has %s header(s)",
index + 1, header_count)
else:
continue
data_row[headers[index]] = cell
yield data_row
result = row_iterator()
Expand Down

0 comments on commit 7857c1a

Please sign in to comment.