From d59f4eaa06e3f321c4387902ab03f61bdc12b6d7 Mon Sep 17 00:00:00 2001
From: "dependabot-preview[bot]"
<27856297+dependabot-preview[bot]@users.noreply.github.com>
Date: Wed, 28 Apr 2021 22:14:22 +0000
Subject: [PATCH 001/102] Upgrade to GitHub-native Dependabot
---
.github/dependabot.yml | 18 ++++++++++++++++++
1 file changed, 18 insertions(+)
create mode 100644 .github/dependabot.yml
diff --git a/.github/dependabot.yml b/.github/dependabot.yml
new file mode 100644
index 00000000..b5158981
--- /dev/null
+++ b/.github/dependabot.yml
@@ -0,0 +1,18 @@
+version: 2
+registries:
+ python-index-pypi-org:
+ type: python-index
+ url: https://pypi.org/
+ replaces-base: true
+ username: "${{secrets.PYTHON_INDEX_PYPI_ORG_USERNAME}}"
+ password: "${{secrets.PYTHON_INDEX_PYPI_ORG_PASSWORD}}"
+
+updates:
+- package-ecosystem: pip
+ directory: "/"
+ schedule:
+ interval: daily
+ time: "19:00"
+ open-pull-requests-limit: 10
+ registries:
+ - python-index-pypi-org
From 963ada115dc7038b0d2dcd29dba5765627b0477c Mon Sep 17 00:00:00 2001
From: william dutton
Date: Wed, 9 Nov 2022 08:52:01 +1000
Subject: [PATCH 002/102] fix workflow
---
.github/workflows/test.yml | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml
index e963e1f6..8a601204 100644
--- a/.github/workflows/test.yml
+++ b/.github/workflows/test.yml
@@ -13,7 +13,7 @@ jobs:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
- python-version: '3.6'
+ python-version: '3.x'
- name: Install requirements
run: pip install flake8 pycodestyle
- name: Check syntax
From 9f96e1676c0be03774273917040546a1b97b2f3f Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 17 Apr 2023 11:22:32 +1000
Subject: [PATCH 003/102] [QOLDEV-347] apply 'str' fallback type correctly,
#182
- If all types have been rejected, ensure that the fallback flag is correctly set
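A minimal sketch of the corrected loop, with simplified names (the guess weights and the TYPES list from utils.py are omitted), showing how the fallback flag now tracks whether any candidate type survived:

    # Hypothetical, simplified form of the type_guess() inner loop.
    def guess_column_types(rows, candidate_types=(int, float)):
        n_columns = len(rows[0])
        guesses = [dict.fromkeys(candidate_types, 0) for _ in range(n_columns)]
        at_least_one_value = [False] * n_columns
        for row in rows:
            for ci, cell in enumerate(row):
                if not cell:
                    continue
                for type_ in list(guesses[ci]):
                    if not isinstance(cell, type_):
                        guesses[ci].pop(type_)
                # The fix: only count the column as typed while at least
                # one candidate remains; otherwise fall back to 'str'.
                at_least_one_value[ci] = bool(guesses[ci])
        return [g if v else {str: 0}
                for g, v in zip(guesses, at_least_one_value)]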
---
ckanext/xloader/utils.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py
index cbffaa2f..79facbea 100644
--- a/ckanext/xloader/utils.py
+++ b/ckanext/xloader/utils.py
@@ -175,10 +175,10 @@ def type_guess(rows, types=TYPES, strict=False):
for ci, cell in enumerate(row):
if not cell:
continue
- at_least_one_value[ci] = True
for type in list(guesses[ci].keys()):
if not isinstance(cell, type):
guesses[ci].pop(type)
+ at_least_one_value[ci] = True if guesses[ci] else False
# no need to set guessing weights before this
# because we only accept a type if it never fails
for i, guess in enumerate(guesses):
From cf04a5c5c38443f3d98e0e7b8a4ed0ceede90aa0 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 17 Apr 2023 12:50:09 +1000
Subject: [PATCH 004/102] [QOLDEV-347] fix validation errors on empty strings,
#182
- Replace empty strings with None in columns whose types cannot accept an empty string
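A hedged sketch of the per-chunk cleanup (headers_dicts and the record layout are simplified stand-ins for the real loader structures, which rely on ordered dicts):

    # PostgreSQL rejects '' for timestamp/numeric columns, so blank
    # cells in those columns are stored as NULL instead.
    NON_EMPTY_TYPES = ('timestamp', 'numeric')

    def blank_to_none(records, headers_dicts):
        for row in records:
            for column_index, column_name in enumerate(row):
                if (headers_dicts[column_index]['type'] in NON_EMPTY_TYPES
                        and row[column_name] == ''):
                    row[column_name] = None

    records = [{'Opening date': '', 'Service ID': '63041'}]
    headers_dicts = [{'type': 'timestamp'}, {'type': 'numeric'}]
    blank_to_none(records, headers_dicts)
    assert records[0]['Opening date'] is None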
---
ckanext/xloader/loader.py | 7 +++++++
1 file changed, 7 insertions(+)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index afc3c980..75bddf51 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -318,9 +318,16 @@ def row_iterator():
logger.info('Copying to database...')
count = 0
+ # Some types cannot be stored as empty strings and must be converted to None,
+ # https://github.com/ckan/ckanext-xloader/issues/182
+ non_empty_types = ['timestamp', 'numeric']
for i, records in enumerate(chunky(result, 250)):
count += len(records)
logger.info('Saving chunk {number}'.format(number=i))
+ for row in records:
+ for column_index, column_name in enumerate(row):
+ if headers_dicts[column_index]['type'] in non_empty_types and row[column_name] == '':
+ row[column_name] = None
send_resource_to_datastore(resource_id, headers_dicts, records)
logger.info('...copying done')
From b8b99143be32bd0da3e7e125e5f02cff7c8212eb Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 17 Apr 2023 14:54:23 +1000
Subject: [PATCH 005/102] [QOLDEV-347] add tests for edge cases we're fixing
- Column with free text in some rows and numeric data in others
- Column with timestamps in some rows and empty strings in others
---
.../samples/mixed_numeric_string_sample.csv | 3 +++
.../tests/samples/sample_with_blanks.csv | 4 ++++
ckanext/xloader/tests/test_loader.py | 24 +++++++++++++++++++
3 files changed, 31 insertions(+)
create mode 100644 ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv
create mode 100644 ckanext/xloader/tests/samples/sample_with_blanks.csv
diff --git a/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv
new file mode 100644
index 00000000..9d076602
--- /dev/null
+++ b/ckanext/xloader/tests/samples/mixed_numeric_string_sample.csv
@@ -0,0 +1,3 @@
+Funding agency,Program title,Maximum (indicative) grant amount
+DTIS,Accessible Tourism Infrastructure Grants,Five hundred thousand dollars
+DTIS,Boosting Accessible Tourism Experiences Grants,5000
diff --git a/ckanext/xloader/tests/samples/sample_with_blanks.csv b/ckanext/xloader/tests/samples/sample_with_blanks.csv
new file mode 100644
index 00000000..b53b25db
--- /dev/null
+++ b/ckanext/xloader/tests/samples/sample_with_blanks.csv
@@ -0,0 +1,4 @@
+Funding agency,Program title,Opening date,Service ID
+DTIS,Visitor First Experiences Fund,23/03/2023,63039
+DTIS,First Nations Sport and Recreation Program Round 2,22/03/2023,63040
+,,,63041
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index f31b663b..68452d11 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -612,6 +612,30 @@ def test_german(self, Session):
u"tsvector",
] + [u"text"] * (len(records[0]) - 1)
+ def test_with_blanks(self, Session):
+ csv_filepath = get_sample_filepath("sample_with_blanks.csv")
+ resource_id = "test1"
+ factories.Resource(id=resource_id)
+ loader.load_csv(
+ csv_filepath,
+ resource_id=resource_id,
+ mimetype="text/csv",
+ logger=logger,
+ )
+ assert len(self._get_records(Session, "test1")) == 3
+
+ def test_with_mixed_types(self, Session):
+ csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv")
+ resource_id = "test1"
+ factories.Resource(id=resource_id)
+ loader.load_csv(
+ csv_filepath,
+ resource_id=resource_id,
+ mimetype="text/csv",
+ logger=logger,
+ )
+ assert len(self._get_records(Session, "test1")) == 2
+
def test_reload(self, Session):
csv_filepath = get_sample_filepath("simple.csv")
resource_id = "test1"
From b62aa6ccfc2f54008e4bbd240fb031bb130cd1ed Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 17 Apr 2023 16:05:54 +1000
Subject: [PATCH 006/102] [QOLDEV-347] tighten Flake8 rules
- Remove unused imports, or tag those that serve a purpose (testing what can be imported)
- Remove obsolete exclusions from Flake8 config
---
.flake8 | 4 ----
ckanext/xloader/jobs.py | 2 +-
ckanext/xloader/loader.py | 1 -
ckanext/xloader/parser.py | 2 --
ckanext/xloader/plugin.py | 1 -
ckanext/xloader/tests/ckan_setup.py | 2 +-
ckanext/xloader/tests/fixtures.py | 5 ++---
7 files changed, 4 insertions(+), 13 deletions(-)
diff --git a/.flake8 b/.flake8
index a4eea9e3..32068ca7 100644
--- a/.flake8
+++ b/.flake8
@@ -17,8 +17,4 @@ max-line-length=127
# List ignore rules one per line.
ignore =
- E501
- C901
W503
- F401
- F403
diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py
index 4c4068f9..0d242db1 100644
--- a/ckanext/xloader/jobs.py
+++ b/ckanext/xloader/jobs.py
@@ -16,7 +16,7 @@
import sqlalchemy as sa
from ckan import model
-from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config, check_ckan_version
+from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config
from . import loader
from . import db
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 75bddf51..55c9cab5 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -14,7 +14,6 @@
from unidecode import unidecode
import ckan.plugins as p
-import ckan.plugins.toolkit as tk
from .job_exceptions import FileCouldNotBeLoadedError, LoaderError
from .parser import XloaderCSVParser
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index b2a6f889..b52c59a3 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -1,10 +1,8 @@
# -*- coding: utf-8 -*-
import csv
-from codecs import iterencode
from decimal import Decimal, InvalidOperation
from itertools import chain
-import six
from ckan.plugins.toolkit import asbool
from dateutil.parser import isoparser, parser
from dateutil.parser import ParserError
diff --git a/ckanext/xloader/plugin.py b/ckanext/xloader/plugin.py
index dbde8ed5..159b99de 100644
--- a/ckanext/xloader/plugin.py
+++ b/ckanext/xloader/plugin.py
@@ -6,7 +6,6 @@
from ckan.plugins import toolkit
from . import action, auth, helpers as xloader_helpers, utils
-from .loader import fulltext_function_exists, get_write_engine
try:
config_declarations = toolkit.blanket.config_declarations
diff --git a/ckanext/xloader/tests/ckan_setup.py b/ckanext/xloader/tests/ckan_setup.py
index ae8bfb3e..ff43d74c 100644
--- a/ckanext/xloader/tests/ckan_setup.py
+++ b/ckanext/xloader/tests/ckan_setup.py
@@ -1,5 +1,5 @@
try:
- from ckan.tests.pytest_ckan.ckan_setup import *
+ from ckan.tests.pytest_ckan.ckan_setup import * # noqa
except ImportError:
import pkg_resources
from paste.deploy import loadapp
diff --git a/ckanext/xloader/tests/fixtures.py b/ckanext/xloader/tests/fixtures.py
index f43916ab..9a7ad37f 100644
--- a/ckanext/xloader/tests/fixtures.py
+++ b/ckanext/xloader/tests/fixtures.py
@@ -1,6 +1,5 @@
# -*- coding: utf-8 -*-
-import sqlalchemy
-import sqlalchemy.orm as orm
+from sqlalchemy import orm
import os
from ckanext.datastore.tests import helpers as datastore_helpers
@@ -11,7 +10,7 @@
)
try:
- from ckan.tests.pytest_ckan.fixtures import *
+ from ckan.tests.pytest_ckan.fixtures import * # noqa
except ImportError:
import pytest
From 939ff6bfc9125d3c281b3401cff3d92456c05d44 Mon Sep 17 00:00:00 2001
From: antuarc
Date: Fri, 19 May 2023 11:39:10 +1000
Subject: [PATCH 007/102] [QOLSVC-1863] truncate on-page XLoader logs if there
are too many
- Show the first 100 and last 100 log entries, with a message in between stating how many were skipped
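The template check is equivalent to the following predicate (an illustrative sketch, not part of the patch), using 1-based indexes to match Jinja's loop.index and loop.revindex:

    # An entry stays visible when it falls in the first or last 100 rows.
    def is_visible(index, total, window=100):
        revindex = total - index + 1  # Jinja's loop.revindex
        return index <= window or revindex <= window

    visible = [i for i in range(1, 351) if is_visible(i, total=350)]
    assert len(visible) == 200  # the middle 150 entries are hidden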
---
ckanext/xloader/templates/xloader/resource_data.html | 8 ++++++++
1 file changed, 8 insertions(+)
diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html
index a94ad631..e9786776 100644
--- a/ckanext/xloader/templates/xloader/resource_data.html
+++ b/ckanext/xloader/templates/xloader/resource_data.html
@@ -62,6 +62,8 @@
{{ _('Upload Log') }}
{% for item in status.task_info.logs %}
+ {# Truncate very long loops, showing just the start and end #}
+ {% if loop.index <= 100 or loop.revindex <= 100 %}
{% set icon = 'ok' if item.level == 'INFO' else 'exclamation' %}
{% set class = ' failure' if icon == 'exclamation' else ' success' %}
{% set popover_content = 'test' %}
@@ -77,6 +79,12 @@ {{ _('Upload Log') }}
+ {% elif loop.index == 101 %}
+
+
+ Skipping {{ loop.revindex - 100}} records...
+
+ {% endif %}
{% endfor %}
From 05b2b888a2e6772e019a84f59ee33145dc93e99e Mon Sep 17 00:00:00 2001
From: antuarc
Date: Fri, 19 May 2023 12:19:05 +1000
Subject: [PATCH 008/102] [QOLSVC-1863] make XLoader log truncation
configurable
- Start with the first and last 50 rows, and provide a link to double it
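A sketch of the view-side handling added below: an absent or non-numeric ?rows= query parameter falls back to the template default of 50 rather than raising:

    # Illustrative helper; the real logic lives inline in views.py.
    def parse_rows(raw):
        try:
            return int(raw) if raw else None
        except ValueError:
            return None

    assert parse_rows('100') == 100   # ?rows=100 doubles the 50-row window
    assert parse_rows('abc') is None  # invalid value -> default window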
---
.../xloader/templates/xloader/resource_data.html | 14 +++++++++++---
ckanext/xloader/utils.py | 15 +++++++++------
ckanext/xloader/views.py | 10 ++++++++--
3 files changed, 28 insertions(+), 11 deletions(-)
diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html
index e9786776..e24f79d8 100644
--- a/ckanext/xloader/templates/xloader/resource_data.html
+++ b/ckanext/xloader/templates/xloader/resource_data.html
@@ -61,9 +61,11 @@
{% if status.status and status.task_info and show_table %}
{{ _('Upload Log') }}
+ {% set rows = rows or 50 %}
{% for item in status.task_info.logs %}
{# Truncate very long loops, showing just the start and end #}
- {% if loop.index <= 100 or loop.revindex <= 100 %}
+ {% if loop.index <= rows or loop.revindex <= rows
+ or (loop.index == rows + 1 and loop.revindex == rows + 1) %}
{% set icon = 'ok' if item.level == 'INFO' else 'exclamation' %}
{% set class = ' failure' if icon == 'exclamation' else ' success' %}
{% set popover_content = 'test' %}
@@ -79,10 +81,16 @@ {{ _('Upload Log') }}
- {% elif loop.index == 101 %}
+ {% elif loop.index == rows + 1 %}
- Skipping {{ loop.revindex - 100}} records...
+
+ Skipping {{ loop.revindex - rows}} records...
+
+
+ Show more
+
+
{% endif %}
{% endfor %}
diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py
index 79facbea..bda96fd7 100644
--- a/ckanext/xloader/utils.py
+++ b/ckanext/xloader/utils.py
@@ -11,7 +11,7 @@
import ckan.plugins as p
-def resource_data(id, resource_id):
+def resource_data(id, resource_id, rows=None):
if p.toolkit.request.method == "POST":
try:
@@ -44,13 +44,16 @@ def resource_data(id, resource_id):
except p.toolkit.NotAuthorized:
return p.toolkit.abort(403, p.toolkit._("Not authorized to see this page"))
+ extra_vars={
+ "status": xloader_status,
+ "resource": resource,
+ "pkg_dict": pkg_dict,
+ }
+ if rows:
+ extra_vars["rows"] = rows
return p.toolkit.render(
"xloader/resource_data.html",
- extra_vars={
- "status": xloader_status,
- "resource": resource,
- "pkg_dict": pkg_dict,
- },
+ extra_vars=extra_vars,
)
diff --git a/ckanext/xloader/views.py b/ckanext/xloader/views.py
index 198de320..1ca212c8 100644
--- a/ckanext/xloader/views.py
+++ b/ckanext/xloader/views.py
@@ -1,4 +1,4 @@
-from flask import Blueprint
+from flask import Blueprint, request
import ckanext.xloader.utils as utils
@@ -12,4 +12,10 @@ def get_blueprints():
@xloader.route("/dataset//resource_data/", methods=("GET", "POST"))
def resource_data(id, resource_id):
- return utils.resource_data(id, resource_id)
+ rows = request.args.get('rows')
+ if rows:
+ try:
+ rows = int(rows)
+ except ValueError:
+ rows = None
+ return utils.resource_data(id, resource_id, rows)
From d937dea83c949d837bf8f18452b34646dcd48e2b Mon Sep 17 00:00:00 2001
From: antuarc
Date: Fri, 19 May 2023 12:58:33 +1000
Subject: [PATCH 009/102] [QOLSVC-1863] adjust XLoader log truncation
appearance
- Add a notice at the top of the page when logs have been hidden.
- Clarify that we are hiding logs, not skipping actual processing of data.
- Add 'Show All' link to show all logs if the user is confident they can handle it.
---
.../templates/xloader/resource_data.html | 20 ++++++++++++++++---
1 file changed, 17 insertions(+), 3 deletions(-)
diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html
index e24f79d8..d9a22058 100644
--- a/ckanext/xloader/templates/xloader/resource_data.html
+++ b/ckanext/xloader/templates/xloader/resource_data.html
@@ -61,8 +61,22 @@
{% if status.status and status.task_info and show_table %}
{{ _('Upload Log') }}
+ {% set items = status.task_info.logs %}
{% set rows = rows or 50 %}
- {% for item in status.task_info.logs %}
+ {% set skipped_rows = (items | length) - (rows * 2) %}
+ {% if skipped_rows > 1 %}
+ -
+
+
+ {{ skipped_rows }} out of {{ items | length }} logs will be hidden.
+
+
+ Show more Show all
+
+
+
+ {% endif %}
+ {% for item in items %}
{# Truncate very long loops, showing just the start and end #}
{% if loop.index <= rows or loop.revindex <= rows
or (loop.index == rows + 1 and loop.revindex == rows + 1) %}
@@ -85,10 +99,10 @@ {{ _('Upload Log') }}
-
- Skipping {{ loop.revindex - rows}} records...
+ Skipping {{ skipped_rows }} logs...
- Show more
+ Show more Show all
From 288b0ab3d7dfc1e807b30cb61ab862a9024a8fc1 Mon Sep 17 00:00:00 2001
From: antuarc
Date: Fri, 19 May 2023 13:34:21 +1000
Subject: [PATCH 010/102] [QOLSVC-1863] ignore negative numbers of logs to
display
---
ckanext/xloader/utils.py | 2 +-
ckanext/xloader/views.py | 2 ++
2 files changed, 3 insertions(+), 1 deletion(-)
diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py
index bda96fd7..ec8e4bbd 100644
--- a/ckanext/xloader/utils.py
+++ b/ckanext/xloader/utils.py
@@ -44,7 +44,7 @@ def resource_data(id, resource_id, rows=None):
except p.toolkit.NotAuthorized:
return p.toolkit.abort(403, p.toolkit._("Not authorized to see this page"))
- extra_vars={
+ extra_vars = {
"status": xloader_status,
"resource": resource,
"pkg_dict": pkg_dict,
diff --git a/ckanext/xloader/views.py b/ckanext/xloader/views.py
index 1ca212c8..5a56322c 100644
--- a/ckanext/xloader/views.py
+++ b/ckanext/xloader/views.py
@@ -16,6 +16,8 @@ def resource_data(id, resource_id):
if rows:
try:
rows = int(rows)
+ if rows < 0:
+ rows = None
except ValueError:
rows = None
return utils.resource_data(id, resource_id, rows)
From b351e95c4a38e10971098755667ea59a5804f8f8 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Fri, 26 May 2023 12:12:49 +1000
Subject: [PATCH 011/102] [QOLDEV-424] add unit test for parsing CSV file with
commas inside quotes
---
.../tests/samples/sample_with_quoted_commas.csv | 4 ++++
ckanext/xloader/tests/test_loader.py | 12 ++++++++++++
2 files changed, 16 insertions(+)
create mode 100644 ckanext/xloader/tests/samples/sample_with_quoted_commas.csv
diff --git a/ckanext/xloader/tests/samples/sample_with_quoted_commas.csv b/ckanext/xloader/tests/samples/sample_with_quoted_commas.csv
new file mode 100644
index 00000000..7fe94e5b
--- /dev/null
+++ b/ckanext/xloader/tests/samples/sample_with_quoted_commas.csv
@@ -0,0 +1,4 @@
+Funding agency,Program title,Opening date,Service ID
+DTIS,"Department of Employment, Small Business and Training",23/03/2023,63039
+DTIS,"Foo, baz, meh",22/03/2023,63040
+,,,63041
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 68452d11..1ab79524 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -624,6 +624,18 @@ def test_with_blanks(self, Session):
)
assert len(self._get_records(Session, "test1")) == 3
+ def test_with_quoted_commas(self, Session):
+ csv_filepath = get_sample_filepath("sample_with_quoted_commas.csv")
+ resource_id = "test1"
+ factories.Resource(id=resource_id)
+ loader.load_csv(
+ csv_filepath,
+ resource_id=resource_id,
+ mimetype="text/csv",
+ logger=logger,
+ )
+ assert len(self._get_records(Session, "test1")) == 3
+
def test_with_mixed_types(self, Session):
csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv")
resource_id = "test1"
From 83e1b86675ad8c6f22b2252e0568b8a36f66b5d1 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Fri, 26 May 2023 12:24:25 +1000
Subject: [PATCH 012/102] [QOLDEV-424] add unit test for parsing CSV file with
commas inside quotes using tabulator
---
ckanext/xloader/tests/test_loader.py | 12 ++++++++++++
1 file changed, 12 insertions(+)
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 1ab79524..1b4a2ec5 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -1171,3 +1171,15 @@ def test_no_entries(self):
mimetype="csv",
logger=logger,
)
+
+ def test_with_quoted_commas(self, Session):
+ csv_filepath = get_sample_filepath("sample_with_quoted_commas.csv")
+ resource_id = "test1"
+ factories.Resource(id=resource_id)
+ loader.load_table(
+ csv_filepath,
+ resource_id=resource_id,
+ mimetype="text/csv",
+ logger=logger,
+ )
+ assert len(self._get_records(Session, "test1")) == 3
From 9ae1b26e307c03e9b376bec36ef0be735b4c73cc Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 29 May 2023 09:59:15 +1000
Subject: [PATCH 013/102] [QOLDEV-424] add unit test for parsing CSV file with
a mixture of single and double quotes
---
.../samples/sample_with_mixed_quotes.csv | 136 ++++++++++++++++++
ckanext/xloader/tests/test_loader.py | 24 ++++
2 files changed, 160 insertions(+)
create mode 100644 ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv
diff --git a/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv
new file mode 100644
index 00000000..8408a155
--- /dev/null
+++ b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv
@@ -0,0 +1,136 @@
+Category,Category name,Priority,Initiative name,Investment objectives,Primary digital priority,Initiative stage,Actual start date,Approved end date,Date data current at,Percentage complete,Overall status,Project commencement allocation,Approved expenditure,Actual cost to date,Scope change event,Cost re-evaluation event,Delivery delay event,Project journey and reasons for variance,Learn more (URL)
+DESBT,"Department of Employment, Small Business and Training",High,Business Launchpad Project - Stage 2,"This BLP initiative has a customer-journey based platform for businesses located throughout Queensland. The aim is to assist businesses to better understand their start up and compliance requirements, with a view to streamlining the complex regulatory environment that may delay or impede businesses from starting, growing, and employing. As at 1 July 2022, Business Launchpad Stage 2 has approval to extend beyond the SBRR scope of deliverables to focus on a revised user journey, inclusion of additional industries, and a broader coverage of more than 95% of the Queensland population, to be completed by 30 June 2023.",Collaboration,Delivery,01/07/2022,30/06/2023,31/03/2023,41,G,5633000,5739000,2352000,N,N,N,"As at 31 March 2023
+- Overall 'green' (on track) status
+- Revised user journey following results of BLP UX/UI testing
+- Transition to support progressing with documentation and walk-through of the solution.
+- Ongoing high levels of BLP usage reflecting the success of search engine marketing. BLP focused campaign to further increase awareness and usage is being finalised.
+
+As at 28 February 2023
+- Overall 'green' (on track) status
+- Results of BLP UX/UI testing is guiding development of the revised user journey.
+- BLP transition to BAU support continuing with workshops, showcases and handover documentation.
+- BLP usage is increasing
+
+As at 31 January 2023
+- Continued amber status [closely monitored] with risks under management
+- Search Engine Marketing -'Always On' yielding good results with continued increase in users and the proportion benefitting from BLP
+- Good progress on development of revised BLP user journey.
+
+As at 31 December 2022
+Status AMBER [Closely monitored]
+- Search Engine Marketing commenced 19 December 2022 and already showing increased users and proportion of customers benefitting from BLP
+- External assurance review completed and reported 'green' rating for confidence of delivery.
+
+As at 30 November 2022
+- Continued amber status pending risk management
+- Marketing to commence to increase awareness of platform
+- Good progress on development of revised user journey
+
+As at 31 October 2022
+Status AMBER [Closely monitored]
+- BLP Stage 2 continue reporting amber status reflective of ongoing high-level risks associated with demand-driven labour-market conditions and planned transition to support.
+- Communications and engagement are in progress.
+- The revised user journey continues development and testing. This is planned to be ready for release in the first quarter of 2023. As at 30 September 2022
+Status AMBER [Closely monitored]
+Project journey events:
+- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress.
+- BLP industries expanded to include all industries.
+- Engagement with agencies continues, to heighten BLP awareness and complete validation following recent expansion to encompass all industries.
+
+As at 31 August 2022
+Status GREEN [On track]
+The project is reporting green overall. Ongoing resourcing risk will continue to be monitored and managed for the life of the project, due to a tight labour market.
+Project journey events:
+- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress.
+- Further analysis of June/July 2022 marketing campaign has offered recommendations for consideration, to improve target audience awareness and BLP uptake.
+- BLP industries expanded to include Retail Trade, Accommodation and Non-residential Construction industries finalised.
+- Engagement with agencies continues, to heighten BLP awareness and complete validation following recent expansion with three additional industries.
+
+As at 31 July 2022
+Status AMBER [Closely monitored]
+The project is continuing to report amber overall mainly due to ongoing resourcing challenges.
+Project journey events:
+- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness, is progressing.
+- Analysis of a major marketing campaign conducted in June/July 2022 showed a significant step-up in number of BLP users.
+- The target of 95% of Queensland population coverage was met in June 2022 with 100% of Queensland population now covered on BLP.
+- Agency engagement for extension industries has commenced.
+
+As at 1 July 2022
+BLP commenced work on expanding industries to include Retail Trade, Accommodation and Non-residential Construction industries.
+
+As at June 2022
+Stage 2 of the project is commencing and will build up the solution delivered in BLP Stage 1. Customer journey will be revised in line with outcome of customer testing. The increased coverage target of at least 95% of the Queensland population was met in June 2022, with all local governments included on BLP. Benefits realisation through marketing and promotion of BLP.",https://www.business.qld.gov.au/starting-business/planning/launchpad
+DESBT,"Department of Employment, Small Business and Training",High,VET Modernisation and Transformation Program - Tranche 1,"The Vocational Education and Training (VET) Modernisation and Transformation (VMT) Program seeks to reduce the risks associated with department legacy systems by delivering contemporary, consolidated, integrated, user-friendly applications to support delivery of VET outcomes. To optimise the technical capabilities of the new solutions, engagement with business teams in the review and development of business processes is a priority. ",Trust,Delivery,01/07/2021,31/08/2023,28/02/2023,52,G,8692200,9614968,4961147,Y,Y,Y,"As at 28 February 2023
+- Tranche 1 VMT projects continue on schedule and on budget for Tranche 1 completion by 31 August 2023.
+- Customer Engagement and Contract Establishment projects continue to progress focusing on delivery activities for new CRM and Portal enhancements.
+- VMT Tranche 2 Business Case tracking for completion April 2023.
+
+As at 31 January 2023
+- VMT Projects continue to track to schedule and on budget for Tranche 1 completion 31 August 2023.
+- Customer Engagement and Contract Establishment Projects progressing well with delivery activities for new CRM and Portal enhancements.
+
+As at 31 December 2022
+Status GREEN
+- VMT projects continuing to track to board endorsed updated schedule and on budget for Tranche 1 completion on 31 August 2023.
+- Customer Engagement and Contract Establishment projects completed partner onboarding and delivery activities underway.
+- Planning in progress for Tranche 2, focusing on remaining legacy systems for planned commencement at completion of Tranch 1.
+
+As at 30 November 2022
+Status GREEN
+- Tranche 1 delivery date extended to 31 August 2023 due to CRM vendor procurement delays and subsequent additional time requirements for build completion and testing of new CRM.
+- All projects maintaining momentum and progressing to revised schedule within budget.
+
+As at 31 October 2022
+Status GREEN
+-New 'Partner Portal' Digital Channel continues to perform well with 3516 registered, active, external users from 634 different organisations. Update release being planned for January 2023.
+-SkillsCRM (CEP Project) delivery partner on-boarded and formal delivery stage commenced.
+-Contract Establishment and Variation (CEV PRoject) continuing delivery partner select with a view to commencing prior to end of December 2022.
+
+As at 30 September 2022 Status GREEN.
+The VMT 'Partner Portal' solution was successfully launched on the 17 August 2022. The decommissioning of the outdated legacy application, 'DETConnect', has completed. Work is now increasing on the next VET systems to be replaced, SkillsCRM (via the Customer Engagement Project) and Policy on Line (via the Contract Establishment and Variation Project).
+Project Journey Events:
+- Partner Portal. After the successful launch of Partner Portal and decommissioning of DETConnect, the transition to BAU is underway with the Project team continuing to support business until BAU transition is completed.
+- Data, Infrastructure and Reporting.
+New 'Data Lake' infrastructure built. Data ingestion processes being trialled. QTS report requirement gathering underway which will showcase new capability once completed. Compliance tool SMCM successfully launched September 30.
+-Customer Engagement Project (CEP). Completed assurance reviews successfully. Delivery partner selection completed. Partner and formal delivery stage due to start 18 October 2022. Ramp up of activities continuing with business demonstrations of CRM proof of concept.
+-Contract Establishment and Variation (CEV).
+Requirements gathering completed. Delivery partner selection process commenced. 'As is' process documentation underway.
+
+As at 31 August 2022
+Status GREEN. The project remains on track. Successful launch of new secure 'Partner Portal' Digital Channel for VET related organisations occurred 17 August 2022.
+
+Current Projects underway:
+- Partner Portal. Go-live occurred on track 17 August 2022. All registered VET organisations now able to use the portal to access key applications and send information to DESBT via secure channel. Enhanced support being provided for 6 weeks. Legacy system decommissioning underway.
+- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) continuing and requirement gathering of first report planned to use new capabilites commenced.
+- Customer Services Hub (CRM). Implementation partner selection complete. Solution delivery activities due to start by end September 2022.
+- Contract Engagement and Variation. Requirements gathering complete and partner selection process to commence by end September 2022.
+
+As at 31 July 2022
+Status GREEN
+
+Project journey events:
+Implementation of next changes to VMT applications remain on track for August 2022 with full launch of new secure Partner Portal Digital Channel for VET related organisations.
+VMT Program scope adjusted to include additional at risk system decommission activties during this financial year. Approved expenditure updated to align with revised scope.
+
+Current Projects underway
+- Partner Portal. Opened for registrations 4 July 2022. Majority of VET related organisation now registered. Full access (go-live) on track to commence 17 August 2022. Legacy system to be disabled and decommissioned September 2022.
+- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) underway with population and work on first report to commence in September.
+- Customer Services Hub (CRM). Requirements confirmed and partner selection underway. Work on legacy CRM replacement due to start September/October 2022.
+- Contract Engagement and Variation. Requirements gathering and new process design activities in progress.
+
+15 May 2022 Update
+Status GREEN
+
+Implementation of next changes to VET applications on track for August 2022 with introduction of new secure 'Patner Portal' Digital Channel for VET related organisations.
+
+Projects Completed
+-Database consolidation - key databases transitioned to supported versions and platforms. Completed November 2021.
+-System to System Integration platform. Completed 9 May 2022.
+
+Current projects underway
+-Partner Portal secure digital channel, in final testing. Pilot successfully complete and on track for release in August 2022.
+Projects in startup
+-Data, Infrastructure and Reporting, planning underway.
+-Customer Services Hub (CRM), planning underway.
+-Contract Engagement and Variation, planning underway.
+-Planning continues for Tranche 2.",https://portal.desbt.qld.gov.au/
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 1b4a2ec5..451c42ae 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -636,6 +636,18 @@ def test_with_quoted_commas(self, Session):
)
assert len(self._get_records(Session, "test1")) == 3
+ def test_with_mixed_quotes(self, Session):
+ csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv")
+ resource_id = "test1"
+ factories.Resource(id=resource_id)
+ loader.load_csv(
+ csv_filepath,
+ resource_id=resource_id,
+ mimetype="text/csv",
+ logger=logger,
+ )
+ assert len(self._get_records(Session, "test1")) == 2
+
def test_with_mixed_types(self, Session):
csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv")
resource_id = "test1"
@@ -1183,3 +1195,15 @@ def test_with_quoted_commas(self, Session):
logger=logger,
)
assert len(self._get_records(Session, "test1")) == 3
+
+ def test_with_mixed_quotes(self, Session):
+ csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv")
+ resource_id = "test1"
+ factories.Resource(id=resource_id)
+ loader.load_table(
+ csv_filepath,
+ resource_id=resource_id,
+ mimetype="text/csv",
+ logger=logger,
+ )
+ assert len(self._get_records(Session, "test1")) == 2
From 08298013eb4f4b9fbc514e449dd829a8048ef4a0 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 29 May 2023 10:19:16 +1000
Subject: [PATCH 014/102] [QOLDEV-424] reuse sample size constant for both
loading methods
---
ckanext/xloader/loader.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 55c9cab5..15783021 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -16,7 +16,7 @@
import ckan.plugins as p
from .job_exceptions import FileCouldNotBeLoadedError, LoaderError
-from .parser import XloaderCSVParser
+from .parser import CSV_SAMPLE_LINES, XloaderCSVParser
from .utils import headers_guess, type_guess
from ckan.plugins.toolkit import config
@@ -36,12 +36,12 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
# Determine the header row
try:
file_format = os.path.splitext(csv_filepath)[1].strip('.')
- with Stream(csv_filepath, format=file_format) as stream:
+ with Stream(csv_filepath, format=file_format, sample_size=CSV_SAMPLE_LINES) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with Stream(csv_filepath, format=file_format) as stream:
+ with Stream(csv_filepath, format=file_format, sample_size=CSV_SAMPLE_LINES) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
From b444c6c147cea8a0f0e4ad721abe52677c415b49 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 29 May 2023 10:19:42 +1000
Subject: [PATCH 015/102] [QOLDEV-424] increase CSV sample size to better match
Messytables behaviour
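Tabulator only exposes its sample rows to headers_guess() and type_guess(), so the window size directly bounds how much data informs type detection; a sketch, assuming a local data.csv:

    # A type that only breaks after row 100 is still caught with the
    # larger window; 'data.csv' is a hypothetical input file.
    from tabulator import Stream

    CSV_SAMPLE_LINES = 1000  # was 100

    with Stream('data.csv', format='csv',
                sample_size=CSV_SAMPLE_LINES) as stream:
        print(len(stream.sample))  # at most CSV_SAMPLE_LINES rows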
---
ckanext/xloader/loader.py | 2 +-
ckanext/xloader/parser.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 15783021..7ab76ca5 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -72,7 +72,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
logger.info('Ensuring character coding is UTF8')
f_write = tempfile.NamedTemporaryFile(suffix=file_format, delete=False)
try:
- with Stream(csv_filepath, format=file_format, skip_rows=skip_rows) as stream:
+ with Stream(csv_filepath, format=file_format, skip_rows=skip_rows, sample_size=CSV_SAMPLE_LINES) as stream:
stream.save(target=f_write.name, format='csv', encoding='utf-8',
delimiter=delimiter)
csv_filepath = f_write.name
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index b52c59a3..82539f4d 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -12,7 +12,7 @@
from ckan.plugins.toolkit import config
-CSV_SAMPLE_LINES = 100
+CSV_SAMPLE_LINES = 1000
class XloaderCSVParser(Parser):
From 37a2a5428ab6fd886e8bd93bf78743ed9a6e8daa Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 29 May 2023 11:21:32 +1000
Subject: [PATCH 016/102] [QOLDEV-424] set default CSV sample size in config
- This is more efficient than setting it on each call,
and applies even to code that just reads the config without accepting an override.
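A sketch of the module-level override applied in loader.py; it is set once at import time, so every subsequent Stream picks up the larger default without each call site passing sample_size:

    # Mirrors the loader.py change: override tabulator's module-level
    # default once instead of threading the value through every call.
    from tabulator import config as tabulator_config

    CSV_SAMPLE_LINES = 1000
    tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES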
---
ckanext/xloader/loader.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 7ab76ca5..2060a9ef 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -10,7 +10,7 @@
import psycopg2
from six.moves import zip
-from tabulator import Stream, TabulatorException
+from tabulator import config as tabulator_config, Stream, TabulatorException
from unidecode import unidecode
import ckan.plugins as p
@@ -28,6 +28,7 @@
_drop_indexes = datastore_db._drop_indexes
MAX_COLUMN_LENGTH = 63
+tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES
def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
@@ -36,12 +37,12 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
# Determine the header row
try:
file_format = os.path.splitext(csv_filepath)[1].strip('.')
- with Stream(csv_filepath, format=file_format, sample_size=CSV_SAMPLE_LINES) as stream:
+ with Stream(csv_filepath, format=file_format) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with Stream(csv_filepath, format=file_format, sample_size=CSV_SAMPLE_LINES) as stream:
+ with Stream(csv_filepath, format=file_format) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
@@ -72,7 +73,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
logger.info('Ensuring character coding is UTF8')
f_write = tempfile.NamedTemporaryFile(suffix=file_format, delete=False)
try:
- with Stream(csv_filepath, format=file_format, skip_rows=skip_rows, sample_size=CSV_SAMPLE_LINES) as stream:
+ with Stream(csv_filepath, format=file_format, skip_rows=skip_rows) as stream:
stream.save(target=f_write.name, format='csv', encoding='utf-8',
delimiter=delimiter)
csv_filepath = f_write.name
From 51fffade18e76eb6ec41a29ae885f5c00aac16a2 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 29 May 2023 11:56:38 +1000
Subject: [PATCH 017/102] [QOLDEV-424] alter sample mixed-quotes file to use
generic data
---
.../samples/sample_with_mixed_quotes.csv | 122 +++++++++---------
1 file changed, 61 insertions(+), 61 deletions(-)
diff --git a/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv
index 8408a155..a9527cf7 100644
--- a/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv
+++ b/ckanext/xloader/tests/samples/sample_with_mixed_quotes.csv
@@ -1,136 +1,136 @@
Category,Category name,Priority,Initiative name,Investment objectives,Primary digital priority,Initiative stage,Actual start date,Approved end date,Date data current at,Percentage complete,Overall status,Project commencement allocation,Approved expenditure,Actual cost to date,Scope change event,Cost re-evaluation event,Delivery delay event,Project journey and reasons for variance,Learn more (URL)
-DESBT,"Department of Employment, Small Business and Training",High,Business Launchpad Project - Stage 2,"This BLP initiative has a customer-journey based platform for businesses located throughout Queensland. The aim is to assist businesses to better understand their start up and compliance requirements, with a view to streamlining the complex regulatory environment that may delay or impede businesses from starting, growing, and employing. As at 1 July 2022, Business Launchpad Stage 2 has approval to extend beyond the SBRR scope of deliverables to focus on a revised user journey, inclusion of additional industries, and a broader coverage of more than 95% of the Queensland population, to be completed by 30 June 2023.",Collaboration,Delivery,01/07/2022,30/06/2023,31/03/2023,41,G,5633000,5739000,2352000,N,N,N,"As at 31 March 2023
+DDSSHHESW,"Department of Defence, Social Security, Health, Housing, Education, and Silly Walks",High,Silly Walks project - Stage 2,"Lorum ipsum.",Collaboration,Delivery,01/07/1970,30/06/1971,31/03/1971,41,G,5633000,5739000,2352000,N,N,N,"As at 31 March 1971
- Overall 'green' (on track) status
-- Revised user journey following results of BLP UX/UI testing
+- Revised user journey following results of Silly Walk UX/UI testing
- Transition to support progressing with documentation and walk-through of the solution.
-- Ongoing high levels of BLP usage reflecting the success of search engine marketing. BLP focused campaign to further increase awareness and usage is being finalised.
+- Ongoing high levels of silly walk usage reflecting the success of search engine marketing. Silly walk focused campaign to further increase awareness and usage is being finalised.
-As at 28 February 2023
+As at 28 February 1971
- Overall 'green' (on track) status
-- Results of BLP UX/UI testing is guiding development of the revised user journey.
-- BLP transition to BAU support continuing with workshops, showcases and handover documentation.
-- BLP usage is increasing
+- Results of Silly Walk UX/UI testing is guiding development of the revised user journey.
+- Silly Walk transition to BAU support continuing with workshops, showcases and handover documentation.
+- Silly Walk usage is increasing
-As at 31 January 2023
+As at 31 January 1971
- Continued amber status [closely monitored] with risks under management
-- Search Engine Marketing -'Always On' yielding good results with continued increase in users and the proportion benefitting from BLP
-- Good progress on development of revised BLP user journey.
+- Search Engine Marketing -'Always On' yielding good results with continued increase in users and the proportion benefitting from Silly Walk
+- Good progress on development of revised Silly Walk user journey.
-As at 31 December 2022
+As at 31 December 1970
Status AMBER [Closely monitored]
-- Search Engine Marketing commenced 19 December 2022 and already showing increased users and proportion of customers benefitting from BLP
+- Search Engine Marketing commenced 19 December 1970 and already showing increased users and proportion of customers benefitting from Silly Walk
- External assurance review completed and reported 'green' rating for confidence of delivery.
-As at 30 November 2022
+As at 30 November 1970
- Continued amber status pending risk management
- Marketing to commence to increase awareness of platform
- Good progress on development of revised user journey
-As at 31 October 2022
+As at 31 October 1970
Status AMBER [Closely monitored]
-- BLP Stage 2 continue reporting amber status reflective of ongoing high-level risks associated with demand-driven labour-market conditions and planned transition to support.
+- Silly Walk Stage 2 continue reporting amber status reflective of ongoing high-level risks associated with demand-driven labour-market conditions and planned transition to support.
- Communications and engagement are in progress.
-- The revised user journey continues development and testing. This is planned to be ready for release in the first quarter of 2023. As at 30 September 2022
+- The revised user journey continues development and testing. This is planned to be ready for release in the first quarter of 1971. As at 30 September 1970
Status AMBER [Closely monitored]
Project journey events:
- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress.
-- BLP industries expanded to include all industries.
-- Engagement with agencies continues, to heighten BLP awareness and complete validation following recent expansion to encompass all industries.
+- Silly Walk industries expanded to include all industries.
+- Engagement with agencies continues, to heighten Silly Walk awareness and complete validation following recent expansion to encompass all industries.
-As at 31 August 2022
+As at 31 August 1970
Status GREEN [On track]
The project is reporting green overall. Ongoing resourcing risk will continue to be monitored and managed for the life of the project, due to a tight labour market.
Project journey events:
- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness continues to progress.
-- Further analysis of June/July 2022 marketing campaign has offered recommendations for consideration, to improve target audience awareness and BLP uptake.
-- BLP industries expanded to include Retail Trade, Accommodation and Non-residential Construction industries finalised.
-- Engagement with agencies continues, to heighten BLP awareness and complete validation following recent expansion with three additional industries.
+- Further analysis of June/July 1970 marketing campaign has offered recommendations for consideration, to improve target audience awareness and Silly Walk uptake.
+- Silly Walk industries expanded to include Retail Trade, Accommodation and Non-residential Construction industries finalised.
+- Engagement with agencies continues, to heighten Silly Walk awareness and complete validation following recent expansion with three additional industries.
-As at 31 July 2022
+As at 31 July 1970
Status AMBER [Closely monitored]
The project is continuing to report amber overall mainly due to ongoing resourcing challenges.
Project journey events:
- A revised customer journey in line with outcomes of customer testing and retesting to validate solution usefulness, is progressing.
-- Analysis of a major marketing campaign conducted in June/July 2022 showed a significant step-up in number of BLP users.
-- The target of 95% of Queensland population coverage was met in June 2022 with 100% of Queensland population now covered on BLP.
+- Analysis of a major marketing campaign conducted in June/July 1970 showed a significant step-up in number of Silly Walk users.
+- The target of 95% of Circus population coverage was met in June 1970 with 100% of Circus population now covered on Silly Walk.
- Agency engagement for extension industries has commenced.
-As at 1 July 2022
-BLP commenced work on expanding industries to include Retail Trade, Accommodation and Non-residential Construction industries.
+As at 1 July 1970
+Silly Walk commenced work on expanding industries to include Retail Trade, Accommodation and Non-residential Construction industries.
-As at June 2022
-Stage 2 of the project is commencing and will build up the solution delivered in BLP Stage 1. Customer journey will be revised in line with outcome of customer testing. The increased coverage target of at least 95% of the Queensland population was met in June 2022, with all local governments included on BLP. Benefits realisation through marketing and promotion of BLP.",https://www.business.qld.gov.au/starting-business/planning/launchpad
-DESBT,"Department of Employment, Small Business and Training",High,VET Modernisation and Transformation Program - Tranche 1,"The Vocational Education and Training (VET) Modernisation and Transformation (VMT) Program seeks to reduce the risks associated with department legacy systems by delivering contemporary, consolidated, integrated, user-friendly applications to support delivery of VET outcomes. To optimise the technical capabilities of the new solutions, engagement with business teams in the review and development of business processes is a priority. ",Trust,Delivery,01/07/2021,31/08/2023,28/02/2023,52,G,8692200,9614968,4961147,Y,Y,Y,"As at 28 February 2023
-- Tranche 1 VMT projects continue on schedule and on budget for Tranche 1 completion by 31 August 2023.
+As at June 1970
+Stage 2 of the project is commencing and will build up the solution delivered in Silly Walk Stage 1. Customer journey will be revised in line with outcome of customer testing. The increased coverage target of at least 95% of the Circus population was met in June 1970, with all local governments included on Silly Walk. Benefits realisation through marketing and promotion of Silly Walk.",https://example.com
+DDSSHHESW,"Department of Defence, Social Security, Health, Housing, Education, and Silly Walks",High,Flying Circus Modernisation and Transformation Program - Tranche 1,"The Flying Circus Modernisation and Transformation (FCMT) Program seeks to reduce the risks associated with department legacy systems by delivering contemporary, consolidated, integrated, user-friendly applications to support delivery of Flying Circus outcomes. To optimise the technical capabilities of the new solutions, engagement with business teams in the review and development of business processes is a priority. ",Trust,Delivery,01/07/1969,31/08/1971,28/02/1971,52,G,8692200,9614968,4961147,Y,Y,Y,"As at 28 February 1971
+- Tranche 1 FCMT projects continue on schedule and on budget for Tranche 1 completion by 31 August 1971.
- Customer Engagement and Contract Establishment projects continue to progress focusing on delivery activities for new CRM and Portal enhancements.
-- VMT Tranche 2 Business Case tracking for completion April 2023.
+- FCMT Tranche 2 Business Case tracking for completion April 1971.
-As at 31 January 2023
-- VMT Projects continue to track to schedule and on budget for Tranche 1 completion 31 August 2023.
+As at 31 January 1971
+- FCMT Projects continue to track to schedule and on budget for Tranche 1 completion 31 August 1971.
- Customer Engagement and Contract Establishment Projects progressing well with delivery activities for new CRM and Portal enhancements.
-As at 31 December 2022
+As at 31 December 1970
Status GREEN
-- VMT projects continuing to track to board endorsed updated schedule and on budget for Tranche 1 completion on 31 August 2023.
+- FCMT projects continuing to track to board endorsed updated schedule and on budget for Tranche 1 completion on 31 August 1971.
- Customer Engagement and Contract Establishment projects completed partner onboarding and delivery activities underway.
- Planning in progress for Tranche 2, focusing on remaining legacy systems for planned commencement at completion of Tranch 1.
-As at 30 November 2022
+As at 30 November 1970
Status GREEN
-- Tranche 1 delivery date extended to 31 August 2023 due to CRM vendor procurement delays and subsequent additional time requirements for build completion and testing of new CRM.
+- Tranche 1 delivery date extended to 31 August 1971 due to CRM vendor procurement delays and subsequent additional time requirements for build completion and testing of new CRM.
- All projects maintaining momentum and progressing to revised schedule within budget.
-As at 31 October 2022
+As at 31 October 1970
Status GREEN
--New 'Partner Portal' Digital Channel continues to perform well with 3516 registered, active, external users from 634 different organisations. Update release being planned for January 2023.
+-New 'Partner Portal' Digital Channel continues to perform well with 3516 registered, active, external users from 634 different organisations. Update release being planned for January 1971.
-SkillsCRM (CEP Project) delivery partner on-boarded and formal delivery stage commenced.
--Contract Establishment and Variation (CEV PRoject) continuing delivery partner select with a view to commencing prior to end of December 2022.
+-Contract Establishment and Variation (CEV PRoject) continuing delivery partner select with a view to commencing prior to end of December 1970.
-As at 30 September 2022 Status GREEN.
-The VMT 'Partner Portal' solution was successfully launched on the 17 August 2022. The decommissioning of the outdated legacy application, 'DETConnect', has completed. Work is now increasing on the next VET systems to be replaced, SkillsCRM (via the Customer Engagement Project) and Policy on Line (via the Contract Establishment and Variation Project).
+As at 30 September 1970 Status GREEN.
+The FCMT 'Partner Portal' solution was successfully launched on the 17 August 1970. The decommissioning of the outdated legacy application, 'WalkConnect', has completed. Work is now increasing on the next Flying Circus systems to be replaced, SkillsCRM (via the Customer Engagement Project) and Policy on Line (via the Contract Establishment and Variation Project).
Project Journey Events:
-- Partner Portal. After the successful launch of Partner Portal and decommissioning of DETConnect, the transition to BAU is underway with the Project team continuing to support business until BAU transition is completed.
+- Partner Portal. After the successful launch of Partner Portal and decommissioning of WalkConnect, the transition to BAU is underway with the Project team continuing to support business until BAU transition is completed.
- Data, Infrastructure and Reporting.
New 'Data Lake' infrastructure built. Data ingestion processes being trialled. QTS report requirement gathering underway which will showcase new capability once completed. Compliance tool SMCM successfully launched September 30.
--Customer Engagement Project (CEP). Completed assurance reviews successfully. Delivery partner selection completed. Partner and formal delivery stage due to start 18 October 2022. Ramp up of activities continuing with business demonstrations of CRM proof of concept.
+-Customer Engagement Project (CEP). Completed assurance reviews successfully. Delivery partner selection completed. Partner and formal delivery stage due to start 18 October 1970. Ramp up of activities continuing with business demonstrations of CRM proof of concept.
-Contract Establishment and Variation (CEV).
Requirements gathering completed. Delivery partner selection process commenced. 'As is' process documentation underway.
-As at 31 August 2022
-Status GREEN. The project remains on track. Successful launch of new secure 'Partner Portal' Digital Channel for VET related organisations occurred 17 August 2022.
+As at 31 August 1970
+Status GREEN. The project remains on track. Successful launch of new secure 'Partner Portal' Digital Channel for Flying Circus related organisations occurred 17 August 1970.
Current Projects underway:
-- Partner Portal. Go-live occurred on track 17 August 2022. All registered VET organisations now able to use the portal to access key applications and send information to DESBT via secure channel. Enhanced support being provided for 6 weeks. Legacy system decommissioning underway.
+- Partner Portal. Go-live occurred on track 17 August 1970. All registered Flying Circus organisations now able to use the portal to access key applications and send information to DDSSHHESW via secure channel. Enhanced support being provided for 6 weeks. Legacy system decommissioning underway.
- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) continuing and requirement gathering of first report planned to use new capabilites commenced.
-- Customer Services Hub (CRM). Implementation partner selection complete. Solution delivery activities due to start by end September 2022.
-- Contract Engagement and Variation. Requirements gathering complete and partner selection process to commence by end September 2022.
+- Customer Services Hub (CRM). Implementation partner selection complete. Solution delivery activities due to start by end September 1970.
+- Contract Engagement and Variation. Requirements gathering complete and partner selection process to commence by end September 1970.
-As at 31 July 2022
+As at 31 July 1970
Status GREEN
Project journey events:
-Implementation of next changes to VMT applications remains on track for August 2022 with full launch of new secure Partner Portal Digital Channel for VET related organisations.
-VMT Program scope adjusted to include additional at-risk system decommission activities during this financial year. Approved expenditure updated to align with revised scope.
+Implementation of next changes to FCMT applications remains on track for August 1970 with full launch of new secure Partner Portal Digital Channel for Flying Circus related organisations.
+FCMT Program scope adjusted to include additional at-risk system decommission activities during this financial year. Approved expenditure updated to align with revised scope.
Current Projects underway
-- Partner Portal. Opened for registrations 4 July 2022. Majority of VET related organisations now registered. Full access (go-live) on track to commence 17 August 2022. Legacy system to be disabled and decommissioned September 2022.
+- Partner Portal. Opened for registrations 4 July 1970. Majority of Flying Circus related organisations now registered. Full access (go-live) on track to commence 17 August 1970. Legacy system to be disabled and decommissioned September 1970.
- Data, Infrastructure and Reporting. Build of initial Data Lake (centralised, quality, information source) underway with population and work on first report to commence in September.
-- Customer Services Hub (CRM). Requirements confirmed and partner selection underway. Work on legacy CRM replacement due to start September/October 2022.
+- Customer Services Hub (CRM). Requirements confirmed and partner selection underway. Work on legacy CRM replacement due to start September/October 1970.
- Contract Engagement and Variation. Requirements gathering and new process design activities in progress.
-15 May 2022 Update
+15 May 1970 Update
Status GREEN
-Implementation of next changes to VET applications on track for August 2022 with introduction of new secure 'Partner Portal' Digital Channel for VET related organisations.
+Implementation of next changes to Flying Circus applications on track for August 1970 with introduction of new secure 'Silly Portal' Digital Channel for Flying Circus related organisations.
Projects Completed
--Database consolidation - key databases transitioned to supported versions and platforms. Completed November 2021.
--System to System Integration platform. Completed 9 May 2022.
+-Database consolidation - key databases transitioned to supported versions and platforms. Completed November 1969.
+-System to System Integration platform. Completed 9 May 1970.
Current projects underway
--Partner Portal secure digital channel, in final testing. Pilot successfully completed and on track for release in August 2022.
+-Partner Portal secure digital channel, in final testing. Pilot successfully completed and on track for release in August 1970.
Projects in startup
-Data, Infrastructure and Reporting, planning underway.
-Customer Services Hub (CRM), planning underway.
-Contract Engagement and Variation, planning underway.
--Planning continues for Tranche 2.",https://portal.desbt.qld.gov.au/
+-Planning continues for Tranche 2.",https://example.com
From 4656f063f833cd0b771223b3860ad8e37791d076 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Fri, 28 Jul 2023 14:48:09 +1000
Subject: [PATCH 018/102] [QOLDEV-424] fix tests to avoid hardcoding resource
IDs
- Our test IDs don't have the right format, and we should avoid hardcoding IDs anyway
---
ckanext/xloader/helpers.py | 4 +-
ckanext/xloader/tests/test_loader.py | 192 +++++++++++++--------------
2 files changed, 98 insertions(+), 98 deletions(-)
diff --git a/ckanext/xloader/helpers.py b/ckanext/xloader/helpers.py
index 3c071028..829b7b74 100644
--- a/ckanext/xloader/helpers.py
+++ b/ckanext/xloader/helpers.py
@@ -28,11 +28,11 @@ def xloader_status_description(status):
return _('Not Uploaded Yet')
-def is_resource_supported_by_xloader(res_dict, check_access = True):
+def is_resource_supported_by_xloader(res_dict, check_access=True):
is_supported_format = XLoaderFormats.is_it_an_xloader_format(res_dict.get('format'))
is_datastore_active = res_dict.get('datastore_active', False)
if check_access:
- user_has_access = toolkit.h.check_access('package_update', {'id':res_dict.get('package_id')})
+ user_has_access = toolkit.h.check_access('package_update', {'id': res_dict.get('package_id')})
else:
user_has_access = True
try:
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 451c42ae..8cc69a06 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -85,8 +85,8 @@ def _get_column_types(self, Session, table_name):
class TestLoadCsv(TestLoadBase):
def test_simple(self, Session):
csv_filepath = get_sample_filepath("simple.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
@@ -95,7 +95,7 @@ def test_simple(self, Session):
)
assert self._get_records(
- Session, "test1", limit=1, exclude_full_text_column=False
+ Session, resource_id, limit=1, exclude_full_text_column=False
) == [
(
1,
@@ -105,7 +105,7 @@ def test_simple(self, Session):
u"Galway",
)
]
- assert self._get_records(Session, "test1") == [
+ assert self._get_records(Session, resource_id) == [
(1, u"2011-01-01", u"1", u"Galway"),
(2, u"2011-01-02", u"-1", u"Galway"),
(3, u"2011-01-03", u"0", u"Galway"),
@@ -113,14 +113,14 @@ def test_simple(self, Session):
(5, None, None, u"Berkeley"),
(6, u"2011-01-03", u"5", None),
]
- assert self._get_column_names(Session, "test1") == [
+ assert self._get_column_names(Session, resource_id) == [
u"_id",
u"_full_text",
u"date",
u"temperature",
u"place",
]
- assert self._get_column_types(Session, "test1") == [
+ assert self._get_column_types(Session, resource_id) == [
u"int4",
u"tsvector",
u"text",
@@ -130,8 +130,8 @@ def test_simple(self, Session):
def test_simple_with_indexing(self, Session):
csv_filepath = get_sample_filepath("simple.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
fields = loader.load_csv(
csv_filepath,
resource_id=resource_id,
@@ -144,7 +144,7 @@ def test_simple_with_indexing(self, Session):
assert (
self._get_records(
- Session, "test1", limit=1, exclude_full_text_column=False
+ Session, resource_id, limit=1, exclude_full_text_column=False
)[0][1]
== "'-01':2,3 '1':4 '2011':1 'galway':5"
)
@@ -155,8 +155,8 @@ def test_boston_311_complete(self):
# to get the test file:
# curl -o ckanext/xloader/tests/samples/boston_311.csv https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/2968e2c0-d479-49ba-a884-4ef523ada3c0/download/311.csv # noqa
csv_filepath = get_sample_filepath("boston_311.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
import time
t0 = time.time()
@@ -179,8 +179,8 @@ def test_boston_311_sample5(self):
# to create the test file:
# head -n 100001 ckanext/xloader/tests/samples/boston_311.csv > ckanext/xloader/tests/samples/boston_311_sample5.csv
csv_filepath = get_sample_filepath("boston_311_sample5.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
import time
t0 = time.time()
@@ -199,8 +199,8 @@ def test_boston_311_sample5(self):
def test_boston_311(self, Session):
csv_filepath = get_sample_filepath("boston_311_sample.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
@@ -208,7 +208,7 @@ def test_boston_311(self, Session):
logger=logger,
)
- records = self._get_records(Session, "test1")
+ records = self._get_records(Session, resource_id)
print(records)
assert records == [
(
@@ -308,8 +308,8 @@ def test_boston_311(self, Session):
u"Citizens Connect App",
),
] # noqa
- print(self._get_column_names(Session, "test1"))
- assert self._get_column_names(Session, "test1") == [
+ print(self._get_column_names(Session, resource_id))
+ assert self._get_column_names(Session, resource_id) == [
u"_id",
u"_full_text",
u"CASE_ENQUIRY_ID",
@@ -342,16 +342,16 @@ def test_boston_311(self, Session):
u"Longitude",
u"Source",
] # noqa
- print(self._get_column_types(Session, "test1"))
- assert self._get_column_types(Session, "test1") == [
+ print(self._get_column_types(Session, resource_id))
+ assert self._get_column_types(Session, resource_id) == [
u"int4",
u"tsvector",
] + [u"text"] * (len(records[0]) - 1)
def test_brazilian(self, Session):
csv_filepath = get_sample_filepath("brazilian_sample.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
@@ -359,7 +359,7 @@ def test_brazilian(self, Session):
logger=logger,
)
- records = self._get_records(Session, "test1")
+ records = self._get_records(Session, resource_id)
print(records)
assert records[0] == (
1,
@@ -459,8 +459,8 @@ def test_brazilian(self, Session):
None,
None,
) # noqa
- print(self._get_column_names(Session, "test1"))
- assert self._get_column_names(Session, "test1") == [
+ print(self._get_column_names(Session, resource_id))
+ assert self._get_column_names(Session, resource_id) == [
u"_id",
u"_full_text",
u"NU_ANO_CENSO",
@@ -559,16 +559,16 @@ def test_brazilian(self, Session):
u"PROVA_MEAN_MAT_I_MUN",
u"PROVA_MEAN_MAT_T_MUN",
] # noqa
- print(self._get_column_types(Session, "test1"))
- assert self._get_column_types(Session, "test1") == [
+ print(self._get_column_types(Session, resource_id))
+ assert self._get_column_types(Session, resource_id) == [
u"int4",
u"tsvector",
] + [u"text"] * (len(records[0]) - 1)
def test_german(self, Session):
csv_filepath = get_sample_filepath("german_sample.csv")
- resource_id = "test_german"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
@@ -576,7 +576,7 @@ def test_german(self, Session):
logger=logger,
)
- records = self._get_records(Session, "test_german")
+ records = self._get_records(Session, resource_id)
print(records)
assert records[0] == (
1,
@@ -591,8 +591,8 @@ def test_german(self, Session):
u"24221",
u"672",
)
- print(self._get_column_names(Session, "test_german"))
- assert self._get_column_names(Session, "test_german") == [
+ print(self._get_column_names(Session, resource_id))
+ assert self._get_column_names(Session, resource_id) == [
u"_id",
u"_full_text",
u"Stadtname",
@@ -606,64 +606,64 @@ def test_german(self, Session):
u"Schuler_Berufsausbildung_2010/2011",
u"Schuler_andere allgemeinbildende Schulen_2010/2011",
]
- print(self._get_column_types(Session, "test_german"))
- assert self._get_column_types(Session, "test_german") == [
+ print(self._get_column_types(Session, resource_id))
+ assert self._get_column_types(Session, resource_id) == [
u"int4",
u"tsvector",
] + [u"text"] * (len(records[0]) - 1)
def test_with_blanks(self, Session):
csv_filepath = get_sample_filepath("sample_with_blanks.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
mimetype="text/csv",
logger=logger,
)
- assert len(self._get_records(Session, "test1")) == 3
+ assert len(self._get_records(Session, resource_id)) == 3
def test_with_quoted_commas(self, Session):
csv_filepath = get_sample_filepath("sample_with_quoted_commas.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
mimetype="text/csv",
logger=logger,
)
- assert len(self._get_records(Session, "test1")) == 3
+ assert len(self._get_records(Session, resource_id)) == 3
def test_with_mixed_quotes(self, Session):
csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
mimetype="text/csv",
logger=logger,
)
- assert len(self._get_records(Session, "test1")) == 2
+ assert len(self._get_records(Session, resource_id)) == 2
def test_with_mixed_types(self, Session):
csv_filepath = get_sample_filepath("mixed_numeric_string_sample.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
mimetype="text/csv",
logger=logger,
)
- assert len(self._get_records(Session, "test1")) == 2
+ assert len(self._get_records(Session, resource_id)) == 2
def test_reload(self, Session):
csv_filepath = get_sample_filepath("simple.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
@@ -679,15 +679,15 @@ def test_reload(self, Session):
logger=logger,
)
- assert len(self._get_records(Session, "test1")) == 6
- assert self._get_column_names(Session, "test1") == [
+ assert len(self._get_records(Session, resource_id)) == 6
+ assert self._get_column_names(Session, resource_id) == [
u"_id",
u"_full_text",
u"date",
u"temperature",
u"place",
]
- assert self._get_column_types(Session, "test1") == [
+ assert self._get_column_types(Session, resource_id) == [
u"int4",
u"tsvector",
u"text",
@@ -701,8 +701,8 @@ def test_reload(self, Session):
)
def test_reload_with_overridden_types(self, Session):
csv_filepath = get_sample_filepath("simple.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
@@ -732,15 +732,15 @@ def test_reload_with_overridden_types(self, Session):
fields=fields, resource_id=resource_id, logger=logger
)
- assert len(self._get_records(Session, "test1")) == 6
- assert self._get_column_names(Session, "test1") == [
+ assert len(self._get_records(Session, resource_id)) == 6
+ assert self._get_column_names(Session, resource_id) == [
u"_id",
u"_full_text",
u"date",
u"temperature",
u"place",
]
- assert self._get_column_types(Session, "test1") == [
+ assert self._get_column_types(Session, resource_id) == [
u"int4",
u"tsvector",
u"timestamp",
@@ -750,7 +750,7 @@ def test_reload_with_overridden_types(self, Session):
# check that rows with nulls are indexed correctly
records = self._get_records(
- Session, "test1", exclude_full_text_column=False
+ Session, resource_id, exclude_full_text_column=False
)
print(records)
assert records[4][1] == "'berkeley':1"
@@ -775,8 +775,8 @@ def test_encode_headers(self):
def test_column_names(self, Session):
csv_filepath = get_sample_filepath("column_names.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
csv_filepath,
resource_id=resource_id,
@@ -784,12 +784,12 @@ def test_column_names(self, Session):
logger=logger,
)
- assert self._get_column_names(Session, "test1")[2:] == [
+ assert self._get_column_names(Session, resource_id)[2:] == [
u"d@t$e",
u"t^e&m*pe!r(a)t?u:r%%e",
r"p\l/a[c{e%",
]
- assert self._get_records(Session, "test1")[0] == (
+ assert self._get_records(Session, resource_id)[0] == (
1,
u"2011-01-01",
u"1",
@@ -800,8 +800,8 @@ def test_column_names(self, Session):
class TestLoadUnhandledTypes(TestLoadBase):
def test_kml(self):
filepath = get_sample_filepath("polling_locations.kml")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
with pytest.raises(LoaderError) as exception:
loader.load_csv(
filepath,
@@ -817,8 +817,8 @@ def test_kml(self):
def test_geojson(self):
filepath = get_sample_filepath("polling_locations.geojson")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
with pytest.raises(LoaderError) as exception:
loader.load_csv(
filepath,
@@ -839,8 +839,8 @@ def test_geojson(self):
)
def test_shapefile_zip_python2(self):
filepath = get_sample_filepath("polling_locations.shapefile.zip")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
with pytest.raises(LoaderError):
loader.load_csv(
filepath,
@@ -859,8 +859,8 @@ def test_shapefile_zip_python3(self, Session):
# finds, 'Polling_Locations.cpg'. This file only contains the
# following data: `UTF-8`.
filepath = get_sample_filepath("polling_locations.shapefile.zip")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_csv(
filepath,
resource_id=resource_id,
@@ -868,8 +868,8 @@ def test_shapefile_zip_python3(self, Session):
logger=logger,
)
- assert self._get_records(Session, "test1") == []
- assert self._get_column_names(Session, "test1") == [
+ assert self._get_records(Session, resource_id) == []
+ assert self._get_column_names(Session, resource_id) == [
'_id',
'_full_text',
'UTF-8'
@@ -879,8 +879,8 @@ def test_shapefile_zip_python3(self, Session):
class TestLoadTabulator(TestLoadBase):
def test_simple(self, Session):
csv_filepath = get_sample_filepath("simple.xls")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_table(
csv_filepath,
resource_id=resource_id,
@@ -891,7 +891,7 @@ def test_simple(self, Session):
assert (
"'galway':"
in self._get_records(
- Session, "test1", limit=1, exclude_full_text_column=False
+ Session, resource_id, limit=1, exclude_full_text_column=False
)[0][1]
)
# Indexed record looks like this (depending on CKAN version?):
@@ -899,7 +899,7 @@ def test_simple(self, Session):
# "'-01':4,5 '00':6,7,8 '1':1 '2011':3 'galway':2"
# "'-01':2,3 '00':5,6 '1':7 '2011':1 'galway':8 't00':4"
- assert self._get_records(Session, "test1") == [
+ assert self._get_records(Session, resource_id) == [
(1, datetime.datetime(2011, 1, 1, 0, 0), Decimal("1"), u"Galway",),
(
2,
@@ -927,14 +927,14 @@ def test_simple(self, Session):
u"Berkeley",
),
]
- assert self._get_column_names(Session, "test1") == [
+ assert self._get_column_names(Session, resource_id) == [
u"_id",
u"_full_text",
u"date",
u"temperature",
u"place",
]
- assert self._get_column_types(Session, "test1") == [
+ assert self._get_column_types(Session, resource_id) == [
u"int4",
u"tsvector",
u"timestamp",
@@ -948,8 +948,8 @@ def test_boston_311_complete(self):
# to get the test file:
# curl -o ckanext/xloader/tests/samples/boston_311.csv https://data.boston.gov/dataset/8048697b-ad64-4bfc-b090-ee00169f2323/resource/2968e2c0-d479-49ba-a884-4ef523ada3c0/download/311.csv # noqa
csv_filepath = get_sample_filepath("boston_311.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
import time
t0 = time.time()
@@ -972,8 +972,8 @@ def test_boston_311_sample5(self):
# to create the test file:
# head -n 100001 ckanext/xloader/tests/samples/boston_311.csv > ckanext/xloader/tests/samples/boston_311_sample5.csv
csv_filepath = get_sample_filepath("boston_311_sample5.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
import time
t0 = time.time()
@@ -992,8 +992,8 @@ def test_boston_311_sample5(self):
def test_boston_311(self, Session):
csv_filepath = get_sample_filepath("boston_311_sample.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_table(
csv_filepath,
resource_id=resource_id,
@@ -1001,7 +1001,7 @@ def test_boston_311(self, Session):
logger=logger,
)
- records = self._get_records(Session, "test1")
+ records = self._get_records(Session, resource_id)
print(records)
assert records == [
(
@@ -1101,8 +1101,8 @@ def test_boston_311(self, Session):
u"Citizens Connect App",
),
] # noqa
- print(self._get_column_names(Session, "test1"))
- assert self._get_column_names(Session, "test1") == [
+ print(self._get_column_names(Session, resource_id))
+ assert self._get_column_names(Session, resource_id) == [
u"_id",
u"_full_text",
u"CASE_ENQUIRY_ID",
@@ -1135,8 +1135,8 @@ def test_boston_311(self, Session):
u"Longitude",
u"Source",
] # noqa
- print(self._get_column_types(Session, "test1"))
- assert self._get_column_types(Session, "test1") == [
+ print(self._get_column_types(Session, resource_id))
+ assert self._get_column_types(Session, resource_id) == [
u"int4",
u"tsvector",
u"numeric",
@@ -1174,8 +1174,8 @@ def test_no_entries(self):
csv_filepath = get_sample_filepath("no_entries.csv")
# no datastore table is created - we need to except, or else
# datastore_active will be set on a non-existent datastore table
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
with pytest.raises(LoaderError):
loader.load_table(
csv_filepath,
@@ -1186,24 +1186,24 @@ def test_no_entries(self):
def test_with_quoted_commas(self, Session):
csv_filepath = get_sample_filepath("sample_with_quoted_commas.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_table(
csv_filepath,
resource_id=resource_id,
mimetype="text/csv",
logger=logger,
)
- assert len(self._get_records(Session, "test1")) == 3
+ assert len(self._get_records(Session, resource_id)) == 3
def test_with_mixed_quotes(self, Session):
csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv")
- resource_id = "test1"
- factories.Resource(id=resource_id)
+ resource = factories.Resource()
+ resource_id = resource['id']
loader.load_table(
csv_filepath,
resource_id=resource_id,
mimetype="text/csv",
logger=logger,
)
- assert len(self._get_records(Session, "test1")) == 2
+ assert len(self._get_records(Session, resource_id)) == 2
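For readers skimming the diff, the change throughout this patch boils down to one shape (a minimal sketch; ckan.tests.factories.Resource generates a resource with a valid UUID-format id, so nothing needs to be hardcoded):

    from ckan.tests import factories

    def test_simple(self, Session):
        csv_filepath = get_sample_filepath("simple.csv")
        # Let the factory mint a valid, unique resource id instead of
        # hardcoding a short string like "test1".
        resource = factories.Resource()
        resource_id = resource['id']
        loader.load_csv(
            csv_filepath,
            resource_id=resource_id,
            mimetype="text/csv",
            logger=logger,
        )
        # Every subsequent lookup uses the generated id, never a literal.
        assert len(self._get_records(Session, resource_id)) == 6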
From b23b22c9426f0a7436cdad8bcb5b745bea4230ca Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Fri, 28 Jul 2023 15:44:31 +1000
Subject: [PATCH 019/102] [QOLDEV-424] ensure consistent column name ordering
in tests
- This intermittently breaks the boston_311 test when columns load in an unexpected order
---
ckanext/xloader/tests/test_loader.py | 16 ++++++++++++----
1 file changed, 12 insertions(+), 4 deletions(-)
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 8cc69a06..d55ec949 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -64,8 +64,12 @@ def _get_column_names(self, Session, table_name):
# SELECT column_name FROM information_schema.columns WHERE table_name='test1';
c = Session.connection()
sql = (
- "SELECT column_name FROM information_schema.columns "
- "WHERE table_name='{}';".format(table_name)
+ """
+ SELECT column_name
+ FROM information_schema.columns
+ WHERE table_name='{}'
+ ORDER BY ordinal_position;
+ """.format(table_name)
)
results = c.execute(sql)
records = results.fetchall()
@@ -74,8 +78,12 @@ def _get_column_names(self, Session, table_name):
def _get_column_types(self, Session, table_name):
c = Session.connection()
sql = (
- "SELECT udt_name FROM information_schema.columns "
- "WHERE table_name='{}';".format(table_name)
+ """
+ SELECT udt_name
+ FROM information_schema.columns
+ WHERE table_name='{}'
+ ORDER BY ordinal_position;
+ """.format(table_name)
)
results = c.execute(sql)
records = results.fetchall()
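The guarantee the tests now rely on comes from ordering on information_schema's ordinal_position; without an ORDER BY, PostgreSQL makes no promise about row order. The same query stated standalone (a sketch, assuming a plain psycopg2 connection, and using bind parameters rather than the string formatting the helper above uses):

    def get_column_names(conn, table_name):
        # ordinal_position is the column's position in the table definition,
        # so the result order is deterministic across runs.
        with conn.cursor() as cur:
            cur.execute(
                "SELECT column_name FROM information_schema.columns"
                " WHERE table_name = %s ORDER BY ordinal_position",
                (table_name,),
            )
            return [row[0] for row in cur.fetchall()]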
From bef37d2a561ecc3c27780e102a216df74c1582cc Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 1 Aug 2023 14:16:56 +1000
Subject: [PATCH 020/102] [QOLDEV-490] use fast loading for resources that
already have a data dictionary
- Tabulator is good at type guessing but is slow. Once it has configured the column types, there's no need to use it every time
---
ckanext/xloader/jobs.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py
index 0d242db1..f2263dec 100644
--- a/ckanext/xloader/jobs.py
+++ b/ckanext/xloader/jobs.py
@@ -212,7 +212,7 @@ def tabulator_load():
logger.info("'use_type_guessing' mode is: %s",
use_type_guessing)
try:
- if use_type_guessing:
+ if use_type_guessing and not loader.datastore_resource_exists(resource['id']):
tabulator_load()
else:
try:
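In essence, the change gates the slow path on the absence of an existing datastore table (a condensed sketch; direct_load is a hypothetical stand-in for the COPY-based branch in the real code):

    if use_type_guessing and not loader.datastore_resource_exists(resource['id']):
        # Slow path: let Tabulator sniff the column types from scratch.
        tabulator_load()
    else:
        # Fast path: the table (and hence its data dictionary) already
        # exists, so COPY the data straight in with the known types.
        direct_load()  # hypothetical stand-in for the COPY branch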
From 92009687c3a52aaf49bd00e39f4a90d40b05d871 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 1 Aug 2023 14:38:55 +1000
Subject: [PATCH 021/102] [QOLDEV-490] extend job timeout if type guessing is
needed
- Also refactor: move the function that checks whether a datastore entry exists from the job module to utils,
so different modules can use it.
---
ckanext/xloader/action.py | 4 ++++
ckanext/xloader/jobs.py | 14 +++++---------
ckanext/xloader/loader.py | 13 +------------
ckanext/xloader/utils.py | 19 ++++++++++++++++++-
4 files changed, 28 insertions(+), 22 deletions(-)
diff --git a/ckanext/xloader/action.py b/ckanext/xloader/action.py
index 3fa26803..f52d8d77 100644
--- a/ckanext/xloader/action.py
+++ b/ckanext/xloader/action.py
@@ -153,6 +153,10 @@ def xloader_submit(context, data_dict):
}
}
timeout = config.get('ckanext.xloader.job_timeout', '3600')
+ if not utils.datastore_resource_exists(res_id):
+ # Expand timeout for resources that have to be type-guessed
+ timeout = timeout * 3
+
try:
job = enqueue_job(
jobs.xloader_data_into_datastore, [data], rq_kwargs=dict(timeout=timeout)
diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py
index f2263dec..94784bd7 100644
--- a/ckanext/xloader/jobs.py
+++ b/ckanext/xloader/jobs.py
@@ -18,10 +18,9 @@
from ckan import model
from ckan.plugins.toolkit import get_action, asbool, ObjectNotFound, config
-from . import loader
-from . import db
+from . import db, loader
from .job_exceptions import JobError, HTTPError, DataTooBigError, FileCouldNotBeLoadedError
-from .utils import set_resource_metadata
+from .utils import set_resource_metadata, should_guess_types
try:
from ckan.lib.api_token import get_user_from_token
@@ -206,13 +205,10 @@ def tabulator_load():
logger.info('Loading CSV')
# If ckanext.xloader.use_type_guessing is not configured, fall back to
# deprecated ckanext.xloader.just_load_with_messytables
- use_type_guessing = asbool(config.get(
- 'ckanext.xloader.use_type_guessing', config.get(
- 'ckanext.xloader.just_load_with_messytables', False)))
- logger.info("'use_type_guessing' mode is: %s",
- use_type_guessing)
+ use_type_guessing = should_guess_types(resource['id'])
+ logger.info("'use_type_guessing' mode is: %s", use_type_guessing)
try:
- if use_type_guessing and not loader.datastore_resource_exists(resource['id']):
+ if use_type_guessing:
tabulator_load()
else:
try:
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 2060a9ef..11eb637c 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -17,7 +17,7 @@
from .job_exceptions import FileCouldNotBeLoadedError, LoaderError
from .parser import CSV_SAMPLE_LINES, XloaderCSVParser
-from .utils import headers_guess, type_guess
+from .utils import datastore_resource_exists, headers_guess, type_guess
from ckan.plugins.toolkit import config
@@ -402,17 +402,6 @@ def send_resource_to_datastore(resource_id, headers, records):
.format(str(e)))
-def datastore_resource_exists(resource_id):
- from ckan import model
- context = {'model': model, 'ignore_auth': True}
- try:
- response = p.toolkit.get_action('datastore_search')(context, dict(
- id=resource_id, limit=0))
- except p.toolkit.ObjectNotFound:
- return False
- return response or {'fields': []}
-
-
def delete_datastore_resource(resource_id):
from ckan import model
context = {'model': model, 'user': '', 'ignore_auth': True}
diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py
index 0d2a182b..62b25320 100644
--- a/ckanext/xloader/utils.py
+++ b/ckanext/xloader/utils.py
@@ -9,7 +9,7 @@
from decimal import Decimal
import ckan.plugins as p
-from ckan.plugins.toolkit import config
+from ckan.plugins.toolkit import asbool, config
# resource.formats accepted by ckanext-xloader. Must be lowercase here.
DEFAULT_FORMATS = [
@@ -245,3 +245,20 @@ def type_guess(rows, types=TYPES, strict=False):
guesses_tuples = [(t, guess[t]) for t in types if t in guess]
_columns.append(max(guesses_tuples, key=lambda t_n: t_n[1])[0])
return _columns
+
+
+def datastore_resource_exists(resource_id):
+ context = {'model': model, 'ignore_auth': True}
+ try:
+ response = p.toolkit.get_action('datastore_search')(context, dict(
+ id=resource_id, limit=0))
+ except p.toolkit.ObjectNotFound:
+ return False
+ return response or {'fields': []}
+
+
+def should_guess_types(resource_id):
+ return asbool(
+ config.get('ckanext.xloader.use_type_guessing', config.get(
+ 'ckanext.xloader.just_load_with_messytables', False))) \
+ and datastore_resource_exists(resource_id)
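A corrected usage sketch of the timeout extension this patch introduces: the config value arrives as a string, so it needs int() before any arithmetic. (The version above omits the conversion, which patch 024 below addresses.)

    timeout = int(config.get('ckanext.xloader.job_timeout', '3600'))
    if not utils.datastore_resource_exists(res_id):
        # No existing table means every column type must be guessed,
        # which can take several times longer, so allow a larger budget.
        timeout = timeout * 3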
From 507508ef3a0082cc7e2e33a8b39dca9be33877a7 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 1 Aug 2023 15:22:58 +1000
Subject: [PATCH 022/102] [QOLDEV-490] always use COPY for large files
- Ignore the 'use_type_guessing' flag for large files since they will take too long.
---
ckanext/xloader/config_declaration.yaml | 13 ++++++++++---
ckanext/xloader/jobs.py | 11 +++++++++--
ckanext/xloader/utils.py | 9 +--------
3 files changed, 20 insertions(+), 13 deletions(-)
diff --git a/ckanext/xloader/config_declaration.yaml b/ckanext/xloader/config_declaration.yaml
index b31f12e2..feb1cc9c 100644
--- a/ckanext/xloader/config_declaration.yaml
+++ b/ckanext/xloader/config_declaration.yaml
@@ -29,9 +29,7 @@ groups:
default: 1_000_000_000
example: 100000
description: |
- The connection string for the jobs database used by XLoader. The
- default of an sqlite file is fine for development. For production use a
- Postgresql database.
+ The maximum file size that XLoader will attempt to load.
type: int
required: false
- key: ckanext.xloader.use_type_guessing
@@ -48,6 +46,15 @@ groups:
type: bool
required: false
legacy_key: ckanext.xloader.just_load_with_messytables
+ - key: ckanext.xloader.max_type_guessing_length
+ default: 0
+ example: 100000
+ description: |
+ The maximum file size that will be passed to Tabulator if the
+ use_type_guessing flag is enabled. Larger files will use COPY even if
+ the flag is set. Defaults to 1/10 of the maximum content length.
+ type: int
+ required: false
- key: ckanext.xloader.parse_dates_dayfirst
default: False
example: False
diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py
index 94784bd7..7819c96e 100644
--- a/ckanext/xloader/jobs.py
+++ b/ckanext/xloader/jobs.py
@@ -7,6 +7,7 @@
import tempfile
import json
import datetime
+import os
import traceback
import sys
@@ -20,7 +21,7 @@
from . import db, loader
from .job_exceptions import JobError, HTTPError, DataTooBigError, FileCouldNotBeLoadedError
-from .utils import set_resource_metadata, should_guess_types
+from .utils import datastore_resource_exists, set_resource_metadata
try:
from ckan.lib.api_token import get_user_from_token
@@ -32,6 +33,8 @@
requests.packages.urllib3.disable_warnings()
MAX_CONTENT_LENGTH = int(config.get('ckanext.xloader.max_content_length') or 1e9)
+# Don't try Tabulator load on large files
+MAX_TYPE_GUESSING_LENGTH = int(config.get('ckanext.xloader.max_type_guessing_length') or MAX_CONTENT_LENGTH / 10)
MAX_EXCERPT_LINES = int(config.get('ckanext.xloader.max_excerpt_lines') or 0)
CHUNK_SIZE = 16 * 1024 # 16kb
DOWNLOAD_TIMEOUT = 30
@@ -205,7 +208,11 @@ def tabulator_load():
logger.info('Loading CSV')
# If ckanext.xloader.use_type_guessing is not configured, fall back to
# deprecated ckanext.xloader.just_load_with_messytables
- use_type_guessing = should_guess_types(resource['id'])
+ use_type_guessing = asbool(
+ config.get('ckanext.xloader.use_type_guessing', config.get(
+ 'ckanext.xloader.just_load_with_messytables', False))) \
+ and datastore_resource_exists(resource['id']) \
+ and os.path.getsize(tmp_file.name) <= MAX_TYPE_GUESSING_LENGTH
logger.info("'use_type_guessing' mode is: %s", use_type_guessing)
try:
if use_type_guessing:
diff --git a/ckanext/xloader/utils.py b/ckanext/xloader/utils.py
index 62b25320..994e6754 100644
--- a/ckanext/xloader/utils.py
+++ b/ckanext/xloader/utils.py
@@ -9,7 +9,7 @@
from decimal import Decimal
import ckan.plugins as p
-from ckan.plugins.toolkit import asbool, config
+from ckan.plugins.toolkit import config
# resource.formats accepted by ckanext-xloader. Must be lowercase here.
DEFAULT_FORMATS = [
@@ -255,10 +255,3 @@ def datastore_resource_exists(resource_id):
except p.toolkit.ObjectNotFound:
return False
return response or {'fields': []}
-
-
-def should_guess_types(resource_id):
- return asbool(
- config.get('ckanext.xloader.use_type_guessing', config.get(
- 'ckanext.xloader.just_load_with_messytables', False))) \
- and datastore_resource_exists(resource_id)
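Condensed, the decision after this patch combines three conditions (a sketch of the job-module code; note it also includes the missing 'not' that patch 025 below adds):

    use_type_guessing = (
        asbool(config.get(
            'ckanext.xloader.use_type_guessing', config.get(
                'ckanext.xloader.just_load_with_messytables', False)))
        and not datastore_resource_exists(resource['id'])
        and os.path.getsize(tmp_file.name) <= MAX_TYPE_GUESSING_LENGTH
    )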
From d425e31326855a579d3905ee1b44721beeccf79e Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 1 Aug 2023 15:56:29 +1000
Subject: [PATCH 023/102] adjust datastore tab link to work on current CKAN
2.10
---
ckanext/xloader/templates/package/resource_read.html | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ckanext/xloader/templates/package/resource_read.html b/ckanext/xloader/templates/package/resource_read.html
index 6d5f5ff2..56bf0266 100644
--- a/ckanext/xloader/templates/package/resource_read.html
+++ b/ckanext/xloader/templates/package/resource_read.html
@@ -1,6 +1,6 @@
{% ckan_extends %}
-{% block action_manage_inner %}
+{% block action_manage %}
{{ super() }}
{% if h.is_resource_supported_by_xloader(res) %}
- {% link_for _('DataStore'), named_route='xloader.resource_data', id=pkg.name, resource_id=res.id, class_='btn btn-light', icon='cloud-upload' %}
From 6df99ead7b017f7f19478fae9202b142f1a75be3 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 7 Aug 2023 09:28:25 +1000
Subject: [PATCH 024/102] [QOLDEV-490] fix timeout extension
- Can't multiply a string by 3; we get repeated text instead of arithmetic multiplication
---
ckanext/xloader/action.py | 9 +++++----
1 file changed, 5 insertions(+), 4 deletions(-)
diff --git a/ckanext/xloader/action.py b/ckanext/xloader/action.py
index f52d8d77..e45394a9 100644
--- a/ckanext/xloader/action.py
+++ b/ckanext/xloader/action.py
@@ -152,10 +152,11 @@ def xloader_submit(context, data_dict):
'original_url': resource_dict.get('url'),
}
}
- timeout = config.get('ckanext.xloader.job_timeout', '3600')
- if not utils.datastore_resource_exists(res_id):
- # Expand timeout for resources that have to be type-guessed
- timeout = timeout * 3
+ # Expand timeout for resources that have to be type-guessed
+ timeout = config.get(
+ 'ckanext.xloader.job_timeout',
+ '3600' if utils.datastore_resource_exists(res_id) else '10800')
+ log.debug("Timeout for XLoading resource %s is %s", res_id, timeout)
try:
job = enqueue_job(
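The underlying Python behaviour is easy to demonstrate:

    >>> '3600' * 3           # str * int repeats the text
    '360036003600'
    >>> int('3600') * 3      # arithmetic needs an int first
    10800

Rather than converting and multiplying, the fix simply selects between the string defaults '3600' and '10800' up front.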
From 388bc7b70c7bd733380fc73fe4b74d384bb7b114 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 7 Aug 2023 09:50:30 +1000
Subject: [PATCH 025/102] [QOLDEV-490] fix load method logic
- Missed a 'not' when checking if datastore exists
---
ckanext/xloader/jobs.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py
index 7819c96e..9fae67c2 100644
--- a/ckanext/xloader/jobs.py
+++ b/ckanext/xloader/jobs.py
@@ -211,7 +211,7 @@ def tabulator_load():
use_type_guessing = asbool(
config.get('ckanext.xloader.use_type_guessing', config.get(
'ckanext.xloader.just_load_with_messytables', False))) \
- and datastore_resource_exists(resource['id']) \
+ and not datastore_resource_exists(resource['id']) \
and os.path.getsize(tmp_file.name) <= MAX_TYPE_GUESSING_LENGTH
logger.info("'use_type_guessing' mode is: %s", use_type_guessing)
try:
From da8c602e93922dc682994ff0bec39c479f5fc412 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 22 Aug 2023 13:11:19 +1000
Subject: [PATCH 026/102] [QOLDEV-554] fix overly aggressive timestamp parsing
- Use our guessed types to restrict the behaviour of the numeric/timestamp converter,
so columns that sniff as text won't be incorrectly partially converted to timestamps
---
ckanext/xloader/loader.py | 5 +--
ckanext/xloader/parser.py | 75 ++++++++++++++++++++++++++-------------
2 files changed, 54 insertions(+), 26 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 11eb637c..92e990ed 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -16,7 +16,7 @@
import ckan.plugins as p
from .job_exceptions import FileCouldNotBeLoadedError, LoaderError
-from .parser import CSV_SAMPLE_LINES, XloaderCSVParser
+from .parser import CSV_SAMPLE_LINES, XloaderCSVParser, TypeConverter
from .utils import datastore_resource_exists, headers_guess, type_guess
from ckan.plugins.toolkit import config
@@ -279,9 +279,10 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
for t, h in zip(types, headers)]
headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
+ type_converter = TypeConverter(types=types)
with Stream(table_filepath, format=file_format, skip_rows=skip_rows,
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[type_converter.convert_types]) as stream:
def row_iterator():
for row in stream:
data_row = {}
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index 82539f4d..b7a1c34e 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -1,11 +1,11 @@
# -*- coding: utf-8 -*-
import csv
+import datetime
from decimal import Decimal, InvalidOperation
from itertools import chain
from ckan.plugins.toolkit import asbool
-from dateutil.parser import isoparser, parser
-from dateutil.parser import ParserError
+from dateutil.parser import isoparser, parser, ParserError
from tabulator import helpers
from tabulator.parser import Parser
@@ -97,28 +97,7 @@ def type_value(value):
if value in ('', None):
return ''
- try:
- return Decimal(value)
- except InvalidOperation:
- pass
-
- try:
- i = isoparser()
- return i.isoparse(value)
- except ValueError:
- pass
-
- try:
- p = parser()
- yearfirst = asbool(config.get(
- 'ckanext.xloader.parse_dates_yearfirst', False))
- dayfirst = asbool(config.get(
- 'ckanext.xloader.parse_dates_dayfirst', False))
- return p.parse(value, yearfirst=yearfirst, dayfirst=dayfirst)
- except ParserError:
- pass
-
- return value
+ return to_number(value) or to_timestamp(value) or value
sample, dialect = self.__prepare_dialect(self.__chars)
items = csv.reader(chain(sample, self.__chars), dialect=dialect)
@@ -159,3 +138,51 @@ class dialect(csv.excel):
self.__dialect = dialect
return sample, dialect
+
+
+class TypeConverter:
+ """ Post-process table cells to convert strings into numbers and timestamps
+ as desired.
+ """
+
+ def __init__(self, types):
+ self.types = types
+
+ def convert_types(self, extended_rows):
+ """ Try converting cells to numbers or timestamps if applicable.
+ If a list of types was supplied, use that.
+ If not, then try converting each column to numeric first,
+ then to a timestamp. If both fail, just keep it as a string.
+ """
+ for row_number, headers, row in extended_rows:
+ for cell_index, cell_value in enumerate(row):
+ if cell_value is None:
+ row[cell_index] = ''
+ if cell_value:
+ cell_type = self.types[cell_index]
+ if cell_type == Decimal:
+ row[cell_index] = to_number(cell_value) or cell_value
+ elif cell_type == datetime.datetime:
+ row[cell_index] = to_timestamp(row[cell_index]) or cell_value
+ yield (row_number, headers, row)
+
+
+def to_number(value):
+ try:
+ return Decimal(value)
+ except InvalidOperation:
+ return None
+
+
+def to_timestamp(value):
+ try:
+ i = isoparser()
+ return i.isoparse(value)
+ except ValueError:
+ try:
+ p = parser()
+ yearfirst = asbool(config.get('ckanext.xloader.parse_dates_yearfirst', False))
+ dayfirst = asbool(config.get('ckanext.xloader.parse_dates_dayfirst', False))
+ return p.parse(value, yearfirst=yearfirst, dayfirst=dayfirst)
+ except ParserError:
+ return None
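Roughly, the extracted helpers behave like this (illustrative values; timestamp results depend on the dateutil version and the parse_dates_* settings):

    >>> to_number('42.5')
    Decimal('42.5')
    >>> to_number('Galway') is None       # InvalidOperation -> None
    True
    >>> to_timestamp('2018-07-19')        # ISO 8601 handled by isoparser
    datetime.datetime(2018, 7, 19, 0, 0)
    >>> to_timestamp('not a date') is None
    True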
From 13d076fe0e7bb108eebf445404262a2ba61b5e69 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 22 Aug 2023 13:26:34 +1000
Subject: [PATCH 027/102] [QOLDEV-554] skip conversion if value already has the
desired type
---
ckanext/xloader/parser.py | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index b7a1c34e..e063c762 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -168,6 +168,8 @@ def convert_types(self, extended_rows):
def to_number(value):
+ if isinstance(value, Decimal):
+ return value
try:
return Decimal(value)
except InvalidOperation:
@@ -175,6 +177,8 @@ def to_number(value):
def to_timestamp(value):
+ if isinstance(value, datetime.datetime):
+ return value
try:
i = isoparser()
return i.isoparse(value)
From 03967f7b69f182de6e99d29b858283fab085e7ed Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 22 Aug 2023 14:19:44 +1000
Subject: [PATCH 028/102] [QOLDEV-554] add unit test for time ranges being
preserved as strings
---
.../tests/samples/non_timestamp_sample.csv | 3 +++
ckanext/xloader/tests/test_loader.py | 19 +++++++++++++++++++
2 files changed, 22 insertions(+)
create mode 100644 ckanext/xloader/tests/samples/non_timestamp_sample.csv
diff --git a/ckanext/xloader/tests/samples/non_timestamp_sample.csv b/ckanext/xloader/tests/samples/non_timestamp_sample.csv
new file mode 100644
index 00000000..daf438e5
--- /dev/null
+++ b/ckanext/xloader/tests/samples/non_timestamp_sample.csv
@@ -0,0 +1,3 @@
+Title,Postal postcode,Latitude,Longitude,Mon am,Mon pm,Last updated
+Adavale,4474,-25.9092582,144.5975769,8:00,16:00,19/07/2018
+Aramac,4726,-22.971298,145.241481,9:00-13:00,14:00-16:45,17/07/2018
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index d55ec949..2752e11a 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -1215,3 +1215,22 @@ def test_with_mixed_quotes(self, Session):
logger=logger,
)
assert len(self._get_records(Session, resource_id)) == 2
+
+ def test_preserving_time_ranges(self, Session):
+ """ Time ranges should not be treated as timestamps
+ """
+ csv_filepath = get_sample_filepath("non_timestamp_sample.csv")
+ resource = factories.Resource()
+ resource_id = resource['id']
+ loader.load_table(
+ csv_filepath,
+ resource_id=resource_id,
+ mimetype="text/csv",
+ logger=logger,
+ )
+ assert self._get_records(Session, resource_id) == [
+ (1, "Adavale", 4474, Decimal("-25.9092582"), Decimal("144.5975769"),
+ "8:00", "16:00", datetime.datetime(2018, 7, 19)),
+ (2, "Aramac", 4726, Decimal("-22.971298"), Decimal("145.241481"),
+ "9:00-13:00", "14:00-16:45", datetime.datetime(2018, 7, 17))
+ ]
From f5fad5b65062e7ae94bfd87251b047b9cf8cf7ec Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 22 Aug 2023 14:44:32 +1000
Subject: [PATCH 029/102] [QOLDEV-554] add extra data to the unit test
---
ckanext/xloader/tests/samples/non_timestamp_sample.csv | 1 +
ckanext/xloader/tests/test_loader.py | 2 ++
2 files changed, 3 insertions(+)
diff --git a/ckanext/xloader/tests/samples/non_timestamp_sample.csv b/ckanext/xloader/tests/samples/non_timestamp_sample.csv
index daf438e5..d1b39e90 100644
--- a/ckanext/xloader/tests/samples/non_timestamp_sample.csv
+++ b/ckanext/xloader/tests/samples/non_timestamp_sample.csv
@@ -1,3 +1,4 @@
Title,Postal postcode,Latitude,Longitude,Mon am,Mon pm,Last updated
Adavale,4474,-25.9092582,144.5975769,8:00,16:00,19/07/2018
Aramac,4726,-22.971298,145.241481,9:00-13:00,14:00-16:45,17/07/2018
+Barcaldine,4725,-23.55327901,145.289156,9:00-12:30,13:30-16:30,20/07/2018
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index 2752e11a..c01d830c 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -1233,4 +1233,6 @@ def test_preserving_time_ranges(self, Session):
"8:00", "16:00", datetime.datetime(2018, 7, 19)),
(2, "Aramac", 4726, Decimal("-22.971298"), Decimal("145.241481"),
"9:00-13:00", "14:00-16:45", datetime.datetime(2018, 7, 17))
+ (3, "Barcaldine", 4725, Decimal("-23.55327901"), Decimal("145.289156"),
+ "9:00-12:30", "13:30-16:30", datetime.datetime(2018, 7, 20))
]
From e6b05c6450b4a5c2dd8e18fee59e15d38e4f95b9 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 22 Aug 2023 15:55:20 +1000
Subject: [PATCH 030/102] [QOLDEV-554] restrict recognised date types
- Apply a regex to limit the values that will potentially be parsed as dates.
We aren't interested in anything that doesn't seem to have day, month, or year components.
---
ckanext/xloader/parser.py | 11 +++++++----
1 file changed, 7 insertions(+), 4 deletions(-)
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index e063c762..d17b2f80 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -3,6 +3,8 @@
import datetime
from decimal import Decimal, InvalidOperation
from itertools import chain
+import re
+import six
from ckan.plugins.toolkit import asbool
from dateutil.parser import isoparser, parser, ParserError
@@ -13,6 +15,7 @@
from ckan.plugins.toolkit import config
CSV_SAMPLE_LINES = 1000
+DATE_REGEX = re.compile(r'''^\d{1,4}[-/.\s]\S+[-/.\s]\S+''')
class XloaderCSVParser(Parser):
@@ -168,8 +171,8 @@ def convert_types(self, extended_rows):
def to_number(value):
- if isinstance(value, Decimal):
- return value
+ if not isinstance(value, six.string_types):
+ return None
try:
return Decimal(value)
except InvalidOperation:
@@ -177,8 +180,8 @@ def to_number(value):
def to_timestamp(value):
- if isinstance(value, datetime.datetime):
- return value
+ if not isinstance(value, six.string_types) or not DATE_REGEX.search(value):
+ return None
try:
i = isoparser()
return i.isoparse(value)
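The effect of the regex gate, sketched: only values that start with one to four digits followed by plausible date separators ever reach dateutil.

    >>> bool(DATE_REGEX.search('19/07/2018'))    # day/month/year shape
    True
    >>> bool(DATE_REGEX.search('2018-07-19'))    # ISO shape
    True
    >>> bool(DATE_REGEX.search('9:00-13:00'))    # time range: ':' is not an accepted separator
    False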
From 00ee904e46009dd1be6beff41f65a45e2a443ce2 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 22 Aug 2023 15:59:37 +1000
Subject: [PATCH 031/102] [QOLDEV-554] fix missing comma
---
ckanext/xloader/tests/test_loader.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index c01d830c..f17e6c10 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -1232,7 +1232,7 @@ def test_preserving_time_ranges(self, Session):
(1, "Adavale", 4474, Decimal("-25.9092582"), Decimal("144.5975769"),
"8:00", "16:00", datetime.datetime(2018, 7, 19)),
(2, "Aramac", 4726, Decimal("-22.971298"), Decimal("145.241481"),
- "9:00-13:00", "14:00-16:45", datetime.datetime(2018, 7, 17))
+ "9:00-13:00", "14:00-16:45", datetime.datetime(2018, 7, 17)),
(3, "Barcaldine", 4725, Decimal("-23.55327901"), Decimal("145.289156"),
"9:00-12:30", "13:30-16:30", datetime.datetime(2018, 7, 20))
]
From e54b212e760b11f0d42f14dadcc0e8b0bc56b1fd Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 22 Aug 2023 16:17:46 +1000
Subject: [PATCH 032/102] [QOLDEV-554] replace custom parser with
post-processing
- Customising the parser requires a lot of boilerplate, and we already have a post-processor to do the job.
---
ckanext/xloader/jobs.py | 4 +-
ckanext/xloader/loader.py | 6 +-
ckanext/xloader/parser.py | 150 +++------------------------
ckanext/xloader/tests/test_parser.py | 10 +-
4 files changed, 23 insertions(+), 147 deletions(-)
diff --git a/ckanext/xloader/jobs.py b/ckanext/xloader/jobs.py
index 9fae67c2..9c6e0a67 100644
--- a/ckanext/xloader/jobs.py
+++ b/ckanext/xloader/jobs.py
@@ -28,6 +28,8 @@
except ImportError:
get_user_from_token = None
+log = logging.getLogger(__name__)
+
SSL_VERIFY = asbool(config.get('ckanext.xloader.ssl_verify', True))
if not SSL_VERIFY:
requests.packages.urllib3.disable_warnings()
@@ -82,7 +84,6 @@ def xloader_data_into_datastore(input):
db.mark_job_as_errored(job_id, str(e))
job_dict['status'] = 'error'
job_dict['error'] = str(e)
- log = logging.getLogger(__name__)
log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc()))
errored = True
except Exception as e:
@@ -90,7 +91,6 @@ def xloader_data_into_datastore(input):
job_id, traceback.format_tb(sys.exc_info()[2])[-1] + repr(e))
job_dict['status'] = 'error'
job_dict['error'] = str(e)
- log = logging.getLogger(__name__)
log.error('xloader error: {0}, {1}'.format(e, traceback.format_exc()))
errored = True
finally:
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 92e990ed..4da314a8 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -16,7 +16,7 @@
import ckan.plugins as p
from .job_exceptions import FileCouldNotBeLoadedError, LoaderError
-from .parser import CSV_SAMPLE_LINES, XloaderCSVParser, TypeConverter
+from .parser import CSV_SAMPLE_LINES, TypeConverter
from .utils import datastore_resource_exists, headers_guess, type_guess
from ckan.plugins.toolkit import config
@@ -238,13 +238,13 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
try:
file_format = os.path.splitext(table_filepath)[1].strip('.')
with Stream(table_filepath, format=file_format,
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
with Stream(table_filepath, format=file_format,
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
diff --git a/ckanext/xloader/parser.py b/ckanext/xloader/parser.py
index d17b2f80..812ccd1f 100644
--- a/ckanext/xloader/parser.py
+++ b/ckanext/xloader/parser.py
@@ -1,154 +1,24 @@
# -*- coding: utf-8 -*-
-import csv
import datetime
from decimal import Decimal, InvalidOperation
-from itertools import chain
import re
import six
from ckan.plugins.toolkit import asbool
from dateutil.parser import isoparser, parser, ParserError
-from tabulator import helpers
-from tabulator.parser import Parser
-
from ckan.plugins.toolkit import config
CSV_SAMPLE_LINES = 1000
DATE_REGEX = re.compile(r'''^\d{1,4}[-/.\s]\S+[-/.\s]\S+''')
-class XloaderCSVParser(Parser):
- """Extends tabulator CSVParser to detect datetime and numeric values.
- """
-
- # Public
-
- options = [
- 'delimiter',
- 'doublequote',
- 'escapechar',
- 'quotechar',
- 'quoting',
- 'skipinitialspace',
- 'lineterminator'
- ]
-
- def __init__(self, loader, force_parse=False, **options):
- super(XloaderCSVParser, self).__init__(loader, force_parse, **options)
- # Set attributes
- self.__loader = loader
- self.__options = options
- self.__force_parse = force_parse
- self.__extended_rows = None
- self.__encoding = None
- self.__dialect = None
- self.__chars = None
-
- @property
- def closed(self):
- return self.__chars is None or self.__chars.closed
-
- def open(self, source, encoding=None):
- # Close the character stream, if necessary, before reloading it.
- self.close()
- self.__chars = self.__loader.load(source, encoding=encoding)
- self.__encoding = getattr(self.__chars, 'encoding', encoding)
- if self.__encoding:
- self.__encoding.lower()
- self.reset()
-
- def close(self):
- if not self.closed:
- self.__chars.close()
-
- def reset(self):
- helpers.reset_stream(self.__chars)
- self.__extended_rows = self.__iter_extended_rows()
-
- @property
- def encoding(self):
- return self.__encoding
-
- @property
- def dialect(self):
- if self.__dialect:
- dialect = {
- 'delimiter': self.__dialect.delimiter,
- 'doubleQuote': self.__dialect.doublequote,
- 'lineTerminator': self.__dialect.lineterminator,
- 'quoteChar': self.__dialect.quotechar,
- 'skipInitialSpace': self.__dialect.skipinitialspace,
- }
- if self.__dialect.escapechar is not None:
- dialect['escapeChar'] = self.__dialect.escapechar
- return dialect
-
- @property
- def extended_rows(self):
- return self.__extended_rows
-
- # Private
-
- def __iter_extended_rows(self):
-
- def type_value(value):
- """Returns numeric values as Decimal(). Uses dateutil to parse
- date values. Otherwise, returns values as it receives them
- (strings).
- """
- if value in ('', None):
- return ''
-
- return to_number(value) or to_timestamp(value) or value
-
- sample, dialect = self.__prepare_dialect(self.__chars)
- items = csv.reader(chain(sample, self.__chars), dialect=dialect)
- for row_number, item in enumerate(items, start=1):
- values = []
- for value in item:
- value = type_value(value)
- values.append(value)
- yield row_number, None, list(values)
-
- def __prepare_dialect(self, stream):
-
- # Get sample
- sample = []
- while True:
- try:
- sample.append(next(stream))
- except StopIteration:
- break
- if len(sample) >= CSV_SAMPLE_LINES:
- break
-
- # Get dialect
- try:
- separator = ''
- delimiter = self.__options.get('delimiter', ',\t;|')
- dialect = csv.Sniffer().sniff(separator.join(sample), delimiter)
- if not dialect.escapechar:
- dialect.doublequote = True
- except csv.Error:
- class dialect(csv.excel):
- pass
- for key, value in self.__options.items():
- setattr(dialect, key, value)
- # https://github.com/frictionlessdata/FrictionlessDarwinCore/issues/1
- if getattr(dialect, 'quotechar', None) == '':
- setattr(dialect, 'quoting', csv.QUOTE_NONE)
-
- self.__dialect = dialect
- return sample, dialect
-
-
class TypeConverter:
""" Post-process table cells to convert strings into numbers and timestamps
as desired.
"""
- def __init__(self, types):
+ def __init__(self, types=None):
self.types = types
def convert_types(self, extended_rows):
@@ -161,12 +31,18 @@ def convert_types(self, extended_rows):
for cell_index, cell_value in enumerate(row):
if cell_value is None:
row[cell_index] = ''
- if cell_value:
- cell_type = self.types[cell_index]
- if cell_type == Decimal:
- row[cell_index] = to_number(cell_value) or cell_value
- elif cell_type == datetime.datetime:
- row[cell_index] = to_timestamp(row[cell_index]) or cell_value
+ if not cell_value:
+ continue
+ cell_type = self.types[cell_index] if self.types else None
+ if cell_type in [Decimal, None]:
+ converted_value = to_number(cell_value)
+ if converted_value:
+ row[cell_index] = converted_value
+ continue
+ if cell_type in [datetime.datetime, None]:
+ converted_value = to_timestamp(cell_value)
+ if converted_value:
+ row[cell_index] = converted_value
yield (row_number, headers, row)
diff --git a/ckanext/xloader/tests/test_parser.py b/ckanext/xloader/tests/test_parser.py
index 67929d9f..ac4047dd 100644
--- a/ckanext/xloader/tests/test_parser.py
+++ b/ckanext/xloader/tests/test_parser.py
@@ -6,7 +6,7 @@
from datetime import datetime
from tabulator import Stream
-from ckanext.xloader.parser import XloaderCSVParser
+from ckanext.xloader.parser import TypeConverter
csv_filepath = os.path.abspath(
os.path.join(os.path.dirname(__file__), "samples", "date_formats.csv")
@@ -16,7 +16,7 @@
class TestParser(object):
def test_simple(self):
with Stream(csv_filepath, format='csv',
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
assert stream.sample == [
[
'date',
@@ -49,7 +49,7 @@ def test_simple(self):
def test_dayfirst(self):
print('test_dayfirst')
with Stream(csv_filepath, format='csv',
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
assert stream.sample == [
[
'date',
@@ -82,7 +82,7 @@ def test_dayfirst(self):
def test_yearfirst(self):
print('test_yearfirst')
with Stream(csv_filepath, format='csv',
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
assert stream.sample == [
[
'date',
@@ -115,7 +115,7 @@ def test_yearfirst(self):
@pytest.mark.ckan_config("ckanext.xloader.parse_dates_yearfirst", True)
def test_yearfirst_dayfirst(self):
with Stream(csv_filepath, format='csv',
- custom_parsers={'csv': XloaderCSVParser}) as stream:
+ post_parse=[TypeConverter().convert_types]) as stream:
assert stream.sample == [
[
'date',
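The simplification works because tabulator's post_parse hooks are plain generators over (row_number, headers, row) tuples, with none of the Parser subclass boilerplate. A minimal custom processor in the same style, for comparison (a sketch; 'data.csv' and strip_cells are placeholder names):

    from tabulator import Stream

    def strip_cells(extended_rows):
        # Each processor receives and re-yields (row_number, headers, row).
        for row_number, headers, row in extended_rows:
            row = [cell.strip() if isinstance(cell, str) else cell
                   for cell in row]
            yield (row_number, headers, row)

    with Stream('data.csv', format='csv', post_parse=[strip_cells]) as stream:
        rows = stream.read()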
From d40e24578b2e55b7273de36c9e45b665bdad74b4 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Wed, 11 Oct 2023 10:17:46 +1000
Subject: [PATCH 033/102] [QOLSVC-3224] handle any falsy url_type the same way
as empty string
---
ckanext/xloader/helpers.py | 12 ++++++++----
1 file changed, 8 insertions(+), 4 deletions(-)
diff --git a/ckanext/xloader/helpers.py b/ckanext/xloader/helpers.py
index 829b7b74..8b9dee8f 100644
--- a/ckanext/xloader/helpers.py
+++ b/ckanext/xloader/helpers.py
@@ -35,8 +35,12 @@ def is_resource_supported_by_xloader(res_dict, check_access=True):
user_has_access = toolkit.h.check_access('package_update', {'id': res_dict.get('package_id')})
else:
user_has_access = True
- try:
- is_supported_url_type = res_dict.get('url_type') not in toolkit.h.datastore_rw_resource_url_types()
- except AttributeError:
- is_supported_url_type = (res_dict.get('url_type') == 'upload' or res_dict.get('url_type') == '')
+ url_type = res_dict.get('url_type')
+ if url_type:
+ try:
+ is_supported_url_type = url_type not in toolkit.h.datastore_rw_resource_url_types()
+ except AttributeError:
+ is_supported_url_type = (url_type == 'upload')
+ else:
+ is_supported_url_type = True
return (is_supported_format or is_datastore_active) and user_has_access and is_supported_url_type
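The guard above treats any falsy url_type (None as well as the empty string) as a plain link or upload, and only consults the datastore helper when it exists. A condensed sketch of the same decision, with rw_url_types standing in for toolkit.h.datastore_rw_resource_url_types():

    def supported_url_type(url_type, rw_url_types=None):
        # Falsy url_type (None, '') means a plain link or upload: supported.
        if not url_type:
            return True
        if rw_url_types is not None:
            # Newer CKAN exposes the read/write url_types via a helper.
            return url_type not in rw_url_types
        # Older CKAN without the helper: only direct uploads are supported.
        return url_type == 'upload'

    assert supported_url_type(None)
    assert supported_url_type('')
    assert not supported_url_type('datastore', rw_url_types=['datastore'])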
From 589800078b29e35c2315de2e5ca51177f7530e15 Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 23 Oct 2023 15:16:47 +1000
Subject: [PATCH 034/102] [QOLSVC-2984] add test for ISO-8859-1 CSV encoding
---
.../xloader/tests/samples/non_utf8_sample.csv | 267 ++++++++++++++++++
ckanext/xloader/tests/test_loader.py | 12 +
2 files changed, 279 insertions(+)
create mode 100644 ckanext/xloader/tests/samples/non_utf8_sample.csv
diff --git a/ckanext/xloader/tests/samples/non_utf8_sample.csv b/ckanext/xloader/tests/samples/non_utf8_sample.csv
new file mode 100644
index 00000000..334c1005
--- /dev/null
+++ b/ckanext/xloader/tests/samples/non_utf8_sample.csv
@@ -0,0 +1,267 @@
+"ClientId_ActNo","Owner","Amount","SenderName","DateRec","PCode"
+"206681442213","MS MARIE LOUISE SEXTON ","477.05","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3206"
+"206681442214","MR DAVID SHEARER","3.79","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2213"
+"206681442215","MRS M SHONK + MR E T SHONK ","10.3","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2093"
+"206681442216","MS AGATHA SKOURTIS","108.42","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3025"
+"206681442217","MR JAMES SMITH","108.42","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","4811"
+"206681442218","MRS JILLIAN MELINDA SMITH","602.27","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2752"
+"206681442219","MISS JESSICA SARAH STEAD","174.01","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2040"
+"206681442220","MISS CHAU DONG MINH TANG","542.1","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3065"
+"206681442221","MR TROY TAYLOR","240.69","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","4000"
+"206681442222","MR ANDREW PHILIP THOMPSON","2.17","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2204"
+"206681442223","MR IVAN CONRAD TIMBS","702.02","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","2612"
+"206681442224","MR J WAJNTRAUB + MRS S WAJNTRAUB ","542.1","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","3205"
+"206681442225","MR HOWARD GRENVILLE WEBBER","400.61","VIRGIN AUSTRALIA HOLDINGS LIMITED","2012-02-28 00:00:00","4556"
+"206681442226","JANI ILARI KALLA","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6157"
+"206681442227","GARY JOHN & DESLEY L CAHILL","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4816"
+"206681442228","CARMEL ANASTASIA MEAGLIA","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2205"
+"206681442229","ASHLEY & ANNIE BRUGGEMANN","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4671"
+"206681442230","TERRY & MARY RITCHIE","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442231","BODY CORPORATE VILLAGE WAY CTS 19459","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214"
+"206681442232","MATHEW JOHN SHORTLAND","10","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2573"
+"206681442233","TANYA MARIE TOWNSON","10.01","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442234","VENEE ELVA RUSSELL","10.02","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4035"
+"206681442235","ELIZABETH FERNANCE","10.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4223"
+"206681442236","CHARLES JOHN & OLWYN MARTIN","10.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4121"
+"206681442237","ALFRED BRETT SEILER","10.05","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4129"
+"206681442238","LOUISE WOODHAM & NATHAN FREY","10.07","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4400"
+"206681442239","MITRA KHAKBAZ","10.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4005"
+"206681442240","ALLAN EDWARD KILCULLEN","10.1","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817"
+"206681442241","BEVAN JOHN LISTON","10.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442242","KRIS MICHAEL KANKAHAINEN","10.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4107"
+"206681442243","MICHAEL LYNN","10.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4860"
+"206681442244","ALAN RAYMOND & GERAL BURKITT","10.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4228"
+"206681442245","JENNIFER & NEVILLE MARXSEN","10.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4680"
+"206681442246","DARREN MAIN GRANT & LISA MARIE GROSSKOPF","10.2","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4504"
+"206681442247","PEARSON AUTOMOTIVE","10.23","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4064"
+"206681442248","MR SHANE HOPE & MISS YVONNE HILTON","10.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4173"
+"206681442249","CARMEL LESLEY NEILSON & WAYNE MERVYN NEILSON &","10.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4650"
+"206681442250","STEPHEN KENNETH ROBERTSON","10.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442251","SHIH CHE LIN","10.26","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214"
+"206681442252","DAVID BRETT BROWNE","10.29","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4558"
+"206681442253","NEVILLE COLIN WOODHOUSE","10.32","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442254","DARRYN GREGORY & PET ROBIN","10.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178"
+"206681442255","DUDLEY JESSER","10.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442256","MURRAY JOHN & SANDRA DIXON","10.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870"
+"206681442257","SHATHISO JOHNSON BAREKI","10.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215"
+"206681442258","ARTHUR EDWARD & MAUR MACDONALD","10.39","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4390"
+"206681442259","GARY GOLDBERG","10.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2480"
+"206681442260","PHUONG VAN NGO","10.41","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4508"
+"206681442261","JACQUELYN WILSON","10.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3046"
+"206681442262","GARTH TURTON","10.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442263","DAVID JAMES & ANNE M O'ROURKE","10.43","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4701"
+"206681442264","ROBERT RUSSELL & VER MCKENZIE","10.45","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4503"
+"206681442265","ESTATE OF DULCIE L SYKES","10.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215"
+"206681442266","LEESA GAYE OSMOND","10.51","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4671"
+"206681442267","DAVID JOHN & ROSEMAR GILES","10.54","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4303"
+"206681442268","SALLY & AQEEL AHMED","10.56","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442269","JUDITH MARJORY BURGESS","10.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3101"
+"206681442270","TROY ANTONY EWART","10.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4301"
+"206681442271","RODULFO MANOY & GEORGE HAJEK","10.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4152"
+"206681442272","GLEN DUNSTAN","10.66","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3196"
+"206681442273","ANNE RALSTON WRIGHT","10.73","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4825"
+"206681442274","ALAN & NICOLE MAREE JACKSON","10.74","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4720"
+"206681442275","DANIEL MALCOLM BROWN","10.81","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4501"
+"206681442276","JENNIFER DEMERAL","10.82","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214"
+"206681442277","DARREN & LISA GARRETT","10.83","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165"
+"206681442278","LORRAINE & PETER JACKSON","10.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442279","CHERYL MADELINE CAMPBELL","10.86","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4824"
+"206681442280","OLAF PETER PRILL","10.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4305"
+"206681442281","AJAY GIDH","10.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442282","DEBRA JOANNE PRINDABLE","10.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178"
+"206681442283","MATTHEW WILLIAM CLARKE","10.96","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2914"
+"206681442284","MARK STANLEY MCKENZIE","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207"
+"206681442285","TREVOR & JANICE GARWOOD","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4610"
+"206681442286","LISA ANNE BRATINA","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4228"
+"206681442287","MICHAEL GEORGE KIRKWOOD","11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4561"
+"206681442288","STEPHAN & JULIE BAWDEN","11.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4114"
+"206681442289","PETER JOHN BOURKE","11.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4118"
+"206681442290","TYRONE PAGE & ULRIKE","11.07","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4301"
+"206681442291","SIMON ROBERT GRAY","11.08","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4006"
+"206681442292","ALLAN NICHOLAS SCHWARZROCK","11.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4650"
+"206681442293","IVAN J BLAKE & JAINE RIGTER","11.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4220"
+"206681442294","DAVID MATTHEW REGINA CHRISTIE","11.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4151"
+"206681442295","GEOFFREY WAYNE & EVAN GRIGG","11.14","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4720"
+"206681442296","KYLIE JANELLE HARDCASTLE","11.14","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4013"
+"206681442297","PAMELA ANN WELLER","11.15","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4655"
+"206681442298","JASON PATRICK & ELIZ MURPHY","11.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4511"
+"206681442299","MLADEN & VESNA SAJKO","11.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4157"
+"206681442300","DEAN STEPHEN BROCKENSHIRE","11.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2571"
+"206681442301","LISA CHRISTOBEL BOWKER","11.22","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4066"
+"206681442302","MATTHEW RAY EBBAGE","11.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4101"
+"206681442303","BRIAN & GEORGINA WHITLEY","11.25","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4521"
+"206681442304","HAYLEY WESTON","11.25","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4159"
+"206681442305","JAMES PATRICK HOCKING","11.28","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4127"
+"206681442306","ROBERT ANDREW & SARA BROWNHALL","11.29","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442307","EDWARD JAMES DODGSON","11.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442308","MELISSA JOY DODD","11.32","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442309","JOSHUA CALVIN BEGENT","11.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4306"
+"206681442311","DORATHY AMANDA WALTERS","11.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4744"
+"206681442312","RICHARD ROBERTS & KYM RALEIGH","11.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4053"
+"206681442313","SAMARA INSOLL","11.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4212"
+"206681442314","NEIL GREGORY FLESSER","11.49","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4800"
+"206681442315","EUNICE GLADYS WILBRAHAM","11.51","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4570"
+"206681442316","KARA NICOLE MCINNES","11.57","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4503"
+"206681442317","DAVID BLYTH","11.58","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4078"
+"206681442318","KEVIN & MARION KEIR","11.58","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4216"
+"206681442319","FRANCES & CHARLES KEEBLE","11.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4500"
+"206681442320","LYNETTE ANNE & PETER NISSEN","11.6","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442321","DANIEL PETER JOHNSON","11.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442322","ALLAN & EUNICE DELLAWAY","11.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4122"
+"206681442323","CHRISTOPHER JOHN BEEM","11.63","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4101"
+"206681442324","DAVID JAMES & KELLIE POULTON","11.64","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442325","MAVIS CAROLIN SCOTT","11.64","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4018"
+"206681442326","REEGAN & ADAM MARTIN","11.68","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2153"
+"206681442327","DENYSE B BONNEY","11.7","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4811"
+"206681442328","JAMES ANDERSON","11.71","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4220"
+"206681442329","SUSANNAH PINTER","11.72","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4744"
+"206681442330","BRENTON MARK & KAREN GARNETT","11.78","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4306"
+"206681442331","PL CAMELOT VENTURES AS TRUSTEE FOR K F T TRUST NO","11.82","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215"
+"206681442332","RON HENRY SCHMIDT","11.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","830"
+"206681442333","ROSS COCKBURN & AUDREY KILL","11.86","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4010"
+"206681442334","BENJAMIN CLARK","11.88","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4306"
+"206681442335","IRIS LEAH TERESA BAKER","11.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2170"
+"206681442336","MARK JOHN DEEBLE","11.94","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442337","CHRISTINE & BARRY RIGBY","11.94","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2485"
+"206681442338","NATASHA ANN WOODWARD","11.97","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4869"
+"206681442339","BENJAMIN JOHN CANSDALE","11.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4064"
+"206681442340","PETER HERALD","11.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4184"
+"206681442341","SIMON CUSHWAY","11.99","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4154"
+"206681442342","ANTHONY & MICHELLE JOHNSTON","12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4551"
+"206681442343","PAUL HAUCK","12.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4000"
+"206681442344","RONALD ALBERT & PEAR NORTHILL","12.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4413"
+"206681442345","ROBYN ELLEN SOMERS","12.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178"
+"206681442346","ROSE ANN HODGMAN","12.06","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4068"
+"206681442347","JOHN & MARDI BOLTON","12.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165"
+"206681442348","KRYSTYNA RENNIE","12.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4053"
+"206681442349","JOANNE BARSBY","12.12","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442350","BRENDAN JAMES FELSCHOW","12.14","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4508"
+"206681442351","MARTIN WILLIAM HARRISON","12.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870"
+"206681442352","PATRICK HEINEMANN","12.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870"
+"206681442353","ELEKRA & SPENCER RORIE","12.17","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211"
+"206681442354","ROBERT CLIVE & NOELE CROCKER","12.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211"
+"206681442355","DANIEL JOSEPH & DAVI CARMICHAEL","12.21","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4160"
+"206681442356","WENBO JIANG & XIU FAN CHEN","12.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4303"
+"206681442357","NOEL JEFFREY BRADY","12.27","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4550"
+"206681442358","DARREN RICHARD GOSSNER & MATTHEW JOHN ANDERSON","12.29","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4500"
+"206681442359","STEPHEN MICHAEL & MA JOLLY","12.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442360","SHONA & ARCHIE WALLACE","12.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4504"
+"206681442361","ZOFIA HYS","12.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4300"
+"206681442362","PIROSKA KING","12.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4154"
+"206681442363","ARVIN CHAND & AMITA MOHINI","12.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4503"
+"206681442364","WIETSKE GERARDINA & GAUNT","12.38","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4309"
+"206681442365","MARK REGINALD MATTHEWS","12.39","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4811"
+"206681442366","SHARP ARLEEN & CLINTON","12.4","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6020"
+"206681442367","EMOKE & LASZLO & MAR ZSOLDOS","12.41","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4005"
+"206681442368","MARK & KARON KELLER","12.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4702"
+"206681442369","JODIE KATRINA & TONY MCLACHLAN","12.43","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442370","ALAN WARWICK & LINDA LEWIS","12.45","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4670"
+"206681442371","ADRIAN WAYNE LORRAWAY","12.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4702"
+"206681442372","NICHOLE KRISTY MIKLOS","12.53","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4152"
+"206681442373","NATASHA LEANNE HAYES","12.54","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4017"
+"206681442374","KAREN LEE & DARREN J SHEEHAN","12.55","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4516"
+"206681442375","RACHAEL MAY COLLINS-COOK","12.58","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211"
+"206681442376","TAMARA JUNE WEIGHT & SUSANNE ELIZABETH DEVINE","12.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442377","RODNEY GATES","12.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","7015"
+"206681442378","REBECCA & LEE-ANNE SMITH","12.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","830"
+"206681442379","ADAM WILLIAM JOHNSON","12.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4069"
+"206681442380","ZAC ASHLEY & ALEXAND MORGAN","12.63","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165"
+"206681442381","HILARY SEALY","12.64","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4211"
+"206681442382","NAOMI JOHNSTONE & SCOTT LENAN","12.68","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207"
+"206681442383","WAYNE FLICKER","12.7","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2756"
+"206681442384","BRENDA ANDERSON","12.71","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4811"
+"206681442385","MATTHEW JAMES ALLEN","12.71","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4002"
+"206681442386","MARIA-THERESIA ALTENHOFEN-CROSS & JOHN ERI CROSS","12.72","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4570"
+"206681442387","MELODIE ZYLSTRA","12.72","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4570"
+"206681442388","AMANDA & GRAHAM SWALLOW","12.75","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4720"
+"206681442389","GRAEME ROBERT & ROBI DOHERTY","12.75","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4214"
+"206681442390","GILLIAN LEIGH O'SULLIVAN","12.79","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817"
+"206681442391","JULIA MELLICK","12.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4178"
+"206681442392","TOLISIALE & HAMAKO MAHINA","12.87","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4300"
+"206681442393","SIMON JOHN STEVENS","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4305"
+"206681442394","MICHAEL ANTHONY & DE SNELSON","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817"
+"206681442395","QUERIDA JO LOFTES","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4812"
+"206681442396","LORRAINE VICTORIA DIAS","12.89","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4000"
+"206681442397","JOHN MICHAEL TRAVIS LINLEY","12.92","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442398","CAROLINE HENDRY & RICHARD HOPKINS","12.93","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4067"
+"206681442399","JOSH EAGLE","12.95","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4030"
+"206681442400","MARK SHAWN FROST & BELINDA JEAN MARSHALL","12.95","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4221"
+"206681442401","BRENT & GABRIELLE ANTHONY","12.96","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4213"
+"206681442402","RICHARD SADLER","12.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4065"
+"206681442403","GROVE FRUIT JUICE PTY LTD","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4122"
+"206681442404","LEAH SPARKS","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4561"
+"206681442405","JAMES MAURICE & PATR GORDON","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4870"
+"206681442406","MARK JOSEPH SEARS","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4565"
+"206681442407","SOPHIE VICTORIA STEWART & TREVOR MATTHEW ROWE","13","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4055"
+"206681442408","BOBBY JAMES & SIMONE TAYLOR","13.02","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6254"
+"206681442409","PATRICK MICHAEL & ME REEVES","13.08","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4101"
+"206681442410","MAURICE GROGNUZ","13.09","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4670"
+"206681442411","ALAN PIGOTT & ALAN CONDER","13.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2025"
+"206681442412","SAMANTHA & CAMERON SCHELBACH","13.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4309"
+"206681442413","SHERIDAN ANNE ST CLAIR","13.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4164"
+"206681442414","ANDREW CHRISTIE","13.17","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4521"
+"206681442415","MARK ANDREW & MELISS VINTON","13.17","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4508"
+"206681442416","IRWIN DOUGLAS & MARI SORENSEN","13.2","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4305"
+"206681442417","CARLY SUSAN BENNETTS","13.23","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4034"
+"206681442418","RYAN THORNTON","13.24","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2560"
+"206681442419","RICHARD BAILEY","13.26","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3850"
+"206681442420","DAVID IAN & EMILY RU PRYOR","13.27","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4120"
+"206681442421","WILLIAM SINCLAIR","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4514"
+"206681442422","CATHERINE LUCILLE VALENTINE & ROBERT WAREING","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4165"
+"206681442423","RAYMOND JAMES JONES","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4170"
+"206681442424","ANDREW STEWART T/A AWE COMMUNICATIONS","13.3","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207"
+"206681442425","TONY RONALD OSBOURNE","13.35","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4311"
+"206681442426","MARK JOHN & LENY FIG O'HARA","13.35","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4825"
+"206681442427","CECILIA ASHLEY & DAV BUTLER","13.35","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4816"
+"206681442428","WILLIAM LEATHAM","13.36","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4170"
+"206681442429","MAXWELL RAYMOND MATHERS & DENISE MAREE MELLARE","13.44","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4129"
+"206681442430","RENE & JACQUELINE WASSERFUHR","13.44","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4556"
+"206681442431","MICHAEL LEIGH KENNEDY","13.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4610"
+"206681442432","MEDECO MEDICAL CENTRE BEENLEIGH","13.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4207"
+"206681442433","GARY PAUL & GAYE SHELLEY","13.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4510"
+"206681442434","STEVE & BRENDA GEIGER","13.53","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442435","GREGORY BERNARD JAMES","13.53","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4051"
+"206681442436","ROBBIE DEEBLE","13.56","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4740"
+"206681442437","OWEN TRAYNOR","13.56","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","6076"
+"206681442438","TONI MICHELLE & SHAN MORGAN","13.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4717"
+"206681442439","NICOLAS VAN HORTON","13.59","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4220"
+"206681442440","IAN BOWDEN","13.6","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4886"
+"206681442441","QUEENSLAND COUNTRY CREDIT UNION - JIMBOOMBA","13.61","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442442","ALANA FELLINGHAM","13.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4551"
+"206681442443","ALLAN JOHN & CARMEL BETHEL","13.62","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4871"
+"206681442444","PETER WILLIAM & ODET NORMAN","13.63","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442445","EMILY & MATTHEW PARSLOW","13.68","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4173"
+"206681442446","JAMES OI YUEN GOCK","13.69","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2049"
+"206681442447","JODIE ELIZABETH MORRISON","13.7","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4280"
+"206681442448","BELINDA JANE HARNETT-PETERS & RANDALL NEI PETERS","13.74","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4017"
+"206681442449","JULIEN & CHRISTIAN JUVIGNY","13.78","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4215"
+"206681442450","SUSAN JOY MURRAY & THOMAS HOGAN","13.79","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4217"
+"206681442451","PATRICK COLIN & HEAT HARRIS","13.8","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4506"
+"206681442452","LINDY BOTHA","13.84","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4154"
+"206681442453","PATRICIA LORETTA & D KNIGHT","13.85","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4650"
+"206681442454","COWBURN CONSULTING PTY LTD","13.87","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4000"
+"206681442455","SPENCER JAMES HAMILTON","13.9","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4300"
+"206681442456","ANNA LOUISE ROSS","13.95","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4170"
+"206681442457","JOHN HUGH & BOB SUTHERLAND","13.98","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4509"
+"206681442458","ROBERTA MARY MACNEE","13.99","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4567"
+"206681442459","MATTHEW CHRISTENSEN","14.03","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4152"
+"206681442460","TROY & KIRSTY JEFFRIES","14.04","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4370"
+"206681442461","WILLIAM GEORGE BALSDON","14.05","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4878"
+"206681442462","JAIME LISA CAMPBELL & DANIEL BEVERIDGE","14.07","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4216"
+"206681442463","NANCY JOHANNESSON","14.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4505"
+"206681442464","JOSHUA FRANK SEIDL","14.11","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4122"
+"206681442465","DAVID LESTER","14.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4817"
+"206681442466","MATHIAS DONALD","14.16","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4103"
+"206681442467","GLEN EVAN & HAYLEE L MARTIN","14.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4350"
+"206681442468","JOHN GORDON EVANS","14.19","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4814"
+"206681442469","DIANA NOYCE & LAURENCE VIZER T/A","14.2","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4502"
+"206681442470","GREIG MANLEY","14.22","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3040"
+"206681442471","BRENDON ANSELL","14.23","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4171"
+"206681442472","CATHERINE A ROBERTSON & PAUL BROMILEY","14.27","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4064"
+"206681442473","ADAM LEE & SAMANTHA RANKIN","14.28","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4132"
+"206681442474","BERNICE BOYS","14.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4011"
+"206681442475","HAYLEY MICHELLE BURROW","14.34","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2153"
+"206681442476","SIONE FAUMUINA","14.42","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4815"
+"206681442477","GERARD JARMAN","14.44","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","3337"
+"206681442478","DOUGLAS CECIL GOOLEY","14.48","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","2481"
+"206681442479","ANTHONY AUGUSTO HENRIQUES T/A CAFÚ VILA FRANCA","14.5","SUNCORP GENERAL INSURANCE","2012-03-12 00:00:00","4020"
diff --git a/ckanext/xloader/tests/test_loader.py b/ckanext/xloader/tests/test_loader.py
index f17e6c10..ffb3dcba 100644
--- a/ckanext/xloader/tests/test_loader.py
+++ b/ckanext/xloader/tests/test_loader.py
@@ -1204,6 +1204,18 @@ def test_with_quoted_commas(self, Session):
)
assert len(self._get_records(Session, resource_id)) == 3
+ def test_with_iso_8859_1(self, Session):
+ csv_filepath = get_sample_filepath("non_utf8_sample.csv")
+ resource = factories.Resource()
+ resource_id = resource['id']
+ loader.load_table(
+ csv_filepath,
+ resource_id=resource_id,
+ mimetype="text/csv",
+ logger=logger,
+ )
+ assert len(self._get_records(Session, resource_id)) == 266
+
def test_with_mixed_quotes(self, Session):
csv_filepath = get_sample_filepath("sample_with_mixed_quotes.csv")
resource = factories.Resource()
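The sample above is deliberately saved as ISO-8859-1: the 'CAFÚ' in its last row encodes to the byte 0xDA, which is valid Latin-1 but not valid UTF-8 in that position, so a loader that assumes UTF-8 fails partway through the file. A minimal sketch of the failure mode, independent of the test suite:

    # 0xDA ('Ú' in ISO-8859-1) is not followed by a UTF-8 continuation byte.
    raw = '"ANTHONY AUGUSTO HENRIQUES T/A CAFÚ VILA FRANCA"'.encode('iso-8859-1')

    try:
        raw.decode('utf-8')
    except UnicodeDecodeError as e:
        print('UTF-8 decode fails:', e)

    print(raw.decode('iso-8859-1'))  # round-trips cleanly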
From af6aea6d7fb57009c20041f667a13d66f3d84d6d Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Mon, 23 Oct 2023 15:53:27 +1000
Subject: [PATCH 035/102] [QOLSVC-2984] handle Latin-1 encoding if UTF-8 fails
---
ckanext/xloader/loader.py | 49 +++++++++++++++++++++++++++++++--------
1 file changed, 39 insertions(+), 10 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 4da314a8..aabaefbe 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -10,7 +10,7 @@
import psycopg2
from six.moves import zip
-from tabulator import config as tabulator_config, Stream, TabulatorException
+from tabulator import config as tabulator_config, EncodingError, Stream, TabulatorException
from unidecode import unidecode
import ckan.plugins as p
@@ -31,18 +31,45 @@
tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES
+class UnknownEncodingStream(object):
+ """ Provides a context manager that wraps a Tabulator stream
+ and tries multiple encodings if one fails.
+
+ This is particularly relevant for encodings like Latin-1, which are
+ mostly ASCII-compatible: the sample can be sniffed as UTF-8, only to
+ hit decoding errors later in the file.
+ """
+
+ def __init__(self, filepath, file_format, **kwargs):
+ self.filepath = filepath
+ self.file_format = file_format
+ self.stream_args = kwargs
+
+ def __enter__(self):
+ try:
+ self.stream = Stream(self.filepath, format=self.file_format,
+ **self.stream_args).__enter__()
+ except (EncodingError, UnicodeDecodeError):
+ self.stream = Stream(self.filepath, format=self.file_format,
+ encoding='latin1', **self.stream_args).__enter__()
+ return self.stream
+
+ def __exit__(self, *args):
+ return self.stream.__exit__(*args)
+
+
def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
'''Loads a CSV into DataStore. Does not create the indexes.'''
# Determine the header row
try:
file_format = os.path.splitext(csv_filepath)[1].strip('.')
- with Stream(csv_filepath, format=file_format) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with Stream(csv_filepath, format=file_format) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
@@ -73,7 +100,8 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
logger.info('Ensuring character coding is UTF8')
f_write = tempfile.NamedTemporaryFile(suffix=file_format, delete=False)
try:
- with Stream(csv_filepath, format=file_format, skip_rows=skip_rows) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format,
+ skip_rows=skip_rows) as stream:
stream.save(target=f_write.name, format='csv', encoding='utf-8',
delimiter=delimiter)
csv_filepath = f_write.name
@@ -237,14 +265,14 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
logger.info('Determining column names and types')
try:
file_format = os.path.splitext(table_filepath)[1].strip('.')
- with Stream(table_filepath, format=file_format,
- post_parse=[TypeConverter().convert_types]) as stream:
+ with UnknownEncodingStream(table_filepath, file_format,
+ post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with Stream(table_filepath, format=file_format,
- post_parse=[TypeConverter().convert_types]) as stream:
+ with UnknownEncodingStream(table_filepath, file_format,
+ post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
@@ -281,8 +309,9 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
type_converter = TypeConverter(types=types)
- with Stream(table_filepath, format=file_format, skip_rows=skip_rows,
- post_parse=[type_converter.convert_types]) as stream:
+ with UnknownEncodingStream(table_filepath, file_format,
+ skip_rows=skip_rows,
+ post_parse=[type_converter.convert_types]) as stream:
def row_iterator():
for row in stream:
data_row = {}
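A minimal usage sketch of the new context manager, assuming a local data.csv; extra keyword arguments pass straight through to tabulator's Stream:

    from ckanext.xloader.loader import UnknownEncodingStream

    with UnknownEncodingStream('data.csv', 'csv') as stream:
        for row in stream:
            print(row)

Note the fallback only fires if the decode error surfaces while __enter__ reads the sample; errors that appear later, e.g. during stream.save(), are handled by the next patch.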
From 121592bfc7ae46ab16de2bcfac19cdfb29a3371c Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Tue, 31 Oct 2023 15:57:34 +1000
Subject: [PATCH 036/102] [QOLSVC-2984] handle Latin-1 encoding during 'save'
call
- Handling the encoding during stream initialisation isn't enough here: save() re-reads the whole file and can hit decoding errors past the sampled rows
---
ckanext/xloader/loader.py | 19 +++++++++++++------
1 file changed, 13 insertions(+), 6 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index aabaefbe..f9cb625b 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -30,6 +30,8 @@
MAX_COLUMN_LENGTH = 63
tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES
+ISO_8859_ENCODING = 'latin1'
+
class UnknownEncodingStream(object):
""" Provides a context manager that wraps a Tabulator stream
@@ -51,7 +53,7 @@ def __enter__(self):
**self.stream_args).__enter__()
except (EncodingError, UnicodeDecodeError):
self.stream = Stream(self.filepath, format=self.file_format,
- encoding='latin1', **self.stream_args).__enter__()
+ encoding=ISO_8859_ENCODING, **self.stream_args).__enter__()
return self.stream
def __exit__(self, *args):
@@ -100,11 +102,16 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
logger.info('Ensuring character coding is UTF8')
f_write = tempfile.NamedTemporaryFile(suffix=file_format, delete=False)
try:
- with UnknownEncodingStream(csv_filepath, file_format,
- skip_rows=skip_rows) as stream:
- stream.save(target=f_write.name, format='csv', encoding='utf-8',
- delimiter=delimiter)
- csv_filepath = f_write.name
+ save_args = {'target': f_write.name, 'format': 'csv', 'encoding': 'utf-8', 'delimiter': delimiter}
+ try:
+ with UnknownEncodingStream(csv_filepath, file_format,
+ skip_rows=skip_rows) as stream:
+ stream.save(**save_args)
+ except (EncodingError, UnicodeDecodeError):
+ with Stream(csv_filepath, format=file_format, encoding=ISO_8859_ENCODING,
+ skip_rows=skip_rows) as stream:
+ stream.save(**save_args)
+ csv_filepath = f_write.name
# datastore db connection
engine = get_write_engine()
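Isolated from load_csv, the same two-step fallback looks roughly like this (a sketch assuming tabulator's Stream and EncodingError as imported above; 'latin1' matches ISO_8859_ENCODING):

    from tabulator import EncodingError, Stream

    def save_as_utf8(src, fmt, target):
        save_args = {'target': target, 'format': 'csv', 'encoding': 'utf-8'}
        try:
            with Stream(src, format=fmt) as stream:
                stream.save(**save_args)
        except (EncodingError, UnicodeDecodeError):
            # save() re-reads the whole file, so it can fail even after the
            # sample decoded cleanly; retry with the single-byte encoding.
            with Stream(src, format=fmt, encoding='latin1') as stream:
                stream.save(**save_args)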
From 202d1305d78311c52eda6e07917b406bbea0325e Mon Sep 17 00:00:00 2001
From: William Dutton
Date: Thu, 2 Nov 2023 07:06:14 +1000
Subject: [PATCH 037/102] QOLSVC-2984 use the chardet library to guess the
 encoding and apply it when confidence exceeds 70%, keeping the fallback
---
ckanext/xloader/loader.py | 40 ++++++++++++++++++++++++++++++---------
requirements.txt | 1 +
2 files changed, 32 insertions(+), 9 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index f9cb625b..8f73f67d 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -9,6 +9,7 @@
from decimal import Decimal
import psycopg2
+from chardet.universaldetector import UniversalDetector
from six.moves import zip
from tabulator import config as tabulator_config, EncodingError, Stream, TabulatorException
from unidecode import unidecode
@@ -42,15 +43,21 @@ class UnknownEncodingStream(object):
only to run into problems later in the file.
"""
- def __init__(self, filepath, file_format, **kwargs):
+ def __init__(self, filepath, file_format, decoding_result, **kwargs):
self.filepath = filepath
self.file_format = file_format
self.stream_args = kwargs
+ self.decoding_result = decoding_result # {'encoding': 'EUC-JP', 'confidence': 0.99}
def __enter__(self):
try:
- self.stream = Stream(self.filepath, format=self.file_format,
- **self.stream_args).__enter__()
+
+ if self.decoding_result and self.decoding_result['confidence'] and self.decoding_result['confidence'] > 0.7:
+ self.stream = Stream(self.filepath, format=self.file_format, encoding=self.decoding_result['encoding'],
+ **self.stream_args).__enter__()
+ else:
+ self.stream = Stream(self.filepath, format=self.file_format, **self.stream_args).__enter__()
+
except (EncodingError, UnicodeDecodeError):
self.stream = Stream(self.filepath, format=self.file_format,
encoding=ISO_8859_ENCODING, **self.stream_args).__enter__()
@@ -60,18 +67,31 @@ def __exit__(self, *args):
return self.stream.__exit__(*args)
+def detect_encoding(file_path):
+ detector = UniversalDetector()
+ with open(file_path, 'rb') as file:
+ for line in file:
+ detector.feed(line)
+ if detector.done:
+ break
+ detector.close()
+ return detector.result # i.e. {'encoding': 'EUC-JP', 'confidence': 0.99}
+
+
def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
'''Loads a CSV into DataStore. Does not create the indexes.'''
+ decoding_result = detect_encoding(csv_filepath)
+ logger.info("load_csv: Decoded encoding: %s", decoding_result)
# Determine the header row
try:
file_format = os.path.splitext(csv_filepath)[1].strip('.')
- with UnknownEncodingStream(csv_filepath, file_format) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format, decoding_result=decoding_result) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with UnknownEncodingStream(csv_filepath, file_format) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format, decoding_result=decoding_result) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
@@ -104,7 +124,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
try:
save_args = {'target': f_write.name, 'format': 'csv', 'encoding': 'utf-8', 'delimiter': delimiter}
try:
- with UnknownEncodingStream(csv_filepath, file_format,
+ with UnknownEncodingStream(csv_filepath, file_format, decoding_result=decoding_result,
skip_rows=skip_rows) as stream:
stream.save(**save_args)
except (EncodingError, UnicodeDecodeError):
@@ -270,15 +290,17 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
# Determine the header row
logger.info('Determining column names and types')
+ decoding_result = detect_encoding(table_filepath)
+ logger.info("load_table: Decoded encoding: %s", decoding_result)
try:
file_format = os.path.splitext(table_filepath)[1].strip('.')
- with UnknownEncodingStream(table_filepath, file_format,
+ with UnknownEncodingStream(table_filepath, file_format, decoding_result=decoding_result,
post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with UnknownEncodingStream(table_filepath, file_format,
+ with UnknownEncodingStream(table_filepath, file_format, decoding_result=decoding_result,
post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
@@ -317,7 +339,7 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
type_converter = TypeConverter(types=types)
with UnknownEncodingStream(table_filepath, file_format,
- skip_rows=skip_rows,
+ skip_rows=skip_rows, decoding_result=decoding_result,
post_parse=[type_converter.convert_types]) as stream:
def row_iterator():
for row in stream:
diff --git a/requirements.txt b/requirements.txt
index 58540beb..fe92b6d7 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -4,3 +4,4 @@ six>=1.12.0
tabulator==1.53.5
Unidecode==1.0.22
python-dateutil>=2.8.2
+chardet==5.2.0
\ No newline at end of file
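A quick way to exercise the detection step on its own (a sketch assuming a local file path; UniversalDetector is chardet's incremental API, and the result dict is what UnknownEncodingStream now receives):

    from chardet.universaldetector import UniversalDetector

    detector = UniversalDetector()
    with open('non_utf8_sample.csv', 'rb') as f:
        for line in f:
            detector.feed(line)
            if detector.done:
                break
    detector.close()
    print(detector.result)  # e.g. {'encoding': 'ISO-8859-1', 'confidence': 0.73, ...}

    # The stream only trusts the guess above the 70% threshold:
    result = detector.result
    if result and result['confidence'] and result['confidence'] > 0.7:
        print('passing encoding=%r to Stream' % result['encoding'])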
From cb54151a2dc4ea579bf1f107c82f501e4baf0636 Mon Sep 17 00:00:00 2001
From: William Dutton
Date: Thu, 2 Nov 2023 07:06:44 +1000
Subject: [PATCH 038/102] QOLSVC-2984 address review comments: pass decoding_result positionally
---
ckanext/xloader/loader.py | 16 ++++++++--------
1 file changed, 8 insertions(+), 8 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index 8f73f67d..233a46e6 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -75,7 +75,7 @@ def detect_encoding(file_path):
if detector.done:
break
detector.close()
- return detector.result # i.e. {'encoding': 'EUC-JP', 'confidence': 0.99}
+ return detector.result # e.g. {'encoding': 'EUC-JP', 'confidence': 0.99}
def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
@@ -86,12 +86,12 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
# Determine the header row
try:
file_format = os.path.splitext(csv_filepath)[1].strip('.')
- with UnknownEncodingStream(csv_filepath, file_format, decoding_result=decoding_result) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format, decoding_result) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with UnknownEncodingStream(csv_filepath, file_format, decoding_result=decoding_result) as stream:
+ with UnknownEncodingStream(csv_filepath, file_format, decoding_result) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
raise LoaderError('Tabulator error: {}'.format(e))
@@ -124,7 +124,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
try:
save_args = {'target': f_write.name, 'format': 'csv', 'encoding': 'utf-8', 'delimiter': delimiter}
try:
- with UnknownEncodingStream(csv_filepath, file_format, decoding_result=decoding_result,
+ with UnknownEncodingStream(csv_filepath, file_format, decoding_result,
skip_rows=skip_rows) as stream:
stream.save(**save_args)
except (EncodingError, UnicodeDecodeError):
@@ -294,13 +294,13 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
logger.info("load_table: Decoded encoding: %s", decoding_result)
try:
file_format = os.path.splitext(table_filepath)[1].strip('.')
- with UnknownEncodingStream(table_filepath, file_format, decoding_result=decoding_result,
+ with UnknownEncodingStream(table_filepath, file_format, decoding_result,
post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException:
try:
file_format = mimetype.lower().split('/')[-1]
- with UnknownEncodingStream(table_filepath, file_format, decoding_result=decoding_result,
+ with UnknownEncodingStream(table_filepath, file_format, decoding_result,
post_parse=[TypeConverter().convert_types]) as stream:
header_offset, headers = headers_guess(stream.sample)
except TabulatorException as e:
@@ -338,8 +338,8 @@ def load_table(table_filepath, resource_id, mimetype='text/csv', logger=None):
headers = [header.strip()[:MAX_COLUMN_LENGTH] for header in headers if header.strip()]
type_converter = TypeConverter(types=types)
- with UnknownEncodingStream(table_filepath, file_format,
- skip_rows=skip_rows, decoding_result=decoding_result,
+ with UnknownEncodingStream(table_filepath, file_format, decoding_result,
+ skip_rows=skip_rows,
post_parse=[type_converter.convert_types]) as stream:
def row_iterator():
for row in stream:
From cd32d61a7ec5e4808f04ccee509128314b8da81a Mon Sep 17 00:00:00 2001
From: ThrawnCA
Date: Thu, 2 Nov 2023 09:18:30 +1000
Subject: [PATCH 039/102] [QOLSVC-2984] sniff using Windows-1252 encoding
rather than Latin-1
- Windows-1252 is a superset of Latin-1's printable characters, which makes it more useful for this purpose
---
ckanext/xloader/loader.py | 6 +++---
1 file changed, 3 insertions(+), 3 deletions(-)
diff --git a/ckanext/xloader/loader.py b/ckanext/xloader/loader.py
index f9cb625b..fc5b4ae3 100644
--- a/ckanext/xloader/loader.py
+++ b/ckanext/xloader/loader.py
@@ -30,7 +30,7 @@
MAX_COLUMN_LENGTH = 63
tabulator_config.CSV_SAMPLE_LINES = CSV_SAMPLE_LINES
-ISO_8859_ENCODING = 'latin1'
+SINGLE_BYTE_ENCODING = 'cp1252'
class UnknownEncodingStream(object):
@@ -53,7 +53,7 @@ def __enter__(self):
**self.stream_args).__enter__()
except (EncodingError, UnicodeDecodeError):
self.stream = Stream(self.filepath, format=self.file_format,
- encoding=ISO_8859_ENCODING, **self.stream_args).__enter__()
+ encoding=SINGLE_BYTE_ENCODING, **self.stream_args).__enter__()
return self.stream
def __exit__(self, *args):
@@ -108,7 +108,7 @@ def load_csv(csv_filepath, resource_id, mimetype='text/csv', logger=None):
skip_rows=skip_rows) as stream:
stream.save(**save_args)
except (EncodingError, UnicodeDecodeError):
- with Stream(csv_filepath, format=file_format, encoding=ISO_8859_ENCODING,
+ with Stream(csv_filepath, format=file_format, encoding=SINGLE_BYTE_ENCODING,
skip_rows=skip_rows) as stream:
stream.save(**save_args)
csv_filepath = f_write.name
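The 'superset' claim is about the 0x80-0x9F range: ISO-8859-1 maps those bytes to invisible control characters, while Windows-1252 maps most of them to printable punctuation that real-world files actually use. A short demonstration:

    smart_quotes = b'\x93hello\x94'             # Windows-style curly quotes
    print(smart_quotes.decode('cp1252'))        # “hello”
    print(repr(smart_quotes.decode('latin1')))  # '\x93hello\x94' control chars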
From ae354e57d100be8ca0c494e94bebc657373af56b Mon Sep 17 00:00:00 2001
From: Jesse Vickery
Date: Wed, 8 Nov 2023 20:12:14 +0000
Subject: [PATCH 040/102] feat(templates): added missing csrf field;
- Added csrf field to delete button form.
---
ckanext/xloader/templates/xloader/resource_data.html | 2 ++
1 file changed, 2 insertions(+)
diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html
index 0ae1d9b5..11ce3404 100644
--- a/ckanext/xloader/templates/xloader/resource_data.html
+++ b/ckanext/xloader/templates/xloader/resource_data.html
@@ -9,9 +9,11 @@
{% block delete_ds_button %}
From 340f629c28a1d39090e75c4a46eb2503cd1295d5 Mon Sep 17 00:00:00 2001
From: Jesse Vickery
Date: Wed, 8 Nov 2023 20:15:09 +0000
Subject: [PATCH 041/102] feat(templates): added missing form action;
- Added post action to delete button form.
---
ckanext/xloader/templates/xloader/resource_data.html | 4 +++-
1 file changed, 3 insertions(+), 1 deletion(-)
diff --git a/ckanext/xloader/templates/xloader/resource_data.html b/ckanext/xloader/templates/xloader/resource_data.html
index 11ce3404..85b334fd 100644
--- a/ckanext/xloader/templates/xloader/resource_data.html
+++ b/ckanext/xloader/templates/xloader/resource_data.html
@@ -5,13 +5,15 @@
{% block primary_content_inner %}
{% set action = h.url_for('xloader.resource_data', id=pkg.name, resource_id=res.id) %}
+ {% set delete_action = h.url_for('xloader.delete_datastore_table', id=pkg.id, resource_id=res.id) %}
{% set show_table = true %}
{% block delete_ds_button %}
-