diff --git a/crate_anon/anonymise/anonymise.py b/crate_anon/anonymise/anonymise.py
index c8c304ab..fd712145 100644
--- a/crate_anon/anonymise/anonymise.py
+++ b/crate_anon/anonymise/anonymise.py
@@ -40,6 +40,9 @@
from cardinal_pythonlib.datetimefunc import get_now_utc_pendulum
from cardinal_pythonlib.sqlalchemy.core_query import count_star, exists_plain
+from cardinal_pythonlib.sqlalchemy.insert_on_duplicate import (
+ insert_with_upsert_if_supported,
+)
from cardinal_pythonlib.sqlalchemy.schema import (
add_index,
get_column_names,
@@ -74,7 +77,6 @@
)
from crate_anon.common.parallel import is_my_job_by_hash, is_my_job_by_int
from crate_anon.common.sql import matches_tabledef
-from crate_anon.common.sqlalchemy import insert_with_upsert_if_supported
log = logging.getLogger(__name__)
diff --git a/crate_anon/anonymise/tests/researcher_report_tests.py b/crate_anon/anonymise/tests/researcher_report_tests.py
index dc1e5a6e..3a1dda44 100644
--- a/crate_anon/anonymise/tests/researcher_report_tests.py
+++ b/crate_anon/anonymise/tests/researcher_report_tests.py
@@ -117,20 +117,27 @@ def index_of_list_substring(items: List[str], substr: str) -> int:
patient_found = False
note_found = False
for page in reader.pages:
- lines = page.extract_text().splitlines()
+ lines = page.extract_text().replace("\t", " ").splitlines()
+ # Sometimes spaces come back as tabs; fix that.
+
rows_index = index_of_list_substring(
lines,
"Number of rows in this table:",
)
+ # The label text here is from
+ # crate_anon/anonymise/templates/researcher_report/table.html.
+
+ if rows_index < 0:
+ continue
- if rows_index > 0:
- num_rows = int(lines[rows_index + 1])
+ num_rows = int(lines[rows_index + 1])
+ table_name = lines[0]
- if lines[0] == "patient":
+ if table_name == "patient":
patient_found = True
self.assertEqual(num_rows, self.num_patients)
- elif lines[0] == "note":
+ elif table_name == "note":
note_found = True
self.assertEqual(
num_rows, self.num_patients * self.notes_per_patient
diff --git a/crate_anon/common/sqlalchemy.py b/crate_anon/common/sqlalchemy.py
deleted file mode 100644
index 7a1af517..00000000
--- a/crate_anon/common/sqlalchemy.py
+++ /dev/null
@@ -1,130 +0,0 @@
-"""
-crate_anon/common/sqlalchemy.py
-
-===============================================================================
-
- Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
- Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
-
- This file is part of CRATE.
-
- CRATE is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- CRATE is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with CRATE. If not, see .
-
-===============================================================================
-
-**Additional SQLAlchemy assistance functions.**
-
-"""
-
-# =============================================================================
-# Imports
-# =============================================================================
-
-import logging
-from typing import Dict
-
-from cardinal_pythonlib.sqlalchemy.dialect import get_dialect_name
-from sqlalchemy.dialects.mysql import insert as insert_mysql
-from sqlalchemy.engine.interfaces import Dialect
-from sqlalchemy.schema import Table
-from sqlalchemy.orm.session import Session
-from sqlalchemy.sql.expression import Insert
-
-log = logging.getLogger(__name__)
-
-
-# =============================================================================
-# INSERT ... ON DUPLICATE KEY UPDATE
-# =============================================================================
-
-
-def insert_with_upsert_if_supported(
- table: Table,
- values: Dict,
- session: Session = None,
- dialect: Dialect = None,
-) -> Insert:
- """
- Creates an "upsert" (INSERT ... ON DUPLICATE KEY UPDATE) statment if
- possible (e.g. MySQL/MariaDB). Failing that, returns an INSERT statement.
-
- Args:
- table:
- SQLAlchemy Table in which to insert values.
- values:
- Values to insert (column: value dictionary).
- session:
- Session from which to extract a dialect.
- dialect:
- Explicit dialect.
-
- Previously (prior to 2025-01-05 and prior to SQLAlchemy 2), we did this:
-
- .. code-block:: python
-
- q = sqla_table.insert_on_duplicate().values(destvalues)
-
- This "insert_on_duplicate" member was available because
- crate_anon/anonymise/config.py ran monkeypatch_TableClause(), from
- cardinal_pythonlib.sqlalchemy.insert_on_duplicate. The function did dialect
- detection via "@compiles(InsertOnDuplicate, SqlaDialectName.MYSQL)". But
- it did nasty text-based hacking to get the column names.
-
- However, SQLAlchemy now supports "upsert" for MySQL:
- https://docs.sqlalchemy.org/en/20/dialects/mysql.html#insert-on-duplicate-key-update-upsert
-
- Note the varying argument forms possible.
-
- The only other question: if the dialect is not MySQL, will the reference to
- insert_stmt.on_duplicate_key_update crash or just not do anything? To test:
-
- .. code-block:: python
-
- from sqlalchemy import table
- t = table("tablename")
- destvalues = {"a": 1}
-
- insert_stmt = t.insert().values(destvalues)
- on_dup_key_stmt = insert_stmt.on_duplicate_key_update(destvalues)
-
- This does indeed crash (AttributeError: 'Insert' object has no attribute
- 'on_duplicate_key_update'). In contrast, this works:
-
- .. code-block:: python
-
- from sqlalchemy.dialects.mysql import insert as insert_mysql
-
- insert2 = insert_mysql(t).values(destvalues)
- on_dup_key2 = insert2.on_duplicate_key_update(destvalues)
-
- Note also that an insert() statement doesn't gain a
- "on_duplicate_key_update" attribute just because MySQL is used (the insert
- statement doesn't know that yet).
-
- The old way was good for dialect detection but ugly for textual analysis of
- the query. The new way is more elegant in the query, but less for dialect
- detection. Overall, new way likely preferable.
-
- """
- if bool(session) + bool(dialect) != 1:
- raise ValueError(
- f"Must specify exactly one of: {session=}, {dialect=}"
- )
- dialect_name = get_dialect_name(dialect or session)
- if dialect_name == "mysql":
- return (
- insert_mysql(table).values(values).on_duplicate_key_update(values)
- )
- else:
- return table.insert().values(values)
diff --git a/crate_anon/common/tests/sqlalchemy_tests.py b/crate_anon/common/tests/sqlalchemy_tests.py
deleted file mode 100644
index aa52f0bb..00000000
--- a/crate_anon/common/tests/sqlalchemy_tests.py
+++ /dev/null
@@ -1,93 +0,0 @@
-"""
-crate_anon/common/tests/sqlalchemy_tests.py
-
-===============================================================================
-
- Copyright (C) 2015, University of Cambridge, Department of Psychiatry.
- Created by Rudolf Cardinal (rnc1001@cam.ac.uk).
-
- This file is part of CRATE.
-
- CRATE is free software: you can redistribute it and/or modify
- it under the terms of the GNU General Public License as published by
- the Free Software Foundation, either version 3 of the License, or
- (at your option) any later version.
-
- CRATE is distributed in the hope that it will be useful,
- but WITHOUT ANY WARRANTY; without even the implied warranty of
- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
- GNU General Public License for more details.
-
- You should have received a copy of the GNU General Public License
- along with CRATE. If not, see .
-
-===============================================================================
-
-Unit testing.
-
-"""
-
-# =============================================================================
-# Imports
-# =============================================================================
-
-import logging
-from unittest import TestCase
-
-from sqlalchemy import Column, String, Integer, create_engine
-from sqlalchemy.dialects.mysql.base import MySQLDialect
-from sqlalchemy.orm import declarative_base
-from sqlalchemy.orm.session import Session
-from sqlalchemy.exc import IntegrityError
-
-from crate_anon.common.sqlalchemy import insert_with_upsert_if_supported
-
-log = logging.getLogger(__name__)
-
-
-# =============================================================================
-# Unit tests
-# =============================================================================
-
-
-class SqlAlchemyTests(TestCase):
-
- def test_insert_with_upsert_if_supported_syntax(self) -> None:
- # noinspection PyPep8Naming
- Base = declarative_base()
-
- class OrmObject(Base):
- __tablename__ = "sometable"
- id = Column(Integer, primary_key=True)
- name = Column(String)
-
- sqlite_engine = create_engine("sqlite://", echo=True, future=True)
- Base.metadata.create_all(sqlite_engine)
-
- session = Session(sqlite_engine)
-
- d1 = dict(id=1, name="One")
- d2 = dict(id=2, name="Two")
-
- table = OrmObject.__table__
-
- insert_1 = table.insert().values(d1)
- insert_2 = table.insert().values(d2)
- session.execute(insert_1)
- session.execute(insert_2)
- with self.assertRaises(IntegrityError):
- session.execute(insert_1)
-
- upsert_1 = insert_with_upsert_if_supported(
- table=table, values=d1, session=session
- )
- odku = "ON DUPLICATE KEY UPDATE"
- self.assertFalse(odku in str(upsert_1))
-
- upsert_2 = insert_with_upsert_if_supported(
- table=table, values=d1, dialect=MySQLDialect()
- )
- self.assertTrue(odku in str(upsert_2))
-
- # We can't test fully here without a MySQL connection.
- # But syntax tested separately in upsert_test_1.sql
diff --git a/crate_anon/common/tests/upsert_test_1.sql b/crate_anon/common/tests/upsert_test_1.sql
index cf6151a1..e69de29b 100644
--- a/crate_anon/common/tests/upsert_test_1.sql
+++ b/crate_anon/common/tests/upsert_test_1.sql
@@ -1,15 +0,0 @@
--- For MySQL:
-
-USE crud; -- pre-existing database
-
-CREATE TABLE ut (a INTEGER PRIMARY KEY, b INTEGER);
-
-INSERT INTO ut (a, b) VALUES (1, 101); -- OK
-INSERT INTO ut (a, b) VALUES (2, 102); -- OK
-
-INSERT INTO ut (a, b) VALUES (1, 101); -- fails; duplicate key
-
-INSERT INTO ut (a, b) VALUES (1, 101) ON DUPLICATE KEY UPDATE a = 1, b = 103;
- -- succeeds and changes only one row
-
-SELECT * FROM ut;
diff --git a/crate_anon/preprocess/preprocess_pcmis.py b/crate_anon/preprocess/preprocess_pcmis.py
index 9e72ee2f..dd9a4dda 100755
--- a/crate_anon/preprocess/preprocess_pcmis.py
+++ b/crate_anon/preprocess/preprocess_pcmis.py
@@ -860,9 +860,7 @@ def process_table(
# ---------------------------------------------------------------------
# SQL Server requires Table-bound columns in order to generate DDL:
if adding_crate_pk:
- crate_pk_col = make_bigint_autoincrement_column(
- CRATE_COL_PK, engine.dialect
- )
+ crate_pk_col = make_bigint_autoincrement_column(CRATE_COL_PK)
table.append_column(crate_pk_col)
add_columns(engine, table, [crate_pk_col])
ensure_columns_present(
diff --git a/crate_anon/preprocess/preprocess_rio.py b/crate_anon/preprocess/preprocess_rio.py
index 238e09f9..c0a76e94 100755
--- a/crate_anon/preprocess/preprocess_rio.py
+++ b/crate_anon/preprocess/preprocess_rio.py
@@ -584,9 +584,7 @@ def process_patient_table(
# ... can't do NOT NULL; need to populate it
required_cols.append(rio_pk)
else: # RCEP type, or no PK in RiO
- crate_pk_col = make_bigint_autoincrement_column(
- CRATE_COL_PK, engine.dialect
- )
+ crate_pk_col = make_bigint_autoincrement_column(CRATE_COL_PK)
# ... autopopulates
crate_rio_number_col = Column(
CRATE_COL_RIO_NUMBER, BigInteger, nullable=True
@@ -693,9 +691,7 @@ def process_nonpatient_table(
if other_pk_col: # table has a primary key already
crate_pk_col = Column(CRATE_COL_PK, BigInteger, nullable=True)
else:
- crate_pk_col = make_bigint_autoincrement_column(
- CRATE_COL_PK, engine.dialect
- )
+ crate_pk_col = make_bigint_autoincrement_column(CRATE_COL_PK)
table.append_column(crate_pk_col) # must be Table-bound, as above
add_columns(engine, table, [crate_pk_col])
if not configoptions.print_sql_only:
diff --git a/crate_anon/preprocess/preprocess_systmone.py b/crate_anon/preprocess/preprocess_systmone.py
index 76b9e348..d35fc931 100755
--- a/crate_anon/preprocess/preprocess_systmone.py
+++ b/crate_anon/preprocess/preprocess_systmone.py
@@ -266,9 +266,7 @@ def preprocess_systmone(
# Create step #1
if not drop_danger_drop and table_needs_pk:
- crate_pk_col = make_bigint_autoincrement_column(
- CRATE_COL_PK, engine.dialect
- )
+ crate_pk_col = make_bigint_autoincrement_column(CRATE_COL_PK)
# SQL Server requires Table-bound columns in order to generate DDL:
table.append_column(crate_pk_col)
add_columns(engine, table, [crate_pk_col])
diff --git a/devnotes/2025_01_sqlalchemy2.txt b/devnotes/2025_01_sqlalchemy2.txt
index 501d263f..7859e5b0 100644
--- a/devnotes/2025_01_sqlalchemy2.txt
+++ b/devnotes/2025_01_sqlalchemy2.txt
@@ -156,6 +156,5 @@ Migration to 2.0 Step Seven - Test against a SQLAlchemy 2.0 Release
~~~
-todo: CURRENTLY ON: cardinal_pythonlib / pytest -k orm_query_tests --log-cli-level INFO
-
-todo: *** also # noqa \n
+todo: CURRENTLY ON:
+ STEP THREE TESTS
diff --git a/setup.py b/setup.py
index 5abcbc3c..843dee79 100755
--- a/setup.py
+++ b/setup.py
@@ -69,7 +69,7 @@
"appdirs==1.4.4", # where to store some temporary data
"arrow==0.15.7", # [pin exact version from cardinal_pythonlib]
"beautifulsoup4==4.9.1", # [pin exact version from cardinal_pythonlib]
- "cardinal_pythonlib==1.1.28", # RNC libraries
+ "cardinal_pythonlib==2.0.0", # RNC libraries
"cairosvg==2.7.0", # work with SVG files
"celery==5.2.7", # back-end scheduling
"chardet==3.0.4", # character encoding detection for cardinal_pythonlib