diff --git a/crate_anon/anonymise/anonymise.py b/crate_anon/anonymise/anonymise.py index c8c304ab..fd712145 100644 --- a/crate_anon/anonymise/anonymise.py +++ b/crate_anon/anonymise/anonymise.py @@ -40,6 +40,9 @@ from cardinal_pythonlib.datetimefunc import get_now_utc_pendulum from cardinal_pythonlib.sqlalchemy.core_query import count_star, exists_plain +from cardinal_pythonlib.sqlalchemy.insert_on_duplicate import ( + insert_with_upsert_if_supported, +) from cardinal_pythonlib.sqlalchemy.schema import ( add_index, get_column_names, @@ -74,7 +77,6 @@ ) from crate_anon.common.parallel import is_my_job_by_hash, is_my_job_by_int from crate_anon.common.sql import matches_tabledef -from crate_anon.common.sqlalchemy import insert_with_upsert_if_supported log = logging.getLogger(__name__) diff --git a/crate_anon/anonymise/tests/researcher_report_tests.py b/crate_anon/anonymise/tests/researcher_report_tests.py index dc1e5a6e..3a1dda44 100644 --- a/crate_anon/anonymise/tests/researcher_report_tests.py +++ b/crate_anon/anonymise/tests/researcher_report_tests.py @@ -117,20 +117,27 @@ def index_of_list_substring(items: List[str], substr: str) -> int: patient_found = False note_found = False for page in reader.pages: - lines = page.extract_text().splitlines() + lines = page.extract_text().replace("\t", " ").splitlines() + # Sometimes spaces come back as tabs; fix that. + rows_index = index_of_list_substring( lines, "Number of rows in this table:", ) + # The label text here is from + # crate_anon/anonymise/templates/researcher_report/table.html. + + if rows_index < 0: + continue - if rows_index > 0: - num_rows = int(lines[rows_index + 1]) + num_rows = int(lines[rows_index + 1]) + table_name = lines[0] - if lines[0] == "patient": + if table_name == "patient": patient_found = True self.assertEqual(num_rows, self.num_patients) - elif lines[0] == "note": + elif table_name == "note": note_found = True self.assertEqual( num_rows, self.num_patients * self.notes_per_patient diff --git a/crate_anon/common/sqlalchemy.py b/crate_anon/common/sqlalchemy.py deleted file mode 100644 index 7a1af517..00000000 --- a/crate_anon/common/sqlalchemy.py +++ /dev/null @@ -1,130 +0,0 @@ -""" -crate_anon/common/sqlalchemy.py - -=============================================================================== - - Copyright (C) 2015, University of Cambridge, Department of Psychiatry. - Created by Rudolf Cardinal (rnc1001@cam.ac.uk). - - This file is part of CRATE. - - CRATE is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - CRATE is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with CRATE. If not, see . - -=============================================================================== - -**Additional SQLAlchemy assistance functions.** - -""" - -# ============================================================================= -# Imports -# ============================================================================= - -import logging -from typing import Dict - -from cardinal_pythonlib.sqlalchemy.dialect import get_dialect_name -from sqlalchemy.dialects.mysql import insert as insert_mysql -from sqlalchemy.engine.interfaces import Dialect -from sqlalchemy.schema import Table -from sqlalchemy.orm.session import Session -from sqlalchemy.sql.expression import Insert - -log = logging.getLogger(__name__) - - -# ============================================================================= -# INSERT ... ON DUPLICATE KEY UPDATE -# ============================================================================= - - -def insert_with_upsert_if_supported( - table: Table, - values: Dict, - session: Session = None, - dialect: Dialect = None, -) -> Insert: - """ - Creates an "upsert" (INSERT ... ON DUPLICATE KEY UPDATE) statment if - possible (e.g. MySQL/MariaDB). Failing that, returns an INSERT statement. - - Args: - table: - SQLAlchemy Table in which to insert values. - values: - Values to insert (column: value dictionary). - session: - Session from which to extract a dialect. - dialect: - Explicit dialect. - - Previously (prior to 2025-01-05 and prior to SQLAlchemy 2), we did this: - - .. code-block:: python - - q = sqla_table.insert_on_duplicate().values(destvalues) - - This "insert_on_duplicate" member was available because - crate_anon/anonymise/config.py ran monkeypatch_TableClause(), from - cardinal_pythonlib.sqlalchemy.insert_on_duplicate. The function did dialect - detection via "@compiles(InsertOnDuplicate, SqlaDialectName.MYSQL)". But - it did nasty text-based hacking to get the column names. - - However, SQLAlchemy now supports "upsert" for MySQL: - https://docs.sqlalchemy.org/en/20/dialects/mysql.html#insert-on-duplicate-key-update-upsert - - Note the varying argument forms possible. - - The only other question: if the dialect is not MySQL, will the reference to - insert_stmt.on_duplicate_key_update crash or just not do anything? To test: - - .. code-block:: python - - from sqlalchemy import table - t = table("tablename") - destvalues = {"a": 1} - - insert_stmt = t.insert().values(destvalues) - on_dup_key_stmt = insert_stmt.on_duplicate_key_update(destvalues) - - This does indeed crash (AttributeError: 'Insert' object has no attribute - 'on_duplicate_key_update'). In contrast, this works: - - .. code-block:: python - - from sqlalchemy.dialects.mysql import insert as insert_mysql - - insert2 = insert_mysql(t).values(destvalues) - on_dup_key2 = insert2.on_duplicate_key_update(destvalues) - - Note also that an insert() statement doesn't gain a - "on_duplicate_key_update" attribute just because MySQL is used (the insert - statement doesn't know that yet). - - The old way was good for dialect detection but ugly for textual analysis of - the query. The new way is more elegant in the query, but less for dialect - detection. Overall, new way likely preferable. - - """ - if bool(session) + bool(dialect) != 1: - raise ValueError( - f"Must specify exactly one of: {session=}, {dialect=}" - ) - dialect_name = get_dialect_name(dialect or session) - if dialect_name == "mysql": - return ( - insert_mysql(table).values(values).on_duplicate_key_update(values) - ) - else: - return table.insert().values(values) diff --git a/crate_anon/common/tests/sqlalchemy_tests.py b/crate_anon/common/tests/sqlalchemy_tests.py deleted file mode 100644 index aa52f0bb..00000000 --- a/crate_anon/common/tests/sqlalchemy_tests.py +++ /dev/null @@ -1,93 +0,0 @@ -""" -crate_anon/common/tests/sqlalchemy_tests.py - -=============================================================================== - - Copyright (C) 2015, University of Cambridge, Department of Psychiatry. - Created by Rudolf Cardinal (rnc1001@cam.ac.uk). - - This file is part of CRATE. - - CRATE is free software: you can redistribute it and/or modify - it under the terms of the GNU General Public License as published by - the Free Software Foundation, either version 3 of the License, or - (at your option) any later version. - - CRATE is distributed in the hope that it will be useful, - but WITHOUT ANY WARRANTY; without even the implied warranty of - MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - GNU General Public License for more details. - - You should have received a copy of the GNU General Public License - along with CRATE. If not, see . - -=============================================================================== - -Unit testing. - -""" - -# ============================================================================= -# Imports -# ============================================================================= - -import logging -from unittest import TestCase - -from sqlalchemy import Column, String, Integer, create_engine -from sqlalchemy.dialects.mysql.base import MySQLDialect -from sqlalchemy.orm import declarative_base -from sqlalchemy.orm.session import Session -from sqlalchemy.exc import IntegrityError - -from crate_anon.common.sqlalchemy import insert_with_upsert_if_supported - -log = logging.getLogger(__name__) - - -# ============================================================================= -# Unit tests -# ============================================================================= - - -class SqlAlchemyTests(TestCase): - - def test_insert_with_upsert_if_supported_syntax(self) -> None: - # noinspection PyPep8Naming - Base = declarative_base() - - class OrmObject(Base): - __tablename__ = "sometable" - id = Column(Integer, primary_key=True) - name = Column(String) - - sqlite_engine = create_engine("sqlite://", echo=True, future=True) - Base.metadata.create_all(sqlite_engine) - - session = Session(sqlite_engine) - - d1 = dict(id=1, name="One") - d2 = dict(id=2, name="Two") - - table = OrmObject.__table__ - - insert_1 = table.insert().values(d1) - insert_2 = table.insert().values(d2) - session.execute(insert_1) - session.execute(insert_2) - with self.assertRaises(IntegrityError): - session.execute(insert_1) - - upsert_1 = insert_with_upsert_if_supported( - table=table, values=d1, session=session - ) - odku = "ON DUPLICATE KEY UPDATE" - self.assertFalse(odku in str(upsert_1)) - - upsert_2 = insert_with_upsert_if_supported( - table=table, values=d1, dialect=MySQLDialect() - ) - self.assertTrue(odku in str(upsert_2)) - - # We can't test fully here without a MySQL connection. - # But syntax tested separately in upsert_test_1.sql diff --git a/crate_anon/common/tests/upsert_test_1.sql b/crate_anon/common/tests/upsert_test_1.sql index cf6151a1..e69de29b 100644 --- a/crate_anon/common/tests/upsert_test_1.sql +++ b/crate_anon/common/tests/upsert_test_1.sql @@ -1,15 +0,0 @@ --- For MySQL: - -USE crud; -- pre-existing database - -CREATE TABLE ut (a INTEGER PRIMARY KEY, b INTEGER); - -INSERT INTO ut (a, b) VALUES (1, 101); -- OK -INSERT INTO ut (a, b) VALUES (2, 102); -- OK - -INSERT INTO ut (a, b) VALUES (1, 101); -- fails; duplicate key - -INSERT INTO ut (a, b) VALUES (1, 101) ON DUPLICATE KEY UPDATE a = 1, b = 103; - -- succeeds and changes only one row - -SELECT * FROM ut; diff --git a/crate_anon/preprocess/preprocess_pcmis.py b/crate_anon/preprocess/preprocess_pcmis.py index 9e72ee2f..dd9a4dda 100755 --- a/crate_anon/preprocess/preprocess_pcmis.py +++ b/crate_anon/preprocess/preprocess_pcmis.py @@ -860,9 +860,7 @@ def process_table( # --------------------------------------------------------------------- # SQL Server requires Table-bound columns in order to generate DDL: if adding_crate_pk: - crate_pk_col = make_bigint_autoincrement_column( - CRATE_COL_PK, engine.dialect - ) + crate_pk_col = make_bigint_autoincrement_column(CRATE_COL_PK) table.append_column(crate_pk_col) add_columns(engine, table, [crate_pk_col]) ensure_columns_present( diff --git a/crate_anon/preprocess/preprocess_rio.py b/crate_anon/preprocess/preprocess_rio.py index 238e09f9..c0a76e94 100755 --- a/crate_anon/preprocess/preprocess_rio.py +++ b/crate_anon/preprocess/preprocess_rio.py @@ -584,9 +584,7 @@ def process_patient_table( # ... can't do NOT NULL; need to populate it required_cols.append(rio_pk) else: # RCEP type, or no PK in RiO - crate_pk_col = make_bigint_autoincrement_column( - CRATE_COL_PK, engine.dialect - ) + crate_pk_col = make_bigint_autoincrement_column(CRATE_COL_PK) # ... autopopulates crate_rio_number_col = Column( CRATE_COL_RIO_NUMBER, BigInteger, nullable=True @@ -693,9 +691,7 @@ def process_nonpatient_table( if other_pk_col: # table has a primary key already crate_pk_col = Column(CRATE_COL_PK, BigInteger, nullable=True) else: - crate_pk_col = make_bigint_autoincrement_column( - CRATE_COL_PK, engine.dialect - ) + crate_pk_col = make_bigint_autoincrement_column(CRATE_COL_PK) table.append_column(crate_pk_col) # must be Table-bound, as above add_columns(engine, table, [crate_pk_col]) if not configoptions.print_sql_only: diff --git a/crate_anon/preprocess/preprocess_systmone.py b/crate_anon/preprocess/preprocess_systmone.py index 76b9e348..d35fc931 100755 --- a/crate_anon/preprocess/preprocess_systmone.py +++ b/crate_anon/preprocess/preprocess_systmone.py @@ -266,9 +266,7 @@ def preprocess_systmone( # Create step #1 if not drop_danger_drop and table_needs_pk: - crate_pk_col = make_bigint_autoincrement_column( - CRATE_COL_PK, engine.dialect - ) + crate_pk_col = make_bigint_autoincrement_column(CRATE_COL_PK) # SQL Server requires Table-bound columns in order to generate DDL: table.append_column(crate_pk_col) add_columns(engine, table, [crate_pk_col]) diff --git a/devnotes/2025_01_sqlalchemy2.txt b/devnotes/2025_01_sqlalchemy2.txt index 501d263f..7859e5b0 100644 --- a/devnotes/2025_01_sqlalchemy2.txt +++ b/devnotes/2025_01_sqlalchemy2.txt @@ -156,6 +156,5 @@ Migration to 2.0 Step Seven - Test against a SQLAlchemy 2.0 Release ~~~ -todo: CURRENTLY ON: cardinal_pythonlib / pytest -k orm_query_tests --log-cli-level INFO - -todo: *** also # noqa \n +todo: CURRENTLY ON: + STEP THREE TESTS diff --git a/setup.py b/setup.py index 5abcbc3c..843dee79 100755 --- a/setup.py +++ b/setup.py @@ -69,7 +69,7 @@ "appdirs==1.4.4", # where to store some temporary data "arrow==0.15.7", # [pin exact version from cardinal_pythonlib] "beautifulsoup4==4.9.1", # [pin exact version from cardinal_pythonlib] - "cardinal_pythonlib==1.1.28", # RNC libraries + "cardinal_pythonlib==2.0.0", # RNC libraries "cairosvg==2.7.0", # work with SVG files "celery==5.2.7", # back-end scheduling "chardet==3.0.4", # character encoding detection for cardinal_pythonlib