diff --git a/.github/workflows/data/clickhouse/matrix.yml b/.github/workflows/data/clickhouse/matrix.yml index 9c8c558ba..15c6bffd5 100644 --- a/.github/workflows/data/clickhouse/matrix.yml +++ b/.github/workflows/data/clickhouse/matrix.yml @@ -25,16 +25,16 @@ matrix: clickhouse-version: 23.6.1-alpine <<: *max full: - # the lowest supported Clickhouse version by JDBC driver + # the lowest Clickhouse version which properly compares DateTime with DateTime64 - clickhouse-image: yandex/clickhouse-server - clickhouse-version: '20.7' + clickhouse-version: '21.1' <<: *min - clickhouse-image: clickhouse/clickhouse-server clickhouse-version: 23.6.1-alpine <<: *max nightly: - clickhouse-image: yandex/clickhouse-server - clickhouse-version: '20.7' + clickhouse-version: '21.1' <<: *min - clickhouse-image: clickhouse/clickhouse-server clickhouse-version: latest-alpine diff --git a/docs/changelog/next_release/267.breaking.rst b/docs/changelog/next_release/267.breaking.rst new file mode 100644 index 000000000..5ce301393 --- /dev/null +++ b/docs/changelog/next_release/267.breaking.rst @@ -0,0 +1,26 @@ +Serialize DateTimeHWM to Clickhouse's ``DateTime64(6)`` (precision up to microseconds) instead of ``DateTime`` (precision up to seconds). + +In Clickhouse versions below 21.1, comparing a column of type ``DateTime`` with a value of type ``DateTime64`` is not supported, and such a query returns an empty dataframe. +To avoid this, replace: + +.. code:: python + + DBReader( + ..., + hwm=DBReader.AutoDetectHWM( + name="my_hwm", + expression="hwm_column", # <-- + ), + ) + +with: + +.. 
code:: python + + DBReader( + ..., + hwm=DBReader.AutoDetectHWM( + name="my_hwm", + expression="CAST(hwm_column AS DateTime64)", # <-- + ), + ) diff --git a/docs/connection/db_connection/clickhouse/prerequisites.rst b/docs/connection/db_connection/clickhouse/prerequisites.rst index 654add047..f7ade0341 100644 --- a/docs/connection/db_connection/clickhouse/prerequisites.rst +++ b/docs/connection/db_connection/clickhouse/prerequisites.rst @@ -6,7 +6,7 @@ Prerequisites Version Compatibility --------------------- -* Clickhouse server versions: 20.7 or higher +* Clickhouse server versions: 21.1 or higher * Spark versions: 2.3.x - 3.5.x * Java versions: 8 - 20 diff --git a/onetl/connection/db_connection/clickhouse/dialect.py b/onetl/connection/db_connection/clickhouse/dialect.py index 187b2e787..2c03620d3 100644 --- a/onetl/connection/db_connection/clickhouse/dialect.py +++ b/onetl/connection/db_connection/clickhouse/dialect.py @@ -26,9 +26,11 @@ def get_min_value(self, value: Any) -> str: return f"minOrNull({result})" def _serialize_datetime(self, value: datetime) -> str: - result = value.strftime("%Y-%m-%d %H:%M:%S") - return f"CAST('{result}' AS DateTime)" + # this requires at least Clickhouse 21.1, see: + # https://github.com/ClickHouse/ClickHouse/issues/16655 + result = value.strftime("%Y-%m-%d %H:%M:%S.%f") + return f"toDateTime64('{result}', 6)" def _serialize_date(self, value: date) -> str: result = value.strftime("%Y-%m-%d") - return f"CAST('{result}' AS Date)" + return f"toDate('{result}')" diff --git a/tests/fixtures/processing/clickhouse.py b/tests/fixtures/processing/clickhouse.py index 2b3e4cec1..bf0b2f3e7 100644 --- a/tests/fixtures/processing/clickhouse.py +++ b/tests/fixtures/processing/clickhouse.py @@ -20,7 +20,7 @@ class ClickhouseProcessing(BaseProcessing): "text_string": "String", "hwm_int": "Int32", "hwm_date": "Date", - "hwm_datetime": "DateTime", + "hwm_datetime": "DateTime64(6)", "float_value": "Float32", } diff --git 
a/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_clickhouse.py b/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_clickhouse.py index 77ec071b3..67e67b065 100644 --- a/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_clickhouse.py +++ b/tests/tests_integration/tests_strategy_integration/tests_incremental_strategy_integration/test_strategy_increment_clickhouse.py @@ -302,12 +302,37 @@ def test_clickhouse_strategy_incremental_explicit_hwm_type( ColumnDateHWM, lambda x: x.isoformat(), ), + pytest.param( + "hwm_date", + "CAST(text_string AS Date32)", + ColumnDateHWM, + lambda x: x.isoformat(), + marks=pytest.mark.xfail(reason="Date32 type was added in ClickHouse 21.9"), + ), ( "hwm_datetime", "CAST(text_string AS DateTime)", ColumnDateTimeHWM, lambda x: x.isoformat(), ), + ( + "hwm_datetime", + "CAST(text_string AS DateTime64)", + ColumnDateTimeHWM, + lambda x: x.isoformat(), + ), + ( + "hwm_datetime", + "CAST(text_string AS DateTime64(3))", + ColumnDateTimeHWM, + lambda x: x.isoformat(), + ), + ( + "hwm_datetime", + "CAST(text_string AS DateTime64(6))", + ColumnDateTimeHWM, + lambda x: x.isoformat(), + ), ], ) def test_clickhouse_strategy_incremental_with_hwm_expr(