diff --git a/.github/workflows/ci.yaml b/.github/workflows/ci.yaml
index bf10bac..9a1d142 100644
--- a/.github/workflows/ci.yaml
+++ b/.github/workflows/ci.yaml
@@ -9,7 +9,7 @@ on:
     paths:
       - .github/workflows/**
       - src/**
-      - setup.cfg
+      - dev/**
       - tox.ini
 
 jobs:
@@ -29,7 +29,6 @@ jobs:
           - "3.11"
          - "3.10"
          - "3.9"
-          - "3.8"
     steps:
       - name: Check out repository code
        uses: actions/checkout@v4
diff --git a/CHANGELOG.md b/CHANGELOG.md
new file mode 100644
index 0000000..67b0cfa
--- /dev/null
+++ b/CHANGELOG.md
@@ -0,0 +1,29 @@
+# Changelog
+
+## [v0.8.0](https://github.com/zifter/clickhouse-migrations/tree/v0.8.0) (2024-08-18)
+
+[Full Changelog](https://github.com/zifter/clickhouse-migrations/compare/v0.7.1...v0.8.0)
+
+**What's Changed:**
+- Add option --fake/--no-fake, which updates the schema_versions table without executing the statements from migration files #27. Done by @zifter in https://github.com/zifter/clickhouse-migrations/pull/28
+- Add option --migrations, which allows specifying explicitly which migrations to apply. Done by @zifter in https://github.com/zifter/clickhouse-migrations/pull/28
+
+**Breaking changes:**
+- Drop Python 3.8 support. Done by @zifter in https://github.com/zifter/clickhouse-migrations/pull/28
+- Options --multi-statement, --dry-run and --secure now work without passing a value. Just use --multi-statement/--no-multi-statement, --dry-run/--no-dry-run, --secure/--no-secure to enable or disable an option. Done by @zifter in https://github.com/zifter/clickhouse-migrations/pull/28
+
+
+## [v0.7.1](https://github.com/zifter/clickhouse-migrations/tree/v0.7.1) (2024-07-01)
+
+[Full Changelog](https://github.com/zifter/clickhouse-migrations/compare/v0.7.0...v0.7.1)
+
+**What's Changed:**
+- Allow default db name #24. Done by @zifter in https://github.com/zifter/clickhouse-migrations/pull/26
+
+
+## [v0.7.0](https://github.com/zifter/clickhouse-migrations/tree/v0.7.0) (2024-07-01)
+
+[Full Changelog](https://github.com/zifter/clickhouse-migrations/compare/v0.6.0...v0.7.0)
+
+**What's Changed:**
+- #24 Allow connection string for initialization of ClickhouseCluster. Done by @zifter in https://github.com/zifter/clickhouse-migrations/pull/25
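As a quick illustration of the v0.8.0 changes above, here is a minimal sketch of the new flags driven through the package's command-line helpers; the connection URL and the migrations directory are placeholders, not values taken from this patch.

```python
# Minimal sketch, assuming a reachable ClickHouse instance; the DSN and the
# migrations directory below are hypothetical placeholders.
from clickhouse_migrations.command_line import get_context, migrate

ctx = get_context(
    [
        "--db-url", "clickhouse://default:@localhost:9000/mydb",
        "--migrations-dir", "./migrations",
        # new in v0.8.0: apply only an explicit subset of migrations
        "--migrations", "001_init.sql", "002",
        # new in v0.8.0: record migrations in schema_versions without running their SQL
        "--fake",
    ]
)
applied = migrate(ctx)  # migrate() now returns the list of processed migrations
print([m.version for m in applied])
```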
diff --git a/README.md b/README.md
index 12a403c..df92c25 100644
--- a/README.md
+++ b/README.md
@@ -50,17 +50,20 @@ cluster = ClickhouseCluster(db_host, db_user, db_password)
 cluster.migrate(db_name, migrations_home, cluster_name=None,create_db_if_no_exists=True, multi_statement=True)
 ```
 
-Parameter | Description | Default
--------|-------------------------------------------------------------------|---------
-db_host | Clickhouse database hostname | localhost
-db_port | Clickhouse database port | 9000
-db_user | Clickhouse user | default
-db_password | Clichouse password | default
-db_name| Clickhouse database name | None
-migrations_home | Path to list of migration files |
-cluster_name | Name of Clickhouse topology cluster from | None
-create_db_if_no_exists | If the `db_name` is not present, enabling this will create the db | True
-multi_statement | Allow multiple statements in migration files | True
+Parameter | Description | Default
+-------|-----------------------------------------------------------------------------------------------------|---------
+db_host | Clickhouse database hostname | localhost
+db_port | Clickhouse database port | 9000
+db_user | Clickhouse user | default
+db_password | Clickhouse password | default
+db_name| Clickhouse database name | None
+migration_path | Path to the directory with migration files |
+migrations | Explicit list of migrations to apply | []
+cluster_name | Name of Clickhouse topology cluster | None
+create_db_if_no_exists | If the `db_name` is not present, enabling this will create the db | True
+multi_statement | Allow multiple statements in migration files | True
+secure | Use secure connection | False
+fake | Marks the migrations as applied without actually running the SQL to change your database schema | False
 
 ### Notes
 The Clickhouse driver does not natively support executing multipe statements in a single query.
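To complement the updated parameter table, here is a minimal sketch of the library API as changed by this patch. Note that the table's `migrations`, `fake` and `secure` rows appear to correspond to the `explicit_migrations` and `fake` keyword arguments of `migrate()` and to the `secure` argument of the `ClickhouseCluster` constructor; host, credentials and paths below are placeholders.

```python
# Sketch based on the signatures touched in this patch; connection details
# and the migrations directory are placeholders.
from clickhouse_migrations.clickhouse_cluster import ClickhouseCluster

cluster = ClickhouseCluster(
    db_host="localhost",
    db_user="default",
    db_password="",
    secure=False,  # "secure" row of the table
)
applied = cluster.migrate(
    db_name="analytics",                      # placeholder database name
    migration_path="./migrations",            # a plain str now works as well as Path
    multi_statement=True,
    explicit_migrations=["001_init.sql", "002"],  # "migrations" row of the table
    fake=False,                               # "fake" row of the table
)
print([m.version for m in applied])
```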
diff --git a/pyproject.toml b/pyproject.toml
index 3ea730b..b754b59 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -8,7 +8,9 @@ authors = [
 urls.Homepage = "https://github.com/zifter/clickhouse-migrations"
 urls.Source = "https://github.com/zifter/clickhouse-migrations"
 urls.Tracker = "https://github.com/zifter/clickhouse-migrations/issues"
-requires-python = ">=3.7, <4"
+urls.Changelog = "https://github.com/zifter/clickhouse-migrations/blob/main/CHANGELOG.md"
+
+requires-python = ">=3.9, <4"
 keywords = [
     "clickhouse",
     "migrations",
@@ -17,7 +19,6 @@ license = {text = "MIT"}
 classifiers = [
     "Intended Audience :: Developers",
     "Programming Language :: Python",
-    "Programming Language :: Python :: 3.8",
     "Programming Language :: Python :: 3.9",
     "Programming Language :: Python :: 3.10",
     "Programming Language :: Python :: 3.11",
diff --git a/src/clickhouse_migrations/clickhouse_cluster.py b/src/clickhouse_migrations/clickhouse_cluster.py
index bd27b0b..f439513 100644
--- a/src/clickhouse_migrations/clickhouse_cluster.py
+++ b/src/clickhouse_migrations/clickhouse_cluster.py
@@ -1,5 +1,5 @@
 from pathlib import Path
-from typing import List, Optional
+from typing import List, Optional, Union
 
 from clickhouse_driver import Client
 
@@ -87,16 +87,18 @@ def show_tables(self, db_name):
     def migrate(
         self,
         db_name: Optional[str],
-        migration_path: Path,
+        migration_path: Union[Path, str],
         cluster_name: Optional[str] = None,
         create_db_if_no_exists: bool = True,
         multi_statement: bool = True,
         dryrun: bool = False,
+        explicit_migrations: Optional[List[str]] = None,
+        fake: bool = False,
     ):
         db_name = db_name if db_name is not None else self.default_db_name
 
         storage = MigrationStorage(migration_path)
-        migrations = storage.migrations()
+        migrations = storage.migrations(explicit_migrations)
 
         return self.apply_migrations(
             db_name,
@@ -105,6 +107,7 @@ def migrate(
             create_db_if_no_exists=create_db_if_no_exists,
             multi_statement=multi_statement,
             dryrun=dryrun,
+            fake=fake,
         )
 
     def apply_migrations(
@@ -115,6 +118,7 @@ def apply_migrations(
         cluster_name: Optional[str] = None,
         create_db_if_no_exists: bool = True,
         multi_statement: bool = True,
+        fake: bool = False,
     ) -> List[Migration]:
         if create_db_if_no_exists:
             if cluster_name is None:
@@ -125,4 +129,4 @@ def apply_migrations(
         with self.connection(db_name) as conn:
             migrator = Migrator(conn, dryrun)
             migrator.init_schema(cluster_name)
-            return migrator.apply_migration(migrations, multi_statement)
+            return migrator.apply_migration(migrations, multi_statement, fake=fake)
diff --git a/src/clickhouse_migrations/command_line.py b/src/clickhouse_migrations/command_line.py
index 78c5228..5f10666 100644
--- a/src/clickhouse_migrations/command_line.py
+++ b/src/clickhouse_migrations/command_line.py
@@ -1,8 +1,10 @@
+import argparse
 import logging
 import os
 import sys
 from argparse import ArgumentParser
 from pathlib import Path
+from typing import List
 
 from clickhouse_migrations.clickhouse_cluster import ClickhouseCluster
 from clickhouse_migrations.defaults import (
@@ -12,6 +14,8 @@
     DB_USER,
     MIGRATIONS_DIR,
 )
+from clickhouse_migrations.migration import Migration
+from clickhouse_migrations.migrator import Migrator
 
 
 def log_level(value: str) -> str:
@@ -35,6 +39,7 @@ def get_context(args):
     parser = ArgumentParser()
     parser.register("type", bool, cast_to_bool)
 
+    default_migrations = os.environ.get("MIGRATIONS", "")
     # detect configuration
     parser.add_argument(
         "--db-url",
@@ -74,8 +79,9 @@
     )
     parser.add_argument(
         "--multi-statement",
-        default=os.environ.get("MULTI_STATEMENT", "1"),
+        default=cast_to_bool(os.environ.get("MULTI_STATEMENT", "1")),
         type=bool,
+        action=argparse.BooleanOptionalAction,
         help="Path to list of migration files",
     )
     parser.add_argument(
@@ -91,24 +97,40 @@ def get_context(args):
     )
     parser.add_argument(
         "--dry-run",
-        default=os.environ.get("DRY_RUN", "0"),
+        default=cast_to_bool(os.environ.get("DRY_RUN", "0")),
         type=bool,
+        action=argparse.BooleanOptionalAction,
         help="Dry run mode",
     )
+    parser.add_argument(
+        "--fake",
+        default=cast_to_bool(os.environ.get("FAKE", "0")),
+        type=bool,
+        action=argparse.BooleanOptionalAction,
+        help="Marks the migrations as applied, "
+        "but without actually running the SQL to change your database schema.",
+    )
+    parser.add_argument(
+        "--migrations",
+        default=default_migrations.split(",") if default_migrations else [],
+        type=str,
+        nargs="+",
+        help="Explicit list of migrations to apply. "
+        "Specify file name, file stem or migration version like 001_init.sql, 002_test2, 003, 4",
+    )
     parser.add_argument(
         "--secure",
-        default=os.environ.get("SECURE", "0"),
+        default=cast_to_bool(os.environ.get("SECURE", "0")),
         type=bool,
-        help="Secure connection",
+        action=argparse.BooleanOptionalAction,
+        help="Use secure connection",
     )
 
     return parser.parse_args(args)
 
 
-def migrate(ctx) -> int:
-    logging.basicConfig(level=ctx.log_level, style="{", format="{levelname}:{message}")
-
-    cluster = ClickhouseCluster(
+def create_cluster(ctx) -> ClickhouseCluster:
+    return ClickhouseCluster(
         db_host=ctx.db_host,
         db_port=ctx.db_port,
         db_user=ctx.db_user,
@@ -116,15 +138,34 @@ def migrate(ctx) -> int:
         db_url=ctx.db_url,
         secure=ctx.secure,
     )
-    cluster.migrate(
+
+
+def do_migrate(cluster, ctx) -> List[Migration]:
+    return cluster.migrate(
         db_name=ctx.db_name,
         migration_path=ctx.migrations_dir,
+        explicit_migrations=ctx.migrations,
         cluster_name=ctx.cluster_name,
         multi_statement=ctx.multi_statement,
         dryrun=ctx.dry_run,
+        fake=ctx.fake,
     )
-    return 0
+
+
+def do_query_applied_migrations(cluster, ctx) -> List[Migration]:
+    with cluster.connection(ctx.db_name) as conn:
+        migrator = Migrator(conn, True)
+        return migrator.query_applied_migrations()
+
+
+def migrate(ctx) -> List[Migration]:
+    logging.basicConfig(level=ctx.log_level, style="{", format="{levelname}:{message}")
+
+    cluster = create_cluster(ctx)
+    migrations = do_migrate(cluster, ctx)
+    return migrations
 
 
 def main() -> int:
-    return migrate(get_context(sys.argv[1:]))  # pragma: no cover
+    migrate(get_context(sys.argv[1:]))  # pragma: no cover
+    return 0  # pragma: no cover
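The `argparse.BooleanOptionalAction` switch above is what makes `--multi-statement`, `--dry-run`, `--secure` and the new `--fake` behave as paired on/off flags instead of taking a value. A standalone sketch of the pattern (Python 3.9+, separate from the project's own parser):

```python
# Standalone sketch of the argparse pattern adopted above; this is not the
# project's parser, just the stdlib feature it relies on.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument(
    "--fake",
    default=False,
    action=argparse.BooleanOptionalAction,  # also registers --no-fake
    help="Mark migrations as applied without running their SQL",
)

print(parser.parse_args(["--fake"]).fake)     # True
print(parser.parse_args(["--no-fake"]).fake)  # False
print(parser.parse_args([]).fake)             # False (falls back to the default)
```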
diff --git a/src/clickhouse_migrations/migration.py b/src/clickhouse_migrations/migration.py
index a3b39be..1336e69 100644
--- a/src/clickhouse_migrations/migration.py
+++ b/src/clickhouse_migrations/migration.py
@@ -2,14 +2,14 @@
 import os
 from collections import namedtuple
 from pathlib import Path
-from typing import List
+from typing import List, Optional, Union
 
 Migration = namedtuple("Migration", ["version", "md5", "script"])
 
 
 class MigrationStorage:
-    def __init__(self, storage_dir: Path):
-        self.storage_dir: Path = storage_dir
+    def __init__(self, storage_dir: Union[Path, str]):
+        self.storage_dir: Path = Path(storage_dir)
 
     def filenames(self) -> List[Path]:
         l: List[Path] = []
@@ -19,17 +19,28 @@ def filenames(self) -> List[Path]:
 
         return l
 
-    def migrations(self) -> List[Migration]:
+    def migrations(
+        self, explicit_migrations: Optional[List[str]] = None
+    ) -> List[Migration]:
         migrations: List[Migration] = []
 
         for full_path in self.filenames():
+            version_string = full_path.name.split("_")[0]
+            version_number = int(version_string)
             migration = Migration(
-                version=int(full_path.name.split("_")[0]),
+                version=version_number,
                 script=str(full_path.read_text(encoding="utf8")),
                 md5=hashlib.md5(full_path.read_bytes()).hexdigest(),
             )
-            migrations.append(migration)
+            if (
+                not explicit_migrations
+                or full_path.name in explicit_migrations
+                or full_path.stem in explicit_migrations
+                or version_string in explicit_migrations
+                or str(version_number) in explicit_migrations
+            ):
+                migrations.append(migration)
 
         migrations.sort(key=lambda m: m.version)
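To make the new matching rules concrete, here is a small sketch of `MigrationStorage.migrations()` with an explicit list, assuming a hypothetical directory that contains `001_init.sql` and `002_test2.sql`; a full file name, a file stem, the zero-padded version string or the bare version number should all select a migration.

```python
# Sketch of the filtering added above; "./migrations" containing 001_init.sql
# and 002_test2.sql is a hypothetical layout.
from clickhouse_migrations.migration import MigrationStorage

storage = MigrationStorage("./migrations")  # a plain str path is now accepted

# "001_init.sql" matches by file name; "2" matches 002_test2.sql by its
# bare version number (int("002") == 2).
selected = storage.migrations(["001_init.sql", "2"])
print([m.version for m in selected])  # expected: [1, 2]
```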
diff --git a/src/clickhouse_migrations/migrator.py b/src/clickhouse_migrations/migrator.py
index d764c0f..06b58df 100644
--- a/src/clickhouse_migrations/migrator.py
+++ b/src/clickhouse_migrations/migrator.py
@@ -31,11 +31,14 @@ def init_schema(self, cluster_name: Optional[str] = None):
         self._execute(single_schema if cluster_name is None else cluster_schema)
 
     def query_applied_migrations(self) -> List[Migration]:
+        self.optimize_schema_table()
+
         query = """SELECT
             version,
             script,
             md5
-        FROM schema_versions"""
+        FROM schema_versions
+        ORDER BY version"""
         result = self._execute(query, with_column_types=True)
 
         column_names = [c[0] for c in result[len(result) - 1]]
@@ -90,29 +93,61 @@ def migrations_to_apply(self, incoming: List[Migration]) -> List[Migration]:
         return sorted(to_apply, key=lambda x: x.version)
 
     def apply_migration(
-        self, migrations: List[Migration], multi_statement: bool
+        self,
+        migrations: List[Migration],
+        multi_statement: bool,
+        fake: bool = False,
     ) -> List[Migration]:
-        new_migrations = self.migrations_to_apply(migrations)
+        migrations_to_process = (
+            migrations if fake else self.migrations_to_apply(migrations)
+        )
 
-        logging.info("Total migrations to apply: %d", len(new_migrations))
+        logging.info("Total migrations to apply: %d", len(migrations_to_process))
 
-        if not new_migrations:
+        if not migrations_to_process:
             return []
 
-        for migration in new_migrations:
+        for migration in migrations_to_process:
             logging.info("Execute migration %s", migration)
 
             statements = self.script_to_statements(migration.script, multi_statement)
 
             logging.info("Migration contains %s statements to apply", len(statements))
 
             for statement in statements:
-                if not self._dryrun:
-                    self._execute(statement)
-                else:
+                if fake:
+                    logging.warning(
+                        "Fake mode, statement will be skipped: %s", statement
+                    )
+                elif self._dryrun:
                     logging.info("Dry run mode, would have executed: %s", statement)
+                else:
+                    self._execute(statement)
 
             logging.info("Migration applied, need to update schema version table.")
-            if not self._dryrun:
+            if fake:
+                logging.debug("update schema versions because fake option is enabled")
+                self._execute(
+                    "ALTER TABLE schema_versions DELETE WHERE version = %(version)s;",
+                    {
+                        "version": migration.version,
+                    },
+                )
+                self._execute(
+                    "INSERT INTO schema_versions(version, script, md5) VALUES",
+                    [
+                        {
+                            "version": migration.version,
+                            "script": migration.script,
+                            "md5": migration.md5,
+                        }
+                    ],
+                )
+            elif self._dryrun:
+                logging.debug(
+                    "Skip updating schema versions because dry run is enabled"
+                )
+            else:
+                logging.debug("Insert new schemas")
                 self._execute(
                     "INSERT INTO schema_versions(version, script, md5) VALUES",
                     [
@@ -126,7 +161,10 @@ def apply_migration(
 
         logging.info("Migration is fully applied.")
 
-        return new_migrations
+        return migrations_to_process
+
+    def optimize_schema_table(self):
+        self._execute("OPTIMIZE TABLE schema_versions FINAL;")
 
     def _execute(self, statement, *args, **kwargs):
         logging.debug(statement)
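A note on the new `optimize_schema_table()` call: the fake path rewrites a row in `schema_versions` with an `ALTER TABLE … DELETE` (an asynchronous mutation) followed by an `INSERT`, so the table can briefly hold stale rows; running `OPTIMIZE TABLE … FINAL` before reading it back collapses them. A rough sketch of that statement flow issued directly through `clickhouse-driver`, with placeholder connection details and row values — an illustration, not the library's exact internals:

```python
# Rough illustration of the fake-mode bookkeeping above; host, credentials,
# database and the row values are placeholders.
from clickhouse_driver import Client

client = Client(host="localhost", user="default", password="", database="pytest")

version = 1
script = "CREATE TABLE sample(id UInt32) ENGINE MergeTree ORDER BY id"  # placeholder
md5 = "0123456789abcdef0123456789abcdef"                               # placeholder

# Drop any previous record for this version (an asynchronous mutation) ...
client.execute(
    "ALTER TABLE schema_versions DELETE WHERE version = %(version)s",
    {"version": version},
)
# ... record the migration as applied without running its statements ...
client.execute(
    "INSERT INTO schema_versions(version, script, md5) VALUES",
    [{"version": version, "script": script, "md5": md5}],
)
# ... and collapse stale rows before reading the table back, which is what
# query_applied_migrations() now does via optimize_schema_table().
client.execute("OPTIMIZE TABLE schema_versions FINAL")
print(client.execute("SELECT version, script, md5 FROM schema_versions ORDER BY version"))
```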
diff --git a/src/tests/complex_migrations_changed/001_init.sql b/src/tests/complex_migrations_changed/001_init.sql
new file mode 100644
index 0000000..4846729
--- /dev/null
+++ b/src/tests/complex_migrations_changed/001_init.sql
@@ -0,0 +1,3 @@
+-- Add some comments for because wtf
+CREATE TABLE sample11(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple()
+ORDER BY tuple();
diff --git a/src/tests/complex_migrations_changed/002_test2.sql b/src/tests/complex_migrations_changed/002_test2.sql
new file mode 100644
index 0000000..af364bc
--- /dev/null
+++ b/src/tests/complex_migrations_changed/002_test2.sql
@@ -0,0 +1,7 @@
+-- Add some comments for because wtf
+
+CREATE TABLE sample21(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple()
+ORDER BY tuple();
+
+CREATE TABLE sample22(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple()
+ORDER BY tuple();
\ No newline at end of file
diff --git a/src/tests/complex_migrations_changed/003_third_test.sql b/src/tests/complex_migrations_changed/003_third_test.sql
new file mode 100644
index 0000000..a1f2369
--- /dev/null
+++ b/src/tests/complex_migrations_changed/003_third_test.sql
@@ -0,0 +1,10 @@
+-- Add some comments for because wtf
+
+CREATE TABLE sample31(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple()
+ORDER BY tuple();
+
+CREATE TABLE sample32(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple()
+ORDER BY tuple();
+
+CREATE TABLE sample33(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple()
+ORDER BY tuple()
\ No newline at end of file
diff --git a/src/tests/complex_migrations_changed/010_migrations_is_not_in_row.sql b/src/tests/complex_migrations_changed/010_migrations_is_not_in_row.sql
new file mode 100644
index 0000000..4a15a02
--- /dev/null
+++ b/src/tests/complex_migrations_changed/010_migrations_is_not_in_row.sql
@@ -0,0 +1,4 @@
+-- Add some comments for because wtf
+
+CREATE TABLE sample101(id UInt32, name String) ENGINE MergeTree PARTITION BY tuple()
+ORDER BY tuple()
\ No newline at end of file
diff --git a/src/tests/conftest.py b/src/tests/conftest.py
index 3cdb607..43313ec 100644
--- a/src/tests/conftest.py
+++ b/src/tests/conftest.py
@@ -4,7 +4,7 @@
 
 
 @pytest.fixture
-def cluster():
+def cluster() -> ClickhouseCluster:
     return ClickhouseCluster(db_host="localhost", db_user="default", db_password="")
 
 
diff --git a/src/tests/test_clickhouse_migration.py b/src/tests/test_clickhouse_migration.py
index cc56a3e..3701d2c 100644
--- a/src/tests/test_clickhouse_migration.py
+++ b/src/tests/test_clickhouse_migration.py
@@ -1,24 +1,32 @@
 import tempfile
 from pathlib import Path
+from time import sleep
 
 import pytest
 from clickhouse_driver.errors import ServerException
 
-from clickhouse_migrations.command_line import get_context, migrate
+from clickhouse_migrations.clickhouse_cluster import ClickhouseCluster
+from clickhouse_migrations.command_line import (
+    create_cluster,
+    do_migrate,
+    do_query_applied_migrations,
+    get_context,
+    migrate,
+)
 from clickhouse_migrations.exceptions import MigrationException
 from clickhouse_migrations.migration import Migration
 
 TESTS_DIR = Path(__file__).parent
 
 
-def test_empty_list_of_migrations_ok(cluster):
+def test_empty_list_of_migrations_ok(cluster: ClickhouseCluster):
     with tempfile.TemporaryDirectory("empty_dir") as temp_dir:
         applied = cluster.migrate("pytest", temp_dir)
         assert len(applied) == 0
 
 
-def test_deleted_migrations_exception(cluster):
+def test_deleted_migrations_exception(cluster: ClickhouseCluster):
     cluster.init_schema("pytest")
 
     with cluster.connection("pytest") as conn:
@@ -31,7 +39,7 @@ def test_deleted_migrations_exception(cluster):
         cluster.apply_migrations("pytest", [])
 
 
-def test_missing_migration_exception(cluster):
+def test_missing_migration_exception(cluster: ClickhouseCluster):
     cluster.init_schema("pytest")
 
     with cluster.connection("pytest") as conn:
@@ -48,7 +56,7 @@ def test_missing_migration_exception(cluster):
         cluster.apply_migrations("pytest", migrations)
 
 
-def test_modified_committed_migrations_exception(cluster):
+def test_modified_committed_migrations_exception(cluster: ClickhouseCluster):
     cluster.init_schema("pytest")
 
     with cluster.connection("pytest") as conn:
@@ -65,7 +73,7 @@ def test_modified_committed_migrations_exception(cluster):
         cluster.apply_migrations("pytest", migrations)
 
 
-def test_apply_new_migration_ok(cluster):
+def test_apply_new_migration_ok(cluster: ClickhouseCluster):
     cluster.init_schema("pytest")
 
     with cluster.connection("pytest") as conn:
@@ -84,7 +92,7 @@ def test_apply_new_migration_ok(cluster):
 
     assert results[0] == migrations[-1]
 
 
-def test_apply_two_new_migration_ok(cluster):
+def test_apply_two_new_migration_ok(cluster: ClickhouseCluster):
     cluster.init_schema("pytest")
 
     with cluster.connection("pytest") as conn:
@@ -113,7 +121,7 @@ def test_apply_two_new_migration_ok(cluster):
 
     assert results[2] == migrations[-1]
 
 
-def test_should_migrate_empty_database(cluster):
+def test_should_migrate_empty_database(cluster: ClickhouseCluster):
     cluster.create_db("pytest")
 
     tables = cluster.show_tables("pytest")
@@ -127,7 +135,7 @@ def test_should_migrate_empty_database(cluster):
     assert tables[1] == "schema_versions"
 
 
-def test_migrations_folder_is_empty_ok(cluster):
+def test_migrations_folder_is_empty_ok(cluster: ClickhouseCluster):
     with tempfile.TemporaryDirectory("empty_dir") as temp_dir:
         cluster.migrate("pytest", temp_dir)
 
@@ -160,7 +168,7 @@ def test_main_pass_db_name_ok():
 
 
 def test_main_pass_db_url_ok():
-    migrate(
+    migrations = migrate(
         get_context(
             [
                 "--db-url",
@@ -170,14 +178,93 @@ def test_main_pass_db_url_ok():
             ]
         )
     )
+    assert len(migrations) == 1
+
+
+def test_check_explicit_migrations_1_ok():
+    migrations = migrate(
+        get_context(
+            [
+                "--db-url",
+                "clickhouse://default:@localhost:9000/pytest",
+                "--migrations-dir",
+                str(TESTS_DIR / "complex_migrations"),
+                "--migrations",
+                "001_init",
+                "002",
+                "3",
+            ]
+        )
+    )
+    assert len(migrations) == 3
 
+
+def test_check_explicit_migrations_2_ok():
+    migrations = migrate(
+        get_context(
+            [
+                "--db-url",
+                "clickhouse://default:@localhost:9000/pytest",
+                "--migrations-dir",
+                str(TESTS_DIR / "complex_migrations"),
+                "--migrations",
+                "001_init.sql",
+                "2",
+            ]
+        )
+    )
+    assert len(migrations) == 2
+
+
+def test_fake_ok():
+    # apply first migrations
+    ctx = get_context(
+        [
+            "--db-url",
+            "clickhouse://default:@localhost:9000/pytest",
+            "--migrations-dir",
+            str(TESTS_DIR / "complex_migrations"),
+        ]
+    )
+    cluster = create_cluster(ctx)
+    migrations = do_migrate(cluster, ctx)
+    applied_migrations = do_query_applied_migrations(cluster, ctx)
+
+    assert len(migrations) == 4
+    assert migrations == applied_migrations
+
+    # just run the same but with fake flag
+    ctx = get_context(
+        [
+            "--db-url",
+            "clickhouse://default:@localhost:9000/pytest",
+            "--migrations-dir",
+            str(TESTS_DIR / "complex_migrations"),
+            "--fake",
+        ]
+    )
+    migrations = do_migrate(cluster, ctx)
+    applied_migrations = do_query_applied_migrations(cluster, ctx)
+
+    assert len(migrations) == 4
+    assert migrations == applied_migrations
+
+    # run with changed md5 sum
+    ctx = get_context(
+        [
+            "--db-url",
+            "clickhouse://default:@localhost:9000/pytest",
+            "--migrations-dir",
+            str(TESTS_DIR / "complex_migrations_changed"),
+            "--fake",
+        ]
+    )
+    migrations = do_migrate(cluster, ctx)
 
-def test_check_multistatement_arg():
-    context = get_context(["--multi-statement", "false"])
-    assert context.multi_statement is False
+    # the ALTER DELETE mutations issued by fake mode are applied asynchronously, so wait a bit
+    sleep(1)
 
-    context = get_context(["--multi-statement", "True"])
-    assert context.multi_statement is True
+    applied_migrations = do_query_applied_migrations(cluster, ctx)
 
-    context = get_context(["--multi-statement", "0"])
-    assert context.multi_statement is False
+    assert len(migrations) == 4
+    assert migrations == applied_migrations
diff --git a/src/tests/test_command_line.py b/src/tests/test_command_line.py
new file mode 100644
index 0000000..f3e3332
--- /dev/null
+++ b/src/tests/test_command_line.py
@@ -0,0 +1,34 @@
+from pathlib import Path
+
+from clickhouse_migrations.command_line import get_context
+
+TESTS_DIR = Path(__file__).parent
+
+
+def test_check_multistatement_arg():
+    context = get_context(["--multi-statement"])
+    assert context.multi_statement is True
+
+    context = get_context(["--no-multi-statement"])
+    assert context.multi_statement is False
+
+
+def test_check_explicit_migrations_args_ok():
+    context = get_context(["--migrations", "001_init", "002_test2"])
+    assert context.migrations == ["001_init", "002_test2"]
+
+
+def test_check_fake_ok():
+    context = get_context(
+        [
+            "--fake",
+        ]
+    )
+    assert context.fake is True
+
+    context = get_context(
+        [
+            "--no-fake",
+        ]
+    )
+    assert context.fake is False
diff --git a/tox.ini b/tox.ini
index 5c1bff5..c828cc9 100644
--- a/tox.ini
+++ b/tox.ini
@@ -6,7 +6,7 @@ envlist =
     black-check
    flake8-check
     pylint-check
-    py{38,39,310,311,312}
+    py{39,310,311,312}
    coverage-report
     clean
 skip_missing_interpreters = True