From 624c150715cff9d31e0ea73446caf7769e16e943 Mon Sep 17 00:00:00 2001 From: Sergei Borodin Date: Tue, 10 Dec 2024 13:29:01 +0100 Subject: [PATCH] feat: BI-5954 BaseEntrySchemaMigration class (#735) * feat: BI-5954 BaseEntrySchemaMigration class * feat: BI-5954 BaseLinearEntrySchemaMigration class * feat: BI-5954 just one migration class * feat: BI-5954 migrations without ClassVar * feat: BI-5954 return new object as result --- .../us_manager/schema_migration/__init__.py | 0 .../us_manager/schema_migration/base.py | 93 +++++++++ .../unit/us_manager/test_migration.py | 188 ++++++++++++++++++ 3 files changed, 281 insertions(+) create mode 100644 lib/dl_core/dl_core/us_manager/schema_migration/__init__.py create mode 100644 lib/dl_core/dl_core/us_manager/schema_migration/base.py create mode 100644 lib/dl_core/dl_core_tests/unit/us_manager/test_migration.py diff --git a/lib/dl_core/dl_core/us_manager/schema_migration/__init__.py b/lib/dl_core/dl_core/us_manager/schema_migration/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/lib/dl_core/dl_core/us_manager/schema_migration/base.py b/lib/dl_core/dl_core/us_manager/schema_migration/base.py new file mode 100644 index 000000000..779c7551d --- /dev/null +++ b/lib/dl_core/dl_core/us_manager/schema_migration/base.py @@ -0,0 +1,93 @@ +from copy import deepcopy +from datetime import datetime +import logging +from typing import ( + Any, + Callable, +) + +import attr +from typing_extensions import Self + +from dl_api_commons.base_models import RequestContextInfo +from dl_app_tools.profiling_base import generic_profiler +from dl_core.services_registry import ServicesRegistry + + +LOGGER = logging.getLogger(__name__) + + +def isoformat_validator(instance: Any, attribute: attr.Attribute, value: str) -> None: + datetime.fromisoformat(value) + + +@attr.s +class Migration: + version: str = attr.ib(validator=isoformat_validator) + name: str = attr.ib() + function: Callable[[dict], dict] = attr.ib() + id: int = attr.ib(init=False) + + def __attrs_post_init__(self) -> None: + self.id = int(datetime.fromisoformat(self.version).timestamp()) + + def __lt__(self, other: Self) -> bool: + return self.id < other.id + + def migrate(self, entry: dict) -> dict: + entry = self.function(entry) + entry["data"]["schema_version"] = self.version + return entry + + +@attr.s +class BaseEntrySchemaMigration: + bi_context: RequestContextInfo | None = attr.ib(default=None) + services_registry: ServicesRegistry | None = attr.ib(default=None) + strict_migration: bool = attr.ib(default=False) + + @property + def migrations(self) -> list[Migration]: + return [] + + @property + def sorted_migrations(self) -> list[Migration]: + return sorted(self.migrations) + + @staticmethod + def _get_entry_schema_id(entry: dict) -> int: + entry_data = entry.get("data") + if not isinstance(entry_data, dict): + raise ValueError(f"Invalid entry: 'data' should be a dict, got {type(entry_data).__name__}") + + schema_version = entry_data.get("schema_version", "") + entry_schema_id = 1 + if schema_version != "1": + entry_schema_id = int(datetime.fromisoformat(schema_version).timestamp()) + return entry_schema_id + + def _migrate(self, entry: dict) -> dict: + seen_versions = set() + entry_schema_id = self._get_entry_schema_id(entry) + + for migration in self.sorted_migrations: + if migration.id <= entry_schema_id: + continue + if migration.version in seen_versions: + raise ValueError(f"Double migration detected for migration version: {migration.version}") + seen_versions.add(migration.version) + LOGGER.info(f"Apply migration ver={migration.version}, {migration.name}") + entry = migration.migrate(entry) + return entry + + @generic_profiler("migrate_entry") + def migrate(self, entry: dict) -> dict: + entry_copy = deepcopy(entry) + + try: + return self._migrate(entry_copy) + except Exception as exc: + if self.strict_migration: + raise exc + LOGGER.warning("Entry migration failed", exc_info=True) + return deepcopy(entry) diff --git a/lib/dl_core/dl_core_tests/unit/us_manager/test_migration.py b/lib/dl_core/dl_core_tests/unit/us_manager/test_migration.py new file mode 100644 index 000000000..37ac4d775 --- /dev/null +++ b/lib/dl_core/dl_core_tests/unit/us_manager/test_migration.py @@ -0,0 +1,188 @@ +from copy import deepcopy + +import attr +import pytest + +from dl_core.us_manager.schema_migration.base import ( + BaseEntrySchemaMigration, + Migration, +) + + +@attr.s +class Level1EntrySchemaMigration(BaseEntrySchemaMigration): + @property + def migrations(self) -> list[Migration]: + migrations = [ + Migration( + "2022-12-04 13:00:00", + "Second level 1 migration", + Level1EntrySchemaMigration._migrate_v2_to_v3, + ), + Migration( + "2022-12-01 12:00:00", + "First level 1 migration", + Level1EntrySchemaMigration._migrate_v1_to_v2, + ), + ] + migrations.extend(super().migrations) + return migrations + + @staticmethod + def _migrate_v1_to_v2(entry: dict) -> dict: + entry["data"]["new_field"] = entry["data"].pop("old_field", "default_value") + return entry + + @staticmethod + def _migrate_v2_to_v3(entry: dict) -> dict: + entry["data"]["l1_field"] = "added_in_l1" + return entry + + +@attr.s +class Level2EntrySchemaMigration(Level1EntrySchemaMigration): + @property + def migrations(self) -> list[Migration]: + migrations = [ + Migration( + "2022-12-03 13:00:00", + "Second level 2 migration", + Level2EntrySchemaMigration._migrate_v2_to_v3, + ), + Migration( + "2022-12-02 12:00:00", + "First level 2 migration", + Level2EntrySchemaMigration._migrate_v1_to_v2, + ), + ] + migrations.extend(super().migrations) + return migrations + + @staticmethod + def _migrate_v1_to_v2(entry: dict) -> dict: + entry["data"]["new_field"] = "default_value" + return entry + + @staticmethod + def _migrate_v2_to_v3(entry: dict) -> dict: + entry["data"]["l2_field"] = "added_in_l2" + return entry + + +@attr.s +class Level3EntrySchemaMigration(Level2EntrySchemaMigration): + @property + def migrations(self) -> list[Migration]: + migrations = [ + Migration("2022-12-03 13:00:00", "Third level 2 migration", Level3EntrySchemaMigration._migrate_v3_to_v1), + ] + migrations.extend(super().migrations) + return migrations + + @staticmethod + def _migrate_v3_to_v1(entry: dict) -> dict: + entry["data"]["abs_field"] = "one more new value" + return entry + + +@pytest.fixture +def l2_migrator(): + return Level2EntrySchemaMigration(strict_migration=True) + + +@pytest.fixture +def l3_migrator(): + return Level3EntrySchemaMigration(strict_migration=True) + + +@pytest.fixture +def l3_nonstrict_migrator(): + return Level3EntrySchemaMigration() + + +def test_successful_migration(l2_migrator): + entry = { + "data": { + "old_field": "value1", + "schema_version": "1", + } + } + expected = { + "data": { + "new_field": "default_value", + "l1_field": "added_in_l1", + "l2_field": "added_in_l2", + "schema_version": "2022-12-04 13:00:00", + } + } + result = l2_migrator.migrate(entry) + assert result == expected + + +def test_no_migration_needed(l2_migrator): + entry = { + "data": { + "new_field": "default_value", + "l1_field": "added_in_l1", + "l2_field": "added_in_l2", + "schema_version": "2022-12-04 13:00:00", + } + } + result = l2_migrator.migrate(entry) + assert result == entry + + +def test_invalid_data_format(l2_migrator): + entry = { + "data": "invalid_data_format", + } + with pytest.raises(ValueError, match="Invalid entry: 'data' should be a dict"): + l2_migrator.migrate(entry) + + +def test_missing_data_key(l2_migrator): + entry = {} + with pytest.raises(ValueError, match="Invalid entry: 'data' should be a dict"): + l2_migrator.migrate(entry) + + +def test_cyclic_migration(l3_migrator): + entry = { + "data": { + "old_field": "value1", + "schema_version": "1", + } + } + with pytest.raises(ValueError, match="Double migration detected"): + l3_migrator.migrate(entry) + + +def test_migration_failure(l3_nonstrict_migrator): + entry = { + "data": { + "old_field": "value1", + "schema_version": "1", + } + } + original_entry = deepcopy(entry) + + result = l3_nonstrict_migrator.migrate(entry) + assert result == original_entry + + +@pytest.mark.parametrize( + "migration_version", + ( + "some string", + "2022-13-03 23:00:00", + "2022-12-40 23:00:00", + "2022-12-03 33:00:00", + ), +) +def test_wrong_migration_version(migration_version): + with pytest.raises(ValueError): + Migration( + migration_version, + "Broken version migration", + lambda x: x, + )