From 0862da9a73c0ae5e6d50a1315a0676e30fd06a6e Mon Sep 17 00:00:00 2001 From: Kai Siren Date: Thu, 21 Nov 2024 10:40:58 -0800 Subject: [PATCH 1/8] Run analytics migrations like the API does them --- .github/workflows/ci-analytics.yml | 4 ++-- analytics/Makefile | 6 +++--- analytics/pyproject.toml | 3 ++- analytics/src/analytics/cli.py | 14 +++++--------- .../src/analytics/integrations/etldb/__init__.py | 4 ++-- analytics/src/analytics/integrations/etldb/main.py | 2 +- infra/analytics/app-config/outputs.tf | 6 +----- 7 files changed, 16 insertions(+), 23 deletions(-) diff --git a/.github/workflows/ci-analytics.yml b/.github/workflows/ci-analytics.yml index 6256cc074..2214079a5 100644 --- a/.github/workflows/ci-analytics.yml +++ b/.github/workflows/ci-analytics.yml @@ -39,8 +39,8 @@ jobs: - name: Run linting run: make lint - - name: Run database initialization - run: docker compose down --volumes && make init-db + - name: Run database migrations + run: docker compose down --volumes && make db-migrate - name: Run tests run: make test-audit diff --git a/analytics/Makefile b/analytics/Makefile index e6b5d85e3..cef68935f 100644 --- a/analytics/Makefile +++ b/analytics/Makefile @@ -144,10 +144,10 @@ lint: ## runs code quality checks # Data Commands # ################# -init-db: - @echo "=> Initializing the database schema" +db-migrate: + @echo "=> Migrating the database schema" @echo "=====================================================" - $(POETRY) analytics etl initialize_database + $(POETRY) analytics etl db_migrate @echo "=====================================================" gh-transform-and-load: diff --git a/analytics/pyproject.toml b/analytics/pyproject.toml index 32ad27631..236c350e6 100644 --- a/analytics/pyproject.toml +++ b/analytics/pyproject.toml @@ -8,11 +8,12 @@ version = "0.1.0" [tool.poetry.scripts] analytics = "analytics.cli:app" +db-migrate = "src.analytics.cli:db_migrate" [tool.poetry.dependencies] dynaconf = "^3.2.4" kaleido = "0.2.1" -notebook = "^7.0.0" # Goal is to replace this with another method of presenting charts +notebook = "^7.0.0" # Goal is to replace this with another method of presenting charts pandas = "^2.0.3" pandas-stubs = "^2.0.2.230605" plotly = "^5.15.0" diff --git a/analytics/src/analytics/cli.py b/analytics/src/analytics/cli.py index 44ba05c5f..ab3b1710d 100644 --- a/analytics/src/analytics/cli.py +++ b/analytics/src/analytics/cli.py @@ -264,12 +264,12 @@ def export_json_to_database(delivery_file: Annotated[str, ISSUE_FILE_ARG]) -> No # =========================================================== -@etl_app.command(name="initialize_database") -@ecs_background_task("initialize_database") -def initialize_database() -> None: +@etl_app.command(name="db_migrate") +@ecs_background_task("db_migrate") +def db_migrate() -> None: """Initialize etl database.""" logger.info("initializing database") - etldb.initialize_database() + etldb.db_migrate() logger.info("done") @@ -282,11 +282,7 @@ def transform_and_load( # validate effective date arg try: dateformat = "%Y-%m-%d" - datestamp = ( - datetime.strptime(effective_date, dateformat) - .astimezone() - .strftime(dateformat) - ) + datestamp = datetime.strptime(effective_date, dateformat).astimezone().strftime(dateformat) print(f"running transform and load with effective date {datestamp}") except ValueError: print("FATAL ERROR: malformed effective date, expected YYYY-MM-DD format") diff --git a/analytics/src/analytics/integrations/etldb/__init__.py b/analytics/src/analytics/integrations/etldb/__init__.py index 3608faff7..528f8553f 100644 --- a/analytics/src/analytics/integrations/etldb/__init__.py +++ b/analytics/src/analytics/integrations/etldb/__init__.py @@ -1,11 +1,11 @@ """Read and write data from/to delivery metrics database.""" __all__ = [ - "initialize_database", + "db_migrate", "sync_data", ] from analytics.integrations.etldb.main import ( - initialize_database, + db_migrate, sync_data, ) diff --git a/analytics/src/analytics/integrations/etldb/main.py b/analytics/src/analytics/integrations/etldb/main.py index ea9854d8d..81bfa5840 100644 --- a/analytics/src/analytics/integrations/etldb/main.py +++ b/analytics/src/analytics/integrations/etldb/main.py @@ -18,7 +18,7 @@ VERBOSE = False -def initialize_database() -> None: +def db_migrate() -> None: """ Create and/or update an etl database by applying a sequential set of migration scripts. diff --git a/infra/analytics/app-config/outputs.tf b/infra/analytics/app-config/outputs.tf index 398ba3cfb..afc053945 100644 --- a/infra/analytics/app-config/outputs.tf +++ b/infra/analytics/app-config/outputs.tf @@ -18,10 +18,6 @@ output "environment_configs" { value = local.environment_configs } -# This variable is slightly misnamed. It should really be called "has_migrations". -# It controls whether or not the `run-database-migrations.sh` script tries to run database -# migrations. The entire analytics application is going to have its schema controlled -# via ETL jobs, so we don't need to run migrations in the same way as the API. output "has_database" { - value = false + value = true } From 7002907fd3dd56d89f2d53bb305c38692e5c02c3 Mon Sep 17 00:00:00 2001 From: Kai Siren Date: Thu, 21 Nov 2024 10:43:45 -0800 Subject: [PATCH 2/8] run a migration actually --- .github/workflows/cd-analytics.yml | 2 ++ Makefile | 5 ++++- 2 files changed, 6 insertions(+), 1 deletion(-) diff --git a/.github/workflows/cd-analytics.yml b/.github/workflows/cd-analytics.yml index 5dfec8175..5f570e3b3 100644 --- a/.github/workflows/cd-analytics.yml +++ b/.github/workflows/cd-analytics.yml @@ -5,8 +5,10 @@ on: push: branches: - "main" + - "kai/analytics-migrations" paths: - "analytics/**" + - "Makefile" release: types: [published] workflow_dispatch: diff --git a/Makefile b/Makefile index 060d3a763..afecc8595 100644 --- a/Makefile +++ b/Makefile @@ -206,7 +206,7 @@ release-deploy: ## Deploy release to $APP_NAME's web service in $ENVIRONMENT metabase-deploy: ## Deploy metabase to $APP_NAME's web service in $ENVIRONMENT @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @:$(call check_defined, ENVIRONMENT, the name of the application environment e.g. "prod" or "dev") - ./bin/deploy-metabase.sh $(APP_NAME) $(IMAGE_TAG) $(ENVIRONMENT) + ./bin/deploy-metabase.sh $(APP_NAME) $(IMAGE_TAG) $(ENVIRONMENT) release-image-name: ## Prints the image name of the release image @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @@ -221,3 +221,6 @@ release-image-tag: ## Prints the image tag of the release image help: ## Prints the help documentation and info about each command @grep -E '^[/a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' + + +# RUN A MIGRATION From 3572e6a39b809785d088442e819d5b4428680413 Mon Sep 17 00:00:00 2001 From: Kai Siren Date: Thu, 21 Nov 2024 10:48:45 -0800 Subject: [PATCH 3/8] make format --- analytics/src/analytics/cli.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/analytics/src/analytics/cli.py b/analytics/src/analytics/cli.py index ab3b1710d..7fcf1f1b9 100644 --- a/analytics/src/analytics/cli.py +++ b/analytics/src/analytics/cli.py @@ -282,7 +282,11 @@ def transform_and_load( # validate effective date arg try: dateformat = "%Y-%m-%d" - datestamp = datetime.strptime(effective_date, dateformat).astimezone().strftime(dateformat) + datestamp = ( + datetime.strptime(effective_date, dateformat) + .astimezone() + .strftime(dateformat) + ) print(f"running transform and load with effective date {datestamp}") except ValueError: print("FATAL ERROR: malformed effective date, expected YYYY-MM-DD format") From 390a4f6d152f93df1de08ffe9150c3ad3925ead0 Mon Sep 17 00:00:00 2001 From: Kai Siren Date: Thu, 21 Nov 2024 11:18:18 -0800 Subject: [PATCH 4/8] update command --- analytics/tests/test_cli.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/analytics/tests/test_cli.py b/analytics/tests/test_cli.py index 05e5ada08..0ad3ae54e 100644 --- a/analytics/tests/test_cli.py +++ b/analytics/tests/test_cli.py @@ -285,7 +285,7 @@ def test_init_db(self): # setup - create command command = [ "etl", - "initialize_database", + "db_migrate", ] # execution result = runner.invoke(app, command) @@ -311,10 +311,7 @@ def test_transform_and_load_with_valid_parameters(self): print(result.stdout) # validation - check there wasn't an error assert result.exit_code == 0 - assert ( - f"running transform and load with effective date {self.EFFECTIVE_DATE}" - in result.stdout - ) + assert f"running transform and load with effective date {self.EFFECTIVE_DATE}" in result.stdout assert "project row(s) processed: 2" in result.stdout assert "quad row(s) processed: 1" in result.stdout assert "deliverable row(s) processed: 4" in result.stdout From f4f5c4829a2bf28dddb8838c8318652f5f72399e Mon Sep 17 00:00:00 2001 From: Kai Siren Date: Thu, 21 Nov 2024 11:22:38 -0800 Subject: [PATCH 5/8] remove infra, fix format --- analytics/tests/test_cli.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/analytics/tests/test_cli.py b/analytics/tests/test_cli.py index 0ad3ae54e..28f299bf8 100644 --- a/analytics/tests/test_cli.py +++ b/analytics/tests/test_cli.py @@ -311,7 +311,10 @@ def test_transform_and_load_with_valid_parameters(self): print(result.stdout) # validation - check there wasn't an error assert result.exit_code == 0 - assert f"running transform and load with effective date {self.EFFECTIVE_DATE}" in result.stdout + assert ( + f"running transform and load with effective date {self.EFFECTIVE_DATE}" + in result.stdout + ) assert "project row(s) processed: 2" in result.stdout assert "quad row(s) processed: 1" in result.stdout assert "deliverable row(s) processed: 4" in result.stdout From 9c441d6f52a754e150b31fcf3a9356a01c0ab4e2 Mon Sep 17 00:00:00 2001 From: Kai Siren Date: Thu, 21 Nov 2024 11:24:08 -0800 Subject: [PATCH 6/8] remove infra --- Makefile | 2 ++ infra/analytics/app-config/env-config/scheduled_jobs.tf | 5 ----- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/Makefile b/Makefile index afecc8595..58d31a166 100644 --- a/Makefile +++ b/Makefile @@ -224,3 +224,5 @@ help: ## Prints the help documentation and info about each command # RUN A MIGRATION + + diff --git a/infra/analytics/app-config/env-config/scheduled_jobs.tf b/infra/analytics/app-config/env-config/scheduled_jobs.tf index 80ac5a7a9..b6d098ffa 100644 --- a/infra/analytics/app-config/env-config/scheduled_jobs.tf +++ b/infra/analytics/app-config/env-config/scheduled_jobs.tf @@ -11,10 +11,5 @@ locals { schedule_expression = "rate(1 days)" state = "ENABLED" } - init-etldb = { - task_command = ["make", "init-db"] - schedule_expression = "rate(1 days)" - state = "ENABLED" - } } } From ed170034395aeaef59de3fd8e9e8523eabfb4cd9 Mon Sep 17 00:00:00 2001 From: Kai Siren Date: Thu, 21 Nov 2024 12:35:48 -0800 Subject: [PATCH 7/8] git restore --- .github/workflows/cd-analytics.yml | 2 -- Makefile | 7 +------ 2 files changed, 1 insertion(+), 8 deletions(-) diff --git a/.github/workflows/cd-analytics.yml b/.github/workflows/cd-analytics.yml index 5f570e3b3..5dfec8175 100644 --- a/.github/workflows/cd-analytics.yml +++ b/.github/workflows/cd-analytics.yml @@ -5,10 +5,8 @@ on: push: branches: - "main" - - "kai/analytics-migrations" paths: - "analytics/**" - - "Makefile" release: types: [published] workflow_dispatch: diff --git a/Makefile b/Makefile index 58d31a166..060d3a763 100644 --- a/Makefile +++ b/Makefile @@ -206,7 +206,7 @@ release-deploy: ## Deploy release to $APP_NAME's web service in $ENVIRONMENT metabase-deploy: ## Deploy metabase to $APP_NAME's web service in $ENVIRONMENT @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @:$(call check_defined, ENVIRONMENT, the name of the application environment e.g. "prod" or "dev") - ./bin/deploy-metabase.sh $(APP_NAME) $(IMAGE_TAG) $(ENVIRONMENT) + ./bin/deploy-metabase.sh $(APP_NAME) $(IMAGE_TAG) $(ENVIRONMENT) release-image-name: ## Prints the image name of the release image @:$(call check_defined, APP_NAME, the name of subdirectory of /infra that holds the application's infrastructure code) @@ -221,8 +221,3 @@ release-image-tag: ## Prints the image tag of the release image help: ## Prints the help documentation and info about each command @grep -E '^[/a-zA-Z0-9_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' - - -# RUN A MIGRATION - - From 84ec4d70482be42b21e9717ccc5d58bc80b59346 Mon Sep 17 00:00:00 2001 From: Kai Siren Date: Thu, 21 Nov 2024 13:53:09 -0800 Subject: [PATCH 8/8] db_migrate => migrate_database --- analytics/src/analytics/cli.py | 4 ++-- analytics/src/analytics/integrations/etldb/__init__.py | 4 ++-- analytics/src/analytics/integrations/etldb/main.py | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) diff --git a/analytics/src/analytics/cli.py b/analytics/src/analytics/cli.py index 7fcf1f1b9..ac360ddc1 100644 --- a/analytics/src/analytics/cli.py +++ b/analytics/src/analytics/cli.py @@ -266,10 +266,10 @@ def export_json_to_database(delivery_file: Annotated[str, ISSUE_FILE_ARG]) -> No @etl_app.command(name="db_migrate") @ecs_background_task("db_migrate") -def db_migrate() -> None: +def migrate_database() -> None: """Initialize etl database.""" logger.info("initializing database") - etldb.db_migrate() + etldb.migrate_database() logger.info("done") diff --git a/analytics/src/analytics/integrations/etldb/__init__.py b/analytics/src/analytics/integrations/etldb/__init__.py index 528f8553f..bac758406 100644 --- a/analytics/src/analytics/integrations/etldb/__init__.py +++ b/analytics/src/analytics/integrations/etldb/__init__.py @@ -1,11 +1,11 @@ """Read and write data from/to delivery metrics database.""" __all__ = [ - "db_migrate", + "migrate_database", "sync_data", ] from analytics.integrations.etldb.main import ( - db_migrate, + migrate_database, sync_data, ) diff --git a/analytics/src/analytics/integrations/etldb/main.py b/analytics/src/analytics/integrations/etldb/main.py index 81bfa5840..1e36a7c5a 100644 --- a/analytics/src/analytics/integrations/etldb/main.py +++ b/analytics/src/analytics/integrations/etldb/main.py @@ -18,7 +18,7 @@ VERBOSE = False -def db_migrate() -> None: +def migrate_database() -> None: """ Create and/or update an etl database by applying a sequential set of migration scripts.