"""
Create a materialized view that populates a denormalized fact table of
enrollment events.

``upgrade`` creates the ``fact_enrollments`` table, attaches the
``fact_enrollments_mv`` materialized view that writes into it, and backfills
the table from the existing enrollment events. ``downgrade`` removes both
objects.
"""

# Provenance: reconstructed from a two-commit patch series against
# tutoraspects/templates/aspects/apps/aspects/migrations/alembic/versions/
# 0024_fact_enrollments.py:
#   1/2 "feat: create fact_enrollments as MV"
#   2/2 "refactor: make enrollment_status a low cardinality field"

from alembic import op

revision = "0024"
down_revision = "0023"
branch_labels = None
depends_on = None

# NOTE(review): the ``{{ ... }}`` placeholders are presumably substituted by the
# template renderer before this module is executed (the file lives under
# ``templates/``); an empty cluster name then disables the clustered variants.
on_cluster = (
    " ON CLUSTER '{{CLICKHOUSE_CLUSTER_NAME}}' "
    if "{{CLICKHOUSE_CLUSTER_NAME}}"
    else ""
)
engine = "ReplicatedMergeTree" if "{{CLICKHOUSE_CLUSTER_NAME}}" else "MergeTree"

# Single source of truth for the denormalizing query. It is used both as the
# body of the materialized view and for the one-off backfill, so the two can
# never drift apart.
_ENROLLMENT_FACTS_SELECT = """
        select
            enrollments.emission_time as emission_time,
            enrollments.org as org,
            enrollments.course_key as course_key,
            courses.course_name as course_name,
            courses.course_run as course_run,
            enrollments.actor_id as actor_id,
            enrollments.enrollment_mode as enrollment_mode,
            splitByString('/', enrollments.verb_id)[-1] as enrollment_status
        from
            {{ ASPECTS_XAPI_DATABASE }}.{{ ASPECTS_ENROLLMENT_EVENTS_TABLE }} enrollments
            join {{ ASPECTS_EVENT_SINK_DATABASE }}.course_names courses
                 on enrollments.course_key = courses.course_key
"""


def upgrade():
    """Create the fact table, its materialized view, and backfill the table."""
    # The target table must be created with the same ON CLUSTER clause as the
    # view below; otherwise, on a clustered deployment, the view would be
    # created on every node while its TO-table exists on only one of them.
    op.execute(
        f"""
        create table if not exists {{ ASPECTS_XAPI_DATABASE }}.fact_enrollments
        {on_cluster}
        (
            emission_time DateTime,
            org String,
            course_key String,
            course_name String,
            course_run String,
            actor_id String,
            enrollment_mode LowCardinality(String),
            enrollment_status LowCardinality(String)
        ) ENGINE = {engine}
        PRIMARY KEY (org, course_key)
        ORDER BY (org, course_key, actor_id, enrollment_mode, enrollment_status, emission_time)
        """
    )

    # The view forwards each newly inserted enrollment event, joined with its
    # course metadata, into the fact table.
    op.execute(
        f"""
        CREATE MATERIALIZED VIEW IF NOT EXISTS {{ ASPECTS_XAPI_DATABASE }}.fact_enrollments_mv
        {on_cluster}
        TO {{ ASPECTS_XAPI_DATABASE }}.fact_enrollments AS
        {_ENROLLMENT_FACTS_SELECT}
        """
    )

    # Backfill: the view only sees rows inserted after it was created, so copy
    # the events that already exist.
    # NOTE(review): events arriving between the CREATE above and this INSERT
    # may be written twice (once by the view, once here) - confirm whether
    # that window matters for this deployment.
    op.execute(
        f"""
        insert into {{ ASPECTS_XAPI_DATABASE }}.fact_enrollments
        {_ENROLLMENT_FACTS_SELECT}
        """
    )


def downgrade():
    """Drop the materialized view and then the fact table it writes into."""
    # Drop the view first so nothing is still targeting the table when the
    # table disappears, and mirror the ON CLUSTER clause used in upgrade() so
    # the objects are removed on every node.
    op.execute(
        "DROP VIEW IF EXISTS {{ ASPECTS_XAPI_DATABASE }}.fact_enrollments_mv"
        f"{on_cluster}"
    )
    op.execute(
        "DROP TABLE IF EXISTS {{ ASPECTS_XAPI_DATABASE }}.fact_enrollments"
        f"{on_cluster}"
    )