Skip to content

Commit

Permalink
Make CAggs materialized only by default
Browse files Browse the repository at this point in the history
Historically, creating a Continuous Aggregate made it realtime by default,
which confuses users, especially when the `WITH NO DATA` option is used. It is
also well known that realtime Continuous Aggregates can potentially lead to
issues with Hierarchical Continuous Aggregates and Data Tiering.

Improved the UX by making Continuous Aggregates non-realtime by default.
  • Loading branch information
fabriziomello committed Sep 26, 2023
1 parent 0ae6f95 commit 32a695e
Show file tree
Hide file tree
Showing 70 changed files with 1,173 additions and 883 deletions.
1 change: 1 addition & 0 deletions .unreleased/feature_6077
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Implements: #6077 Make Continuous Aggregates materialized only (non-realtime) by default
2 changes: 1 addition & 1 deletion src/ts_catalog/continuous_agg.c
Original file line number Diff line number Diff line change
Expand Up @@ -58,7 +58,7 @@ static const WithClauseDefinition continuous_aggregate_with_clause_def[] = {
[ContinuousViewOptionMaterializedOnly] = {
.arg_name = "materialized_only",
.type_id = BOOLOID,
.default_val = (Datum)false,
.default_val = (Datum)true,
},
[ContinuousViewOptionCompress] = {
.arg_name = "compress",
Expand Down
2 changes: 1 addition & 1 deletion tsl/test/expected/bgw_custom.out
Original file line number Diff line number Diff line change
Expand Up @@ -514,7 +514,7 @@ SELECT decompress_chunk(c) FROM show_chunks('conditions') c;

-- TEST Continuous Aggregate job
CREATE MATERIALIZED VIEW conditions_summary_daily
WITH (timescaledb.continuous) AS
WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT location,
time_bucket(INTERVAL '1 day', time) AS bucket,
AVG(temperature),
Expand Down
160 changes: 139 additions & 21 deletions tsl/test/expected/cagg_ddl.out
Original file line number Diff line number Diff line change
Expand Up @@ -1097,65 +1097,65 @@ FROM (
(3 rows)

-- width expression for int2 hypertables
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(1::smallint, time)
FROM metrics_int2
GROUP BY 1;
psql:include/cagg_ddl_common.sql:750: NOTICE: continuous aggregate "width_expr" is already up-to-date
DROP MATERIALIZED VIEW width_expr;
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(1::smallint + 2::smallint, time)
FROM metrics_int2
GROUP BY 1;
psql:include/cagg_ddl_common.sql:757: NOTICE: continuous aggregate "width_expr" is already up-to-date
DROP MATERIALIZED VIEW width_expr;
-- width expression for int4 hypertables
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(1, time)
FROM metrics_int4
GROUP BY 1;
psql:include/cagg_ddl_common.sql:765: NOTICE: continuous aggregate "width_expr" is already up-to-date
DROP MATERIALIZED VIEW width_expr;
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(1 + 2, time)
FROM metrics_int4
GROUP BY 1;
psql:include/cagg_ddl_common.sql:772: NOTICE: continuous aggregate "width_expr" is already up-to-date
DROP MATERIALIZED VIEW width_expr;
-- width expression for int8 hypertables
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(1, time)
FROM metrics_int8
GROUP BY 1;
psql:include/cagg_ddl_common.sql:780: NOTICE: continuous aggregate "width_expr" is already up-to-date
DROP MATERIALIZED VIEW width_expr;
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(1 + 2, time)
FROM metrics_int8
GROUP BY 1;
psql:include/cagg_ddl_common.sql:787: NOTICE: continuous aggregate "width_expr" is already up-to-date
DROP MATERIALIZED VIEW width_expr;
\set ON_ERROR_STOP 0
-- non-immutable expresions should be rejected
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(extract(year FROM now())::smallint, time)
FROM metrics_int2
GROUP BY 1;
psql:include/cagg_ddl_common.sql:796: ERROR: only immutable expressions allowed in time bucket function
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(extract(year FROM now())::int, time)
FROM metrics_int4
GROUP BY 1;
psql:include/cagg_ddl_common.sql:801: ERROR: only immutable expressions allowed in time bucket function
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW width_expr WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(extract(year FROM now())::int, time)
FROM metrics_int8
GROUP BY 1;
psql:include/cagg_ddl_common.sql:806: ERROR: only immutable expressions allowed in time bucket function
\set ON_ERROR_STOP 1
-- Test various ALTER MATERIALIZED VIEW statements.
SET ROLE :ROLE_DEFAULT_PERM_USER;
CREATE MATERIALIZED VIEW owner_check WITH (timescaledb.continuous) AS
CREATE MATERIALIZED VIEW owner_check WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(1 + 2, time)
FROM metrics_int8
GROUP BY 1
Expand Down Expand Up @@ -1293,7 +1293,7 @@ SELECT create_hypertable('test_schema.telemetry_raw', 'ts');

\endif
CREATE MATERIALIZED VIEW test_schema.telemetry_1s
WITH (timescaledb.continuous)
WITH (timescaledb.continuous, timescaledb.materialized_only=false)
AS
SELECT time_bucket(INTERVAL '1s', ts) AS ts_1s,
avg(value)
Expand Down Expand Up @@ -1356,14 +1356,14 @@ SELECT create_hypertable('test_schema.telemetry_raw', 'ts');

\endif
CREATE MATERIALIZED VIEW test_schema.cagg1
WITH (timescaledb.continuous)
WITH (timescaledb.continuous, timescaledb.materialized_only=false)
AS
SELECT time_bucket(INTERVAL '1s', ts) AS ts_1s,
avg(value)
FROM test_schema.telemetry_raw
GROUP BY ts_1s WITH NO DATA;
CREATE MATERIALIZED VIEW test_schema.cagg2
WITH (timescaledb.continuous)
WITH (timescaledb.continuous, timescaledb.materialized_only=false)
AS
SELECT time_bucket(INTERVAL '1s', ts) AS ts_1s,
avg(value)
Expand Down Expand Up @@ -1604,13 +1604,13 @@ SELECT table_name FROM create_hypertable('i3696','time');
(1 row)

\endif
CREATE MATERIALIZED VIEW i3696_cagg1 WITH (timescaledb.continuous)
CREATE MATERIALIZED VIEW i3696_cagg1 WITH (timescaledb.continuous, timescaledb.materialized_only=false)
AS
SELECT search_query,count(search_query) as count, sum(cnt), time_bucket(INTERVAL '1 minute', time) AS bucket
FROM i3696 GROUP BY cnt +cnt2 , bucket, search_query;
psql:include/cagg_ddl_common.sql:1108: NOTICE: continuous aggregate "i3696_cagg1" is already up-to-date
ALTER MATERIALIZED VIEW i3696_cagg1 SET (timescaledb.materialized_only = 'true');
CREATE MATERIALIZED VIEW i3696_cagg2 WITH (timescaledb.continuous)
CREATE MATERIALIZED VIEW i3696_cagg2 WITH (timescaledb.continuous, timescaledb.materialized_only=false)
AS
SELECT search_query,count(search_query) as count, sum(cnt), time_bucket(INTERVAL '1 minute', time) AS bucket
FROM i3696 GROUP BY cnt + cnt2, bucket, search_query
Expand All @@ -1630,7 +1630,7 @@ SELECT create_hypertable('test_setting', 'time');
(1 row)

\endif
CREATE MATERIALIZED VIEW test_setting_cagg with (timescaledb.continuous)
CREATE MATERIALIZED VIEW test_setting_cagg with (timescaledb.continuous, timescaledb.materialized_only=false)
AS SELECT time_bucket('1h',time), avg(val), count(*) FROM test_setting GROUP BY 1;
psql:include/cagg_ddl_common.sql:1130: NOTICE: continuous aggregate "test_setting_cagg" is already up-to-date
INSERT INTO test_setting
Expand Down Expand Up @@ -1947,25 +1947,25 @@ SELECT * FROM cashflows;
-- 4. test named bucket width
-- named origin
CREATE MATERIALIZED VIEW cagg_named_origin WITH
(timescaledb.continuous) AS
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket('1h', time, 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
-- named timezone
CREATE MATERIALIZED VIEW cagg_named_tz_origin WITH
(timescaledb.continuous) AS
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket('1h', time, timezone => 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
-- named ts
CREATE MATERIALIZED VIEW cagg_named_ts_tz_origin WITH
(timescaledb.continuous) AS
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket('1h', ts => time, timezone => 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
-- named bucket width
CREATE MATERIALIZED VIEW cagg_named_all WITH
(timescaledb.continuous) AS
(timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(bucket_width => '1h', ts => time, timezone => 'UTC', origin => '2001-01-03 01:23:45') AS bucket,
avg(amount) as avg_amount
FROM transactions GROUP BY 1 WITH NO DATA;
Expand Down Expand Up @@ -2015,7 +2015,7 @@ SELECT * FROM transactions_montly ORDER BY bucket;

-- Check set_chunk_time_interval on continuous aggregate
CREATE MATERIALIZED VIEW cagg_set_chunk_time_interval
WITH (timescaledb.continuous) AS
WITH (timescaledb.continuous, timescaledb.materialized_only=false) AS
SELECT time_bucket(INTERVAL '1 month', time) AS bucket,
SUM(fiat_value),
MAX(fiat_value),
Expand All @@ -2039,3 +2039,121 @@ WHERE d.hypertable_id = ca.mat_hypertable_id;
t
(1 row)

-- Since #6077 CAggs are materialized only by default
DROP TABLE conditions CASCADE;
psql:include/cagg_ddl_common.sql:1365: NOTICE: drop cascades to 3 other objects
psql:include/cagg_ddl_common.sql:1365: NOTICE: drop cascades to 2 other objects
CREATE TABLE conditions (
time TIMESTAMPTZ NOT NULL,
location TEXT NOT NULL,
temperature DOUBLE PRECISION NULL
);
\if :IS_DISTRIBUTED
SELECT create_distributed_hypertable('conditions', 'time', replication_factor => 2);
\else
SELECT create_hypertable('conditions', 'time');
create_hypertable
--------------------------
(54,public,conditions,t)
(1 row)

\endif
INSERT INTO conditions VALUES ( '2018-01-01 09:20:00-08', 'SFO', 55);
INSERT INTO conditions VALUES ( '2018-01-02 09:30:00-08', 'por', 100);
INSERT INTO conditions VALUES ( '2018-01-02 09:20:00-08', 'SFO', 65);
INSERT INTO conditions VALUES ( '2018-01-02 09:10:00-08', 'NYC', 65);
INSERT INTO conditions VALUES ( '2018-11-01 09:20:00-08', 'NYC', 45);
INSERT INTO conditions VALUES ( '2018-11-01 10:40:00-08', 'NYC', 55);
INSERT INTO conditions VALUES ( '2018-11-01 11:50:00-08', 'NYC', 65);
INSERT INTO conditions VALUES ( '2018-11-01 12:10:00-08', 'NYC', 75);
INSERT INTO conditions VALUES ( '2018-11-01 13:10:00-08', 'NYC', 85);
INSERT INTO conditions VALUES ( '2018-11-02 09:20:00-08', 'NYC', 10);
INSERT INTO conditions VALUES ( '2018-11-02 10:30:00-08', 'NYC', 20);
CREATE MATERIALIZED VIEW conditions_daily
WITH (timescaledb.continuous) AS
SELECT location,
time_bucket(INTERVAL '1 day', time) AS bucket,
AVG(temperature)
FROM conditions
GROUP BY location, bucket
WITH NO DATA;
\d+ conditions_daily
View "public.conditions_daily"
Column | Type | Collation | Nullable | Default | Storage | Description
----------+--------------------------+-----------+----------+---------+----------+-------------
location | text | | | | extended |
bucket | timestamp with time zone | | | | plain |
avg | double precision | | | | plain |
View definition:
SELECT _materialized_hypertable_55.location,
_materialized_hypertable_55.bucket,
_materialized_hypertable_55.avg
FROM _timescaledb_internal._materialized_hypertable_55;

-- Should return NO ROWS
SELECT * FROM conditions_daily ORDER BY bucket, avg;
location | bucket | avg
----------+--------+-----
(0 rows)

ALTER MATERIALIZED VIEW conditions_daily SET (timescaledb.materialized_only=false);
\d+ conditions_daily
View "public.conditions_daily"
Column | Type | Collation | Nullable | Default | Storage | Description
----------+--------------------------+-----------+----------+---------+----------+-------------
location | text | | | | extended |
bucket | timestamp with time zone | | | | plain |
avg | double precision | | | | plain |
View definition:
SELECT _materialized_hypertable_55.location,
_materialized_hypertable_55.bucket,
_materialized_hypertable_55.avg
FROM _timescaledb_internal._materialized_hypertable_55
WHERE _materialized_hypertable_55.bucket < COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(55)), '-infinity'::timestamp with time zone)
UNION ALL
SELECT conditions.location,
time_bucket('@ 1 day'::interval, conditions."time") AS bucket,
avg(conditions.temperature) AS avg
FROM conditions
WHERE conditions."time" >= COALESCE(_timescaledb_functions.to_timestamp(_timescaledb_functions.cagg_watermark(55)), '-infinity'::timestamp with time zone)
GROUP BY conditions.location, (time_bucket('@ 1 day'::interval, conditions."time"));

-- Should return ROWS because now it is realtime
SELECT * FROM conditions_daily ORDER BY bucket, avg;
location | bucket | avg
----------+------------------------------+-----
SFO | Sun Dec 31 16:00:00 2017 PST | 55
SFO | Mon Jan 01 16:00:00 2018 PST | 65
NYC | Mon Jan 01 16:00:00 2018 PST | 65
por | Mon Jan 01 16:00:00 2018 PST | 100
NYC | Wed Oct 31 17:00:00 2018 PDT | 65
NYC | Thu Nov 01 17:00:00 2018 PDT | 15
(6 rows)

-- Should return ROWS because we refreshed it
ALTER MATERIALIZED VIEW conditions_daily SET (timescaledb.materialized_only=true);
\d+ conditions_daily
View "public.conditions_daily"
Column | Type | Collation | Nullable | Default | Storage | Description
----------+--------------------------+-----------+----------+---------+----------+-------------
location | text | | | | extended |
bucket | timestamp with time zone | | | | plain |
avg | double precision | | | | plain |
View definition:
SELECT _materialized_hypertable_55.location,
_materialized_hypertable_55.bucket,
_materialized_hypertable_55.avg
FROM _timescaledb_internal._materialized_hypertable_55;

CALL refresh_continuous_aggregate('conditions_daily', NULL, NULL);
SELECT * FROM conditions_daily ORDER BY bucket, avg;
location | bucket | avg
----------+------------------------------+-----
SFO | Sun Dec 31 16:00:00 2017 PST | 55
SFO | Mon Jan 01 16:00:00 2018 PST | 65
NYC | Mon Jan 01 16:00:00 2018 PST | 65
por | Mon Jan 01 16:00:00 2018 PST | 100
NYC | Wed Oct 31 17:00:00 2018 PDT | 65
NYC | Thu Nov 01 17:00:00 2018 PDT | 15
(6 rows)

Loading

0 comments on commit 32a695e

Please sign in to comment.