From 2e794a8234390f9cca92faf473e646fadd83a429 Mon Sep 17 00:00:00 2001
From: Sven Klemm
Date: Sat, 16 Sep 2023 12:33:49 +0200
Subject: [PATCH] Improve compression datatype handling

Fall back to the btree operator input type when it is binary compatible
with the column type and no operator for the column type could be found.
This should improve performance when using column types like char or
varchar instead of text.
---
 tsl/src/compression/compression.c            | 15 ++++-
 tsl/test/shared/expected/compression_dml.out | 64 ++++++++++++++++++++
 tsl/test/shared/sql/compression_dml.sql      | 39 ++++++++++++
 3 files changed, 116 insertions(+), 2 deletions(-)

diff --git a/tsl/src/compression/compression.c b/tsl/src/compression/compression.c
index 6d6c0542d51..97fdfbcde36 100644
--- a/tsl/src/compression/compression.c
+++ b/tsl/src/compression/compression.c
@@ -30,6 +30,7 @@
 #include
 #include
 #include
+#include <parser/parse_coerce.h>
 #include
 #include
 #include
@@ -1962,8 +1963,18 @@ create_segment_filter_scankey(RowDecompressor *decompressor, char *segment_filte
 		elog(ERROR, "no btree opfamily for type \"%s\"", format_type_be(atttypid));
 
 	Oid opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy);
-	Assert(OidIsValid(opr));
-	/* We should never end up here but: no operator, no optimization */
+
+	/*
+	 * Fall back to the btree operator input type when it is binary compatible
+	 * with the column type and no operator for the column type could be found.
+	 */
+	if (!OidIsValid(opr) && IsBinaryCoercible(atttypid, tce->btree_opintype))
+	{
+		opr =
+			get_opfamily_member(tce->btree_opf, tce->btree_opintype, tce->btree_opintype, strategy);
+	}
+
+	/* No operator could be found so we can't create the scankey. */
 	if (!OidIsValid(opr))
 		return num_scankeys;
 
diff --git a/tsl/test/shared/expected/compression_dml.out b/tsl/test/shared/expected/compression_dml.out
index 000d2428354..2a7da821355 100644
--- a/tsl/test/shared/expected/compression_dml.out
+++ b/tsl/test/shared/expected/compression_dml.out
@@ -104,3 +104,67 @@ QUERY PLAN
 DROP TABLE mytab CASCADE;
 NOTICE: drop cascades to table _timescaledb_internal.compress_hyper_X_X_chunk
+-- test varchar segmentby
+CREATE TABLE comp_seg_varchar (
+    time timestamptz NOT NULL,
+    source_id varchar(64) NOT NULL,
+    label varchar NOT NULL,
+    data jsonb
+);
+SELECT table_name FROM create_hypertable('comp_seg_varchar', 'time');
+WARNING: column type "character varying" used for "source_id" does not follow best practices
+WARNING: column type "character varying" used for "label" does not follow best practices
+ table_name
+ comp_seg_varchar
+(1 row)
+
+CREATE UNIQUE INDEX ON comp_seg_varchar(source_id, label, "time" DESC);
+ALTER TABLE comp_seg_varchar SET(timescaledb.compress, timescaledb.compress_segmentby = 'source_id, label', timescaledb.compress_orderby = 'time');
+INSERT INTO comp_seg_varchar
+SELECT time, source_id, label, '{}' AS data
+FROM
+generate_series('1990-01-01'::timestamptz, '1990-01-10'::timestamptz, INTERVAL '1 day') AS g1(time),
+generate_series(1, 3, 1 ) AS g2(source_id),
+generate_series(1, 3, 1 ) AS g3(label);
+SELECT compress_chunk(c) FROM show_chunks('comp_seg_varchar') c;
+ compress_chunk
+ _timescaledb_internal._hyper_X_X_chunk
+ _timescaledb_internal._hyper_X_X_chunk
+(2 rows)
+
+-- all tuples should come from compressed chunks
+EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
+QUERY PLAN
+ Append (actual rows=90 loops=1)
+   ->  Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=27 loops=1)
+         ->  Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
+   ->  Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=63 loops=1)
+         ->  Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
+(5 rows)
+
+INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', 'test', 'test', '{}'::jsonb)
+ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';
+-- no tuples should be moved into uncompressed
+EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
+QUERY PLAN
+ Append (actual rows=91 loops=1)
+   ->  Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=27 loops=1)
+         ->  Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
+   ->  Seq Scan on _hyper_X_X_chunk (actual rows=1 loops=1)
+   ->  Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=63 loops=1)
+         ->  Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
+(6 rows)
+
+INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', '1', '2', '{}'::jsonb)
+ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';
+EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
+QUERY PLAN
+ Append (actual rows=92 loops=1)
+   ->  Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=24 loops=1)
+         ->  Seq Scan on compress_hyper_X_X_chunk (actual rows=8 loops=1)
+   ->  Seq Scan on _hyper_X_X_chunk (actual rows=5 loops=1)
+   ->  Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=63 loops=1)
+         ->  Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
+(6 rows)
+
+DROP TABLE comp_seg_varchar;
diff --git a/tsl/test/shared/sql/compression_dml.sql b/tsl/test/shared/sql/compression_dml.sql
index 14f5ee67983..eeb2b83c0ea 100644
--- a/tsl/test/shared/sql/compression_dml.sql
+++ b/tsl/test/shared/sql/compression_dml.sql
@@ -86,3 +86,42 @@ INSERT INTO mytab SELECT '2022-10-07 05:30:10+05:30'::timestamp with time zone,
 EXPLAIN (costs off) SELECT * FROM :chunk_table;
 DROP TABLE mytab CASCADE;
+-- test varchar segmentby
+CREATE TABLE comp_seg_varchar (
+    time timestamptz NOT NULL,
+    source_id varchar(64) NOT NULL,
+    label varchar NOT NULL,
+    data jsonb
+);
+
+SELECT table_name FROM create_hypertable('comp_seg_varchar', 'time');
+
+CREATE UNIQUE INDEX ON comp_seg_varchar(source_id, label, "time" DESC);
+
+ALTER TABLE comp_seg_varchar SET(timescaledb.compress, timescaledb.compress_segmentby = 'source_id, label', timescaledb.compress_orderby = 'time');
+
+INSERT INTO comp_seg_varchar
+SELECT time, source_id, label, '{}' AS data
+FROM
+generate_series('1990-01-01'::timestamptz, '1990-01-10'::timestamptz, INTERVAL '1 day') AS g1(time),
+generate_series(1, 3, 1 ) AS g2(source_id),
+generate_series(1, 3, 1 ) AS g3(label);
+
+SELECT compress_chunk(c) FROM show_chunks('comp_seg_varchar') c;
+
+
+-- all tuples should come from compressed chunks
+EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
+
+INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', 'test', 'test', '{}'::jsonb)
+ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';
+
+-- no tuples should be moved into uncompressed
+EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
+
+INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', '1', '2', '{}'::jsonb)
+ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';
+
+EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
+
+DROP TABLE comp_seg_varchar;
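
Note for reviewers: the standalone sketch below restates the lookup-and-fallback pattern that the compression.c hunk implements, using PostgreSQL's type-cache and catalog-lookup APIs (lookup_type_cache, get_opfamily_member, IsBinaryCoercible). The helper name is hypothetical; in the patch the same logic lives inline in create_segment_filter_scankey, which then uses the resolved operator to build the scankey.

/*
 * Illustrative sketch only, not part of the patch.  A varchar column has no
 * btree opclass of its own; its default opfamily is text_ops, which defines
 * operators for text but not for varchar, so the type-specific lookup fails.
 * Because varchar is binary coercible to text, the text operator can be
 * reused for the segment filter scankey.
 */
#include "postgres.h"

#include "access/stratnum.h"
#include "parser/parse_coerce.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"

/* Hypothetical helper name; the patch performs this lookup inline. */
static Oid
lookup_btree_operator_with_fallback(Oid atttypid, StrategyNumber strategy)
{
	TypeCacheEntry *tce = lookup_type_cache(atttypid, TYPECACHE_BTREE_OPFAMILY);

	/* The patch raises an error here; the sketch just reports no operator. */
	if (!OidIsValid(tce->btree_opf))
		return InvalidOid;

	/* First try the operator defined for the column type itself. */
	Oid opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy);

	/*
	 * Fallback: retry with the opfamily's declared input type (text for
	 * varchar) when the column type is binary coercible to it.
	 */
	if (!OidIsValid(opr) && IsBinaryCoercible(atttypid, tce->btree_opintype))
		opr = get_opfamily_member(tce->btree_opf,
								  tce->btree_opintype,
								  tce->btree_opintype,
								  strategy);

	/* InvalidOid here means: no operator, so no scankey optimization. */
	return opr;
}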