Improve compression datatype handling
Fall back to the btree operator input type when it is binary compatible with
the column type and no operator for the column type could be found. This
should improve performance when using column types like char or varchar
instead of text.
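In PostgreSQL terms, the change retries the btree opfamily lookup with the opfamily's declared input type (text, in the varchar case) when the column type itself has no operator. A minimal sketch of that pattern, using the stock typcache/lsyscache/coercion APIs; the helper name segment_filter_operator is illustrative and not part of the patch:

#include "postgres.h"
#include "access/stratnum.h"
#include "parser/parse_coerce.h"
#include "utils/lsyscache.h"
#include "utils/typcache.h"

/*
 * Illustrative helper: look up a btree operator for a segmentby column,
 * falling back to the opfamily input type when the column type (e.g. varchar)
 * has no operator of its own but is binary compatible with it (varchar -> text).
 */
static Oid
segment_filter_operator(Oid atttypid, int16 strategy /* e.g. BTEqualStrategyNumber */)
{
	TypeCacheEntry *tce = lookup_type_cache(atttypid, TYPECACHE_BTREE_OPFAMILY);
	Oid opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy);

	if (!OidIsValid(opr) && IsBinaryCoercible(atttypid, tce->btree_opintype))
		opr = get_opfamily_member(tce->btree_opf,
								  tce->btree_opintype,
								  tce->btree_opintype,
								  strategy);

	/* InvalidOid means no usable operator; the caller skips the scankey. */
	return opr;
}

The actual change in tsl/src/compression/compression.c below follows this shape inside create_segment_filter_scankey().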
svenklemm committed Sep 18, 2023
1 parent 24e4972 commit a7d742b
Showing 4 changed files with 118 additions and 2 deletions.
2 changes: 2 additions & 0 deletions .unreleased/PR_6081
@@ -0,0 +1,2 @@
Fixes: #6081 Improve compressed DML datatype handling

15 changes: 13 additions & 2 deletions tsl/src/compression/compression.c
@@ -30,6 +30,7 @@
#include <nodes/pg_list.h>
#include <nodes/print.h>
#include <parser/parsetree.h>
#include <parser/parse_coerce.h>
#include <storage/lmgr.h>
#include <storage/predicate.h>
#include <utils/builtins.h>
@@ -1962,8 +1963,18 @@ create_segment_filter_scankey(RowDecompressor *decompressor, char *segment_filte
elog(ERROR, "no btree opfamily for type \"%s\"", format_type_be(atttypid));

Oid opr = get_opfamily_member(tce->btree_opf, atttypid, atttypid, strategy);
Assert(OidIsValid(opr));
/* We should never end up here but: no operator, no optimization */

/*
* Fall back to btree operator input type when it is binary compatible with
* the column type and no operator for column type could be found.
*/
if (!OidIsValid(opr) && IsBinaryCoercible(atttypid, tce->btree_opintype))
{
opr =
get_opfamily_member(tce->btree_opf, tce->btree_opintype, tce->btree_opintype, strategy);
}

/* No operator could be found so we can't create the scankey. */
if (!OidIsValid(opr))
return num_scankeys;

64 changes: 64 additions & 0 deletions tsl/test/shared/expected/compression_dml.out
@@ -104,3 +104,67 @@ QUERY PLAN

DROP TABLE mytab CASCADE;
NOTICE: drop cascades to table _timescaledb_internal.compress_hyper_X_X_chunk
-- test varchar segmentby
CREATE TABLE comp_seg_varchar (
time timestamptz NOT NULL,
source_id varchar(64) NOT NULL,
label varchar NOT NULL,
data jsonb
);
SELECT table_name FROM create_hypertable('comp_seg_varchar', 'time');
WARNING: column type "character varying" used for "source_id" does not follow best practices
WARNING: column type "character varying" used for "label" does not follow best practices
table_name
comp_seg_varchar
(1 row)

CREATE UNIQUE INDEX ON comp_seg_varchar(source_id, label, "time" DESC);
ALTER TABLE comp_seg_varchar SET(timescaledb.compress, timescaledb.compress_segmentby = 'source_id, label', timescaledb.compress_orderby = 'time');
INSERT INTO comp_seg_varchar
SELECT time, source_id, label, '{}' AS data
FROM
generate_series('1990-01-01'::timestamptz, '1990-01-10'::timestamptz, INTERVAL '1 day') AS g1(time),
generate_series(1, 3, 1 ) AS g2(source_id),
generate_series(1, 3, 1 ) AS g3(label);
SELECT compress_chunk(c) FROM show_chunks('comp_seg_varchar') c;
compress_chunk
_timescaledb_internal._hyper_X_X_chunk
_timescaledb_internal._hyper_X_X_chunk
(2 rows)

-- all tuples should come from compressed chunks
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
QUERY PLAN
Append (actual rows=90 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=27 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=63 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
(5 rows)

INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', 'test', 'test', '{}'::jsonb)
ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';
-- no tuples should be moved into uncompressed
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
QUERY PLAN
Append (actual rows=91 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=27 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
-> Seq Scan on _hyper_X_X_chunk (actual rows=1 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=63 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
(6 rows)

INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', '1', '2', '{}'::jsonb)
ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;
QUERY PLAN
Append (actual rows=92 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=24 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=8 loops=1)
-> Seq Scan on _hyper_X_X_chunk (actual rows=5 loops=1)
-> Custom Scan (DecompressChunk) on _hyper_X_X_chunk (actual rows=63 loops=1)
-> Seq Scan on compress_hyper_X_X_chunk (actual rows=9 loops=1)
(6 rows)

DROP TABLE comp_seg_varchar;
39 changes: 39 additions & 0 deletions tsl/test/shared/sql/compression_dml.sql
@@ -86,3 +86,42 @@ INSERT INTO mytab SELECT '2022-10-07 05:30:10+05:30'::timestamp with time zone,
EXPLAIN (costs off) SELECT * FROM :chunk_table;
DROP TABLE mytab CASCADE;

-- test varchar segmentby
CREATE TABLE comp_seg_varchar (
time timestamptz NOT NULL,
source_id varchar(64) NOT NULL,
label varchar NOT NULL,
data jsonb
);

SELECT table_name FROM create_hypertable('comp_seg_varchar', 'time');

CREATE UNIQUE INDEX ON comp_seg_varchar(source_id, label, "time" DESC);

ALTER TABLE comp_seg_varchar SET(timescaledb.compress, timescaledb.compress_segmentby = 'source_id, label', timescaledb.compress_orderby = 'time');

INSERT INTO comp_seg_varchar
SELECT time, source_id, label, '{}' AS data
FROM
generate_series('1990-01-01'::timestamptz, '1990-01-10'::timestamptz, INTERVAL '1 day') AS g1(time),
generate_series(1, 3, 1 ) AS g2(source_id),
generate_series(1, 3, 1 ) AS g3(label);

SELECT compress_chunk(c) FROM show_chunks('comp_seg_varchar') c;


-- all tuples should come from compressed chunks
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;

INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', 'test', 'test', '{}'::jsonb)
ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';

-- no tuples should be moved into uncompressed
EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;

INSERT INTO comp_seg_varchar(time, source_id, label, data) VALUES ('1990-01-02 00:00:00+00', '1', '2', '{}'::jsonb)
ON CONFLICT (source_id, label, time) DO UPDATE SET data = '{"update": true}';

EXPLAIN (analyze,costs off, timing off, summary off) SELECT * FROM comp_seg_varchar;

DROP TABLE comp_seg_varchar;
