Avoid decompressing batches using an empty slot
When running a COPY command into a compressed hypertable, we
could end up using an empty slot to filter compressed batches.
This happens when a previously created copy buffer for a chunk
contains no new tuples to insert. The fix is to verify the slot
before attempting anything else.
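
For context, an "empty slot" here is PostgreSQL executor terminology: a
TupleTableSlot whose TTS_FLAG_EMPTY bit is set holds no tuple, so its datum
arrays are undefined. The guard added below uses the TTS_EMPTY() macro,
which (paraphrased from PostgreSQL's src/include/executor/tuptable.h,
PG 12+) amounts to:

    /* A slot is "empty" when its TTS_FLAG_EMPTY bit is set, i.e. it
     * currently holds no tuple and nothing should be read from it. */
    #define TTS_FLAG_EMPTY (1 << 1)
    #define TTS_EMPTY(slot) (((slot)->tts_flags & TTS_FLAG_EMPTY) != 0)

Building segment-filter scan keys from such a slot would read undefined
values, which is why the fix bails out early instead.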
antekresic committed Sep 27, 2023
1 parent 32a695e commit 1932c02
Showing 5 changed files with 1,070 additions and 0 deletions.
1 change: 1 addition & 0 deletions .unreleased/PR_6117
@@ -0,0 +1 @@
Fixes: #6117 Avoid decompressing batches using an empty slot
9 changes: 9 additions & 0 deletions tsl/src/compression/compression.c
@@ -2000,6 +2000,15 @@ create_segment_filter_scankey(RowDecompressor *decompressor, char *segment_filte
void
decompress_batches_for_insert(ChunkInsertState *cis, Chunk *chunk, TupleTableSlot *slot)
{
    /* COPY operation can end up flushing an empty buffer which
     * could in turn send an empty slot our way. No need to decompress
     * anything if that happens.
     */
    if (TTS_EMPTY(slot))
    {
        return;
    }

    Relation out_rel = cis->rel;

    if (!ts_indexing_relation_has_primary_or_unique_index(out_rel))
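A minimal sketch of how this path can be reached (hypothetical code, not
TimescaleDB's actual COPY implementation; flush_copy_buffer and its
parameters are invented for illustration): COPY buffers tuples per chunk
and flushes each buffer, and a buffer that ends up with zero queued tuples
still gets flushed, so the reusable slot handed downstream was never
stored into.

    /* Hypothetical sketch; names are invented for illustration.
     * With ntuples == 0, nothing is ever stored into "slot", so it is
     * still in its TTS_FLAG_EMPTY state when handed downstream. */
    static void
    flush_copy_buffer(ChunkInsertState *cis, Chunk *chunk,
                      TupleTableSlot *slot, TupleTableSlot **buffered,
                      int ntuples)
    {
        for (int i = 0; i < ntuples; i++)
        {
            /* ... materialize buffered[i] into slot and insert it ... */
        }

        /* Before the fix, this ran even when ntuples == 0, and the
         * callee built scan keys from the never-filled slot. */
        decompress_batches_for_insert(cis, chunk, slot);
    }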
29 changes: 29 additions & 0 deletions tsl/test/expected/compression_insert.out
@@ -1013,3 +1013,32 @@ select count(compress_chunk(ch)) FROM show_chunks('test4') ch;
ALTER TABLE test4 DROP COLUMN two;
INSERT INTO test4 VALUES ('2021-10-14 17:50:16.207', '7', NULL);
INSERT INTO test4 (timestamp, ident) VALUES ('2021-10-14 17:50:16.207', '7');
DROP TABLE test4;
-- Test COPY when trying to flush an empty buffer
-- In this case we send an empty slot used to
-- search for compressed tuples.
CREATE TABLE test_copy (
    timestamp int not null,
    id bigint
);
CREATE UNIQUE INDEX timestamp_id_idx ON test_copy(timestamp, id);
SELECT * FROM create_hypertable('test_copy', 'timestamp', chunk_time_interval=>10);
 hypertable_id | schema_name | table_name | created
---------------+-------------+------------+---------
            22 | public      | test_copy  | t
(1 row)

ALTER TABLE test_copy SET (
    timescaledb.compress,
    timescaledb.compress_orderby = 'timestamp',
    timescaledb.compress_segmentby = 'id'
);
INSERT INTO test_copy SELECT generate_series(1,25,1), -1;
SELECT count(compress_chunk(ch)) FROM show_chunks('test_copy') ch;
 count
-------
     3
(1 row)

\copy test_copy FROM data/copy_data.csv WITH CSV HEADER;
DROP TABLE test_copy;
29 changes: 29 additions & 0 deletions tsl/test/sql/compression_insert.sql
@@ -659,3 +659,32 @@ ALTER TABLE test4 DROP COLUMN two;
INSERT INTO test4 VALUES ('2021-10-14 17:50:16.207', '7', NULL);
INSERT INTO test4 (timestamp, ident) VALUES ('2021-10-14 17:50:16.207', '7');

DROP TABLE test4;


-- Test COPY when trying to flush an empty buffer
-- In this case we send an empty slot used to
-- search for compressed tuples.

CREATE TABLE test_copy (
    timestamp int not null,
    id bigint
);

CREATE UNIQUE INDEX timestamp_id_idx ON test_copy(timestamp, id);

SELECT * FROM create_hypertable('test_copy', 'timestamp', chunk_time_interval=>10);

ALTER TABLE test_copy SET (
    timescaledb.compress,
    timescaledb.compress_orderby = 'timestamp',
    timescaledb.compress_segmentby = 'id'
);

INSERT INTO test_copy SELECT generate_series(1,25,1), -1;

SELECT count(compress_chunk(ch)) FROM show_chunks('test_copy') ch;

\copy test_copy FROM data/copy_data.csv WITH CSV HEADER;
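
The contents of data/copy_data.csv are not part of this diff, so the exact
rows that exercise the empty-buffer flush stay unspecified here. If one
wanted to sanity-check the state after the \copy (hypothetical additions,
not part of the committed test; exact counts depend on the fixture):

    -- Hypothetical follow-up checks; counts depend on data/copy_data.csv.
    SELECT count(*) FROM test_copy;
    SELECT count(*) FROM show_chunks('test_copy');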

DROP TABLE test_copy;